diff options
author | David S. Miller <davem@davemloft.net> | 2020-03-30 19:52:37 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-03-30 19:52:37 -0700 |
commit | ed52f2c608c9451fa2bad298b2ab927416105d65 (patch) | |
tree | d624be01447b5d578aa79b02b37f0023a867bb42 /net | |
parent | f87238d30c0d550553a37585d0e27a8052952bb4 (diff) | |
parent | 8596a75f6c830a693ec86e6467a58b225713a7f1 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/bpf/test_run.c | 4 | ||||
-rw-r--r-- | net/core/dev.c | 26 | ||||
-rw-r--r-- | net/core/filter.c | 141 | ||||
-rw-r--r-- | net/core/net_namespace.c | 15 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 14 | ||||
-rw-r--r-- | net/core/sock.c | 12 | ||||
-rw-r--r-- | net/ipv4/bpf_tcp_ca.c | 33 | ||||
-rw-r--r-- | net/ipv4/ip_input.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_bpf.c | 152 | ||||
-rw-r--r-- | net/ipv4/udp.c | 6 | ||||
-rw-r--r-- | net/ipv6/ip6_input.c | 3 | ||||
-rw-r--r-- | net/ipv6/udp.c | 9 | ||||
-rw-r--r-- | net/sched/act_bpf.c | 3 |
13 files changed, 326 insertions, 95 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 4c921f5154e0..29dbdd4c29f6 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -114,6 +114,9 @@ out: * architecture dependent calling conventions. 7+ can be supported in the * future. */ +__diag_push(); +__diag_ignore(GCC, 8, "-Wmissing-prototypes", + "Global functions as their definitions will be in vmlinux BTF"); int noinline bpf_fentry_test1(int a) { return a + 1; @@ -149,6 +152,7 @@ int noinline bpf_modify_return_test(int a, int *b) *b += 1; return a + *b; } +__diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); diff --git a/net/core/dev.c b/net/core/dev.c index dee392f21466..9c9e763bfe0e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8655,15 +8655,17 @@ static void dev_xdp_uninstall(struct net_device *dev) * @dev: device * @extack: netlink extended ack * @fd: new program fd or negative value to clear + * @expected_fd: old program fd that userspace expects to replace or clear * @flags: xdp-related flags * * Set or clear a bpf program for a device */ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, u32 flags) + int fd, int expected_fd, u32 flags) { const struct net_device_ops *ops = dev->netdev_ops; enum bpf_netdev_command query; + u32 prog_id, expected_id = 0; struct bpf_prog *prog = NULL; bpf_op_t bpf_op, bpf_chk; bool offload; @@ -8684,15 +8686,29 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, if (bpf_op == bpf_chk) bpf_chk = generic_xdp_install; - if (fd >= 0) { - u32 prog_id; + prog_id = __dev_xdp_query(dev, bpf_op, query); + if (flags & XDP_FLAGS_REPLACE) { + if (expected_fd >= 0) { + prog = bpf_prog_get_type_dev(expected_fd, + BPF_PROG_TYPE_XDP, + bpf_op == ops->ndo_bpf); + if (IS_ERR(prog)) + return PTR_ERR(prog); + expected_id = prog->aux->id; + bpf_prog_put(prog); + } + if (prog_id != expected_id) { + NL_SET_ERR_MSG(extack, "Active program does not match expected"); + return -EEXIST; + } + } + if (fd >= 0) { if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; } - prog_id = __dev_xdp_query(dev, bpf_op, query); if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; @@ -8715,7 +8731,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return 0; } } else { - if (!__dev_xdp_query(dev, bpf_op, query)) + if (!prog_id) return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index 96350a743539..7628b947dbc3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2642,6 +2642,19 @@ static const struct bpf_func_proto bpf_msg_pop_data_proto = { .arg4_type = ARG_ANYTHING, }; +#ifdef CONFIG_CGROUP_NET_CLASSID +BPF_CALL_0(bpf_get_cgroup_classid_curr) +{ + return __task_get_classid(current); +} + +static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = { + .func = bpf_get_cgroup_classid_curr, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; +#endif + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -4117,6 +4130,18 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx) +{ + return sock_gen_cookie(ctx); +} + +static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = { + .func = bpf_get_socket_cookie_sock, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) { return sock_gen_cookie(ctx->sk); @@ -4129,6 +4154,39 @@ static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 __bpf_get_netns_cookie(struct sock *sk) +{ +#ifdef CONFIG_NET_NS + return net_gen_cookie(sk ? sk->sk_net.net : &init_net); +#else + return 0; +#endif +} + +BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx) +{ + return __bpf_get_netns_cookie(ctx); +} + +static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = { + .func = bpf_get_netns_cookie_sock, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX_OR_NULL, +}; + +BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) +{ + return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL); +} + +static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = { + .func = bpf_get_netns_cookie_sock_addr, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX_OR_NULL, +}; + BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb) { struct sock *sk = sk_to_full_sk(skb->sk); @@ -4147,8 +4205,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock, - struct bpf_map *, map, u64, flags, void *, data, u64, size) +BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags, + void *, data, u64, size) { if (unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; @@ -4156,8 +4214,8 @@ BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock, return bpf_event_output(map, flags, data, size, NULL, 0, NULL); } -static const struct bpf_func_proto bpf_sockopt_event_output_proto = { - .func = bpf_sockopt_event_output, +static const struct bpf_func_proto bpf_event_output_data_proto = { + .func = bpf_event_output_data, .gpl_only = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -5343,8 +5401,7 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { BPF_CALL_1(bpf_sk_release, struct sock *, sk) { - /* Only full sockets have sk->sk_flags. */ - if (!sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE)) + if (sk_is_refcounted(sk)) sock_gen_put(sk); return 0; } @@ -5860,6 +5917,36 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags) +{ + if (flags != 0) + return -EINVAL; + if (!skb_at_tc_ingress(skb)) + return -EOPNOTSUPP; + if (unlikely(dev_net(skb->dev) != sock_net(sk))) + return -ENETUNREACH; + if (unlikely(sk->sk_reuseport)) + return -ESOCKTNOSUPPORT; + if (sk_is_refcounted(sk) && + unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) + return -ENOENT; + + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sock_pfree; + + return 0; +} + +static const struct bpf_func_proto bpf_sk_assign_proto = { + .func = bpf_sk_assign, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_SOCK_COMMON, + .arg3_type = ARG_ANYTHING, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5954,6 +6041,26 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_sock_proto; + case BPF_FUNC_get_netns_cookie: + return &bpf_get_netns_cookie_sock_proto; + case BPF_FUNC_perf_event_output: + return &bpf_event_output_data_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; +#ifdef CONFIG_CGROUPS + case BPF_FUNC_get_current_cgroup_id: + return &bpf_get_current_cgroup_id_proto; + case BPF_FUNC_get_current_ancestor_cgroup_id: + return &bpf_get_current_ancestor_cgroup_id_proto; +#endif +#ifdef CONFIG_CGROUP_NET_CLASSID + case BPF_FUNC_get_cgroup_classid: + return &bpf_get_cgroup_classid_curr_proto; +#endif default: return bpf_base_func_proto(func_id); } @@ -5978,8 +6085,26 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } case BPF_FUNC_get_socket_cookie: return &bpf_get_socket_cookie_sock_addr_proto; + case BPF_FUNC_get_netns_cookie: + return &bpf_get_netns_cookie_sock_addr_proto; case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; + case BPF_FUNC_perf_event_output: + return &bpf_event_output_data_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; +#ifdef CONFIG_CGROUPS + case BPF_FUNC_get_current_cgroup_id: + return &bpf_get_current_cgroup_id_proto; + case BPF_FUNC_get_current_ancestor_cgroup_id: + return &bpf_get_current_ancestor_cgroup_id_proto; +#endif +#ifdef CONFIG_CGROUP_NET_CLASSID + case BPF_FUNC_get_cgroup_classid: + return &bpf_get_cgroup_classid_curr_proto; +#endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sock_addr_sk_lookup_tcp_proto; @@ -6153,6 +6278,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_ecn_set_ce_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; + case BPF_FUNC_sk_assign: + return &bpf_sk_assign_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6222,7 +6349,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; case BPF_FUNC_perf_event_output: - return &bpf_sockopt_event_output_proto; + return &bpf_event_output_data_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 757cc1d084e7..190ca66a383b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -69,6 +69,20 @@ EXPORT_SYMBOL_GPL(pernet_ops_rwsem); static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; +static atomic64_t cookie_gen; + +u64 net_gen_cookie(struct net *net) +{ + while (1) { + u64 res = atomic64_read(&net->net_cookie); + + if (res) + return res; + res = atomic64_inc_return(&cookie_gen); + atomic64_cmpxchg(&net->net_cookie, 0, res); + } +} + static struct net_generic *net_alloc_generic(void) { struct net_generic *ng; @@ -1087,6 +1101,7 @@ static int __init net_ns_init(void) panic("Could not allocate generic netns"); rcu_assign_pointer(init_net.gen, ng); + net_gen_cookie(&init_net); down_write(&pernet_ops_rwsem); if (setup_net(&init_net, &init_user_ns)) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 14e6ea21c378..709ebbf8ab5b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1872,7 +1872,9 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { }; static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { + [IFLA_XDP_UNSPEC] = { .strict_start_type = IFLA_XDP_EXPECTED_FD }, [IFLA_XDP_FD] = { .type = NLA_S32 }, + [IFLA_XDP_EXPECTED_FD] = { .type = NLA_S32 }, [IFLA_XDP_ATTACHED] = { .type = NLA_U8 }, [IFLA_XDP_FLAGS] = { .type = NLA_U32 }, [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, @@ -2799,8 +2801,20 @@ static int do_setlink(const struct sk_buff *skb, } if (xdp[IFLA_XDP_FD]) { + int expected_fd = -1; + + if (xdp_flags & XDP_FLAGS_REPLACE) { + if (!xdp[IFLA_XDP_EXPECTED_FD]) { + err = -EINVAL; + goto errout; + } + expected_fd = + nla_get_s32(xdp[IFLA_XDP_EXPECTED_FD]); + } + err = dev_change_xdp_fd(dev, extack, nla_get_s32(xdp[IFLA_XDP_FD]), + expected_fd, xdp_flags); if (err) goto errout; diff --git a/net/core/sock.c b/net/core/sock.c index 0fc8937a7ff4..da32d9b6d09f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2071,6 +2071,18 @@ void sock_efree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_efree); +/* Buffer destructor for prefetch/receive path where reference count may + * not be held, e.g. for listen sockets. + */ +#ifdef CONFIG_INET +void sock_pfree(struct sk_buff *skb) +{ + if (sk_is_refcounted(skb->sk)) + sock_gen_put(skb->sk); +} +EXPORT_SYMBOL(sock_pfree); +#endif /* CONFIG_INET */ + kuid_t sock_i_uid(struct sock *sk) { kuid_t uid; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 2bf3abeb1456..e3939f76b024 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -7,6 +7,7 @@ #include <linux/btf.h> #include <linux/filter.h> #include <net/tcp.h> +#include <net/bpf_sk_storage.h> static u32 optional_ops[] = { offsetof(struct tcp_congestion_ops, init), @@ -27,6 +28,27 @@ static u32 unsupported_ops[] = { static const struct btf_type *tcp_sock_type; static u32 tcp_sock_id, sock_id; +static int btf_sk_storage_get_ids[5]; +static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly; + +static int btf_sk_storage_delete_ids[5]; +static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly; + +static void convert_sk_func_proto(struct bpf_func_proto *to, int *to_btf_ids, + const struct bpf_func_proto *from) +{ + int i; + + *to = *from; + to->btf_id = to_btf_ids; + for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) { + if (to->arg_type[i] == ARG_PTR_TO_SOCKET) { + to->arg_type[i] = ARG_PTR_TO_BTF_ID; + to->btf_id[i] = tcp_sock_id; + } + } +} + static int bpf_tcp_ca_init(struct btf *btf) { s32 type_id; @@ -42,6 +64,13 @@ static int bpf_tcp_ca_init(struct btf *btf) tcp_sock_id = type_id; tcp_sock_type = btf_type_by_id(btf, tcp_sock_id); + convert_sk_func_proto(&btf_sk_storage_get_proto, + btf_sk_storage_get_ids, + &bpf_sk_storage_get_proto); + convert_sk_func_proto(&btf_sk_storage_delete_proto, + btf_sk_storage_delete_ids, + &bpf_sk_storage_delete_proto); + return 0; } @@ -167,6 +196,10 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, switch (func_id) { case BPF_FUNC_tcp_send_ack: return &bpf_tcp_send_ack_proto; + case BPF_FUNC_sk_storage_get: + return &btf_sk_storage_get_proto; + case BPF_FUNC_sk_storage_delete: + return &btf_sk_storage_delete_proto; default: return bpf_base_func_proto(func_id); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index aa438c6758a7..b0c244af1e4d 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -509,7 +509,8 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) IPCB(skb)->iif = skb->skb_iif; /* Must drop socket now because of tproxy. */ - skb_orphan(skb); + if (!skb_sk_is_prefetched(skb)) + skb_orphan(skb); return skb; diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index fe7b4fbc31c1..5a05327f97c1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -10,38 +10,6 @@ #include <net/inet_common.h> #include <net/tls.h> -static bool tcp_bpf_stream_read(const struct sock *sk) -{ - struct sk_psock *psock; - bool empty = true; - - rcu_read_lock(); - psock = sk_psock(sk); - if (likely(psock)) - empty = list_empty(&psock->ingress_msg); - rcu_read_unlock(); - return !empty; -} - -static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, - int flags, long timeo, int *err) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int ret = 0; - - if (!timeo) - return ret; - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - ret = sk_wait_event(sk, &timeo, - !list_empty(&psock->ingress_msg) || - !skb_queue_empty(&sk->sk_receive_queue), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - return ret; -} - int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags) { @@ -115,49 +83,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, } EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg); -int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) -{ - struct sk_psock *psock; - int copied, ret; - - psock = sk_psock_get(sk); - if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); - if (!skb_queue_empty(&sk->sk_receive_queue) && - sk_psock_queue_empty(psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - lock_sock(sk); -msg_bytes_ready: - copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); - if (!copied) { - int data, err = 0; - long timeo; - - timeo = sock_rcvtimeo(sk, nonblock); - data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); - if (data) { - if (!sk_psock_queue_empty(psock)) - goto msg_bytes_ready; - release_sock(sk); - sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - } - if (err) { - ret = err; - goto out; - } - copied = -EAGAIN; - } - ret = copied; -out: - release_sock(sk); - sk_psock_put(sk, psock); - return ret; -} - static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg, u32 apply_bytes, int flags) { @@ -298,6 +223,82 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, } EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); +#ifdef CONFIG_BPF_STREAM_PARSER +static bool tcp_bpf_stream_read(const struct sock *sk) +{ + struct sk_psock *psock; + bool empty = true; + + rcu_read_lock(); + psock = sk_psock(sk); + if (likely(psock)) + empty = list_empty(&psock->ingress_msg); + rcu_read_unlock(); + return !empty; +} + +static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, + int flags, long timeo, int *err) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = sk_wait_event(sk, &timeo, + !list_empty(&psock->ingress_msg) || + !skb_queue_empty(&sk->sk_receive_queue), &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + +static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len) +{ + struct sk_psock *psock; + int copied, ret; + + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + if (!skb_queue_empty(&sk->sk_receive_queue) && + sk_psock_queue_empty(psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + lock_sock(sk); +msg_bytes_ready: + copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); + if (!copied) { + int data, err = 0; + long timeo; + + timeo = sock_rcvtimeo(sk, nonblock); + data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); + if (data) { + if (!sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + release_sock(sk); + sk_psock_put(sk, psock); + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + } + if (err) { + ret = err; + goto out; + } + copied = -EAGAIN; + } + ret = copied; +out: + release_sock(sk); + sk_psock_put(sk, psock); + return ret; +} + static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg, int *copied, int flags) { @@ -528,7 +529,6 @@ out_err: return copied ? copied : err; } -#ifdef CONFIG_BPF_STREAM_PARSER enum { TCP_BPF_IPV4, TCP_BPF_IPV6, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7ea90eb4a1ba..32564b350823 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2288,6 +2288,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct rtable *rt = skb_rtable(skb); __be32 saddr, daddr; struct net *net = dev_net(skb->dev); + bool refcounted; /* * Validate the packet. @@ -2313,7 +2314,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - sk = skb_steal_sock(skb); + sk = skb_steal_sock(skb, &refcounted); if (sk) { struct dst_entry *dst = skb_dst(skb); int ret; @@ -2322,7 +2323,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, udp_sk_rx_dst_set(sk, dst); ret = udp_unicast_rcv_skb(sk, skb, uh); - sock_put(sk); + if (refcounted) + sock_put(sk); return ret; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 7b089d0ac8cd..e96304d8a4a7 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -285,7 +285,8 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); /* Must drop socket now because of tproxy. */ - skb_orphan(skb); + if (!skb_sk_is_prefetched(skb)) + skb_orphan(skb); return skb; err: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 5dc439a391fe..7d4151747340 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -843,6 +843,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, struct net *net = dev_net(skb->dev); struct udphdr *uh; struct sock *sk; + bool refcounted; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) @@ -879,7 +880,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; /* Check if the socket is already available, e.g. due to early demux */ - sk = skb_steal_sock(skb); + sk = skb_steal_sock(skb, &refcounted); if (sk) { struct dst_entry *dst = skb_dst(skb); int ret; @@ -888,12 +889,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, udp6_sk_rx_dst_set(sk, dst); if (!uh->check && !udp_sk(sk)->no_check6_rx) { - sock_put(sk); + if (refcounted) + sock_put(sk); goto report_csum_error; } ret = udp6_unicast_rcv_skb(sk, skb, uh); - sock_put(sk); + if (refcounted) + sock_put(sk); return ret; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 46f47e58b3be..54d5652cfe6c 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -12,6 +12,7 @@ #include <linux/bpf.h> #include <net/netlink.h> +#include <net/sock.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -53,6 +54,8 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); } + if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) + skb_orphan(skb); rcu_read_unlock(); /* A BPF program may overwrite the default action opcode. |