diff options
Diffstat (limited to 'net/core/sock_map.c')
-rw-r--r-- | net/core/sock_map.c | 441 |
1 files changed, 361 insertions, 80 deletions
diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 119f52a99dc1..ddc899e83313 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -2,6 +2,7 @@ /* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */ #include <linux/bpf.h> +#include <linux/btf_ids.h> #include <linux/filter.h> #include <linux/errno.h> #include <linux/file.h> @@ -147,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock, static void sock_map_del_link(struct sock *sk, struct sk_psock *psock, void *link_raw) { + bool strp_stop = false, verdict_stop = false; struct sk_psock_link *link, *tmp; - bool strp_stop = false; spin_lock_bh(&psock->link_lock); list_for_each_entry_safe(link, tmp, &psock->link, list) { @@ -158,14 +159,19 @@ static void sock_map_del_link(struct sock *sk, map); if (psock->parser.enabled && stab->progs.skb_parser) strp_stop = true; + if (psock->parser.enabled && stab->progs.skb_verdict) + verdict_stop = true; list_del(&link->list); sk_psock_free_link(link); } } spin_unlock_bh(&psock->link_lock); - if (strp_stop) { + if (strp_stop || verdict_stop) { write_lock_bh(&sk->sk_callback_lock); - sk_psock_stop_strp(sk, psock); + if (strp_stop) + sk_psock_stop_strp(sk, psock); + else + sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); } } @@ -184,8 +190,6 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock) { struct proto *prot; - sock_owned_by_me(sk); - switch (sk->sk_type) { case SOCK_STREAM: prot = tcp_bpf_get_proto(sk, psock); @@ -231,20 +235,21 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, { struct bpf_prog *msg_parser, *skb_parser, *skb_verdict; struct sk_psock *psock; - bool skb_progs; int ret; skb_verdict = READ_ONCE(progs->skb_verdict); - skb_parser = READ_ONCE(progs->skb_parser); - skb_progs = skb_parser && skb_verdict; - if (skb_progs) { + if (skb_verdict) { skb_verdict = bpf_prog_inc_not_zero(skb_verdict); if (IS_ERR(skb_verdict)) return PTR_ERR(skb_verdict); + } + + skb_parser = READ_ONCE(progs->skb_parser); + if (skb_parser) { skb_parser = bpf_prog_inc_not_zero(skb_parser); if (IS_ERR(skb_parser)) { - bpf_prog_put(skb_verdict); - return PTR_ERR(skb_parser); + ret = PTR_ERR(skb_parser); + goto out_put_skb_verdict; } } @@ -253,7 +258,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, msg_parser = bpf_prog_inc_not_zero(msg_parser); if (IS_ERR(msg_parser)) { ret = PTR_ERR(msg_parser); - goto out; + goto out_put_skb_parser; } } @@ -265,15 +270,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, if (psock) { if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) || - (skb_progs && READ_ONCE(psock->progs.skb_parser))) { + (skb_parser && READ_ONCE(psock->progs.skb_parser)) || + (skb_verdict && READ_ONCE(psock->progs.skb_verdict))) { sk_psock_put(sk, psock); ret = -EBUSY; goto out_progs; } } else { psock = sk_psock_init(sk, map->numa_node); - if (!psock) { - ret = -ENOMEM; + if (IS_ERR(psock)) { + ret = PTR_ERR(psock); goto out_progs; } } @@ -286,28 +292,32 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, goto out_drop; write_lock_bh(&sk->sk_callback_lock); - if (skb_progs && !psock->parser.enabled) { + if (skb_parser && skb_verdict && !psock->parser.enabled) { ret = sk_psock_init_strp(sk, psock); - if (ret) { - write_unlock_bh(&sk->sk_callback_lock); - goto out_drop; - } + if (ret) + goto out_unlock_drop; psock_set_prog(&psock->progs.skb_verdict, skb_verdict); psock_set_prog(&psock->progs.skb_parser, skb_parser); sk_psock_start_strp(sk, psock); + } else if (!skb_parser && skb_verdict && !psock->parser.enabled) { + psock_set_prog(&psock->progs.skb_verdict, skb_verdict); + sk_psock_start_verdict(sk,psock); } write_unlock_bh(&sk->sk_callback_lock); return 0; +out_unlock_drop: + write_unlock_bh(&sk->sk_callback_lock); out_drop: sk_psock_put(sk, psock); out_progs: if (msg_parser) bpf_prog_put(msg_parser); -out: - if (skb_progs) { - bpf_prog_put(skb_verdict); +out_put_skb_parser: + if (skb_parser) bpf_prog_put(skb_parser); - } +out_put_skb_verdict: + if (skb_verdict) + bpf_prog_put(skb_verdict); return ret; } @@ -322,8 +332,8 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk) if (!psock) { psock = sk_psock_init(sk, map->numa_node); - if (!psock) - return -ENOMEM; + if (IS_ERR(psock)) + return PTR_ERR(psock); } ret = sock_map_init_proto(sk, psock); @@ -384,7 +394,7 @@ static void *sock_map_lookup(struct bpf_map *map, void *key) struct sock *sk; sk = __sock_map_lookup_elem(map, *(u32 *)key); - if (!sk || !sk_fullsock(sk)) + if (!sk) return NULL; if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; @@ -402,7 +412,7 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key) if (!sk) return ERR_PTR(-ENOENT); - sock_gen_cookie(sk); + __sock_gen_cookie(sk); return &sk->sk_cookie; } @@ -478,8 +488,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, return -EINVAL; if (unlikely(idx >= map->max_entries)) return -E2BIG; - if (inet_csk_has_ulp(sk)) - return -EINVAL; link = sk_psock_init_link(); if (!link) @@ -563,10 +571,12 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) return false; } -static int sock_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static int sock_hash_update_common(struct bpf_map *map, void *key, + struct sock *sk, u64 flags); + +int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, + u64 flags) { - u32 idx = *(u32 *)key; struct socket *sock; struct sock *sk; int ret; @@ -595,14 +605,41 @@ static int sock_map_update_elem(struct bpf_map *map, void *key, sock_map_sk_acquire(sk); if (!sock_map_sk_state_allowed(sk)) ret = -EOPNOTSUPP; + else if (map->map_type == BPF_MAP_TYPE_SOCKMAP) + ret = sock_map_update_common(map, *(u32 *)key, sk, flags); else - ret = sock_map_update_common(map, idx, sk, flags); + ret = sock_hash_update_common(map, key, sk, flags); sock_map_sk_release(sk); out: fput(sock->file); return ret; } +static int sock_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) +{ + struct sock *sk = (struct sock *)value; + int ret; + + if (unlikely(!sk || !sk_fullsock(sk))) + return -EINVAL; + + if (!sock_map_sk_is_suitable(sk)) + return -EOPNOTSUPP; + + local_bh_disable(); + bh_lock_sock(sk); + if (!sock_map_sk_state_allowed(sk)) + ret = -EOPNOTSUPP; + else if (map->map_type == BPF_MAP_TYPE_SOCKMAP) + ret = sock_map_update_common(map, *(u32 *)key, sk, flags); + else + ret = sock_hash_update_common(map, key, sk, flags); + bh_unlock_sock(sk); + local_bh_enable(); + return ret; +} + BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops, struct bpf_map *, map, void *, key, u64, flags) { @@ -681,8 +718,116 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = { .arg4_type = ARG_ANYTHING, }; +struct sock_map_seq_info { + struct bpf_map *map; + struct sock *sk; + u32 index; +}; + +struct bpf_iter__sockmap { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(void *, key); + __bpf_md_ptr(struct sock *, sk); +}; + +DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta, + struct bpf_map *map, void *key, + struct sock *sk) + +static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info) +{ + if (unlikely(info->index >= info->map->max_entries)) + return NULL; + + info->sk = __sock_map_lookup_elem(info->map, info->index); + + /* can't return sk directly, since that might be NULL */ + return info; +} + +static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) +{ + struct sock_map_seq_info *info = seq->private; + + if (*pos == 0) + ++*pos; + + /* pairs with sock_map_seq_stop */ + rcu_read_lock(); + return sock_map_seq_lookup_elem(info); +} + +static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) + __must_hold(rcu) +{ + struct sock_map_seq_info *info = seq->private; + + ++*pos; + ++info->index; + + return sock_map_seq_lookup_elem(info); +} + +static int sock_map_seq_show(struct seq_file *seq, void *v) + __must_hold(rcu) +{ + struct sock_map_seq_info *info = seq->private; + struct bpf_iter__sockmap ctx = {}; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, !v); + if (!prog) + return 0; + + ctx.meta = &meta; + ctx.map = info->map; + if (v) { + ctx.key = &info->index; + ctx.sk = info->sk; + } + + return bpf_iter_run_prog(prog, &ctx); +} + +static void sock_map_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) +{ + if (!v) + (void)sock_map_seq_show(seq, NULL); + + /* pairs with sock_map_seq_start */ + rcu_read_unlock(); +} + +static const struct seq_operations sock_map_seq_ops = { + .start = sock_map_seq_start, + .next = sock_map_seq_next, + .stop = sock_map_seq_stop, + .show = sock_map_seq_show, +}; + +static int sock_map_init_seq_private(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct sock_map_seq_info *info = priv_data; + + info->map = aux->map; + return 0; +} + +static const struct bpf_iter_seq_info sock_map_iter_seq_info = { + .seq_ops = &sock_map_seq_ops, + .init_seq_private = sock_map_init_seq_private, + .seq_priv_size = sizeof(struct sock_map_seq_info), +}; + static int sock_map_btf_id; const struct bpf_map_ops sock_map_ops = { + .map_meta_equal = bpf_map_meta_equal, .map_alloc = sock_map_alloc, .map_free = sock_map_free, .map_get_next_key = sock_map_get_next_key, @@ -694,6 +839,7 @@ const struct bpf_map_ops sock_map_ops = { .map_check_btf = map_check_no_btf, .map_btf_name = "bpf_stab", .map_btf_id = &sock_map_btf_id, + .iter_seq_info = &sock_map_iter_seq_info, }; struct bpf_shtab_elem { @@ -855,8 +1001,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, WARN_ON_ONCE(!rcu_read_lock_held()); if (unlikely(flags > BPF_EXIST)) return -EINVAL; - if (inet_csk_has_ulp(sk)) - return -EINVAL; link = sk_psock_init_link(); if (!link) @@ -915,45 +1059,6 @@ out_free: return ret; } -static int sock_hash_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) -{ - struct socket *sock; - struct sock *sk; - int ret; - u64 ufd; - - if (map->value_size == sizeof(u64)) - ufd = *(u64 *)value; - else - ufd = *(u32 *)value; - if (ufd > S32_MAX) - return -EINVAL; - - sock = sockfd_lookup(ufd, &ret); - if (!sock) - return ret; - sk = sock->sk; - if (!sk) { - ret = -EINVAL; - goto out; - } - if (!sock_map_sk_is_suitable(sk)) { - ret = -EOPNOTSUPP; - goto out; - } - - sock_map_sk_acquire(sk); - if (!sock_map_sk_state_allowed(sk)) - ret = -EOPNOTSUPP; - else - ret = sock_hash_update_common(map, key, sk, flags); - sock_map_sk_release(sk); -out: - fput(sock->file); - return ret; -} - static int sock_hash_get_next_key(struct bpf_map *map, void *key, void *key_next) { @@ -971,7 +1076,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key, if (!elem) goto find_first_elem; - elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)), + elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)), struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); @@ -983,7 +1088,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key, find_first_elem: for (; i < htab->buckets_num; i++) { head = &sock_hash_select_bucket(htab, i)->head; - elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)), struct bpf_shtab_elem, node); if (elem_next) { memcpy(key_next, elem_next->key, key_size); @@ -1119,7 +1224,7 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key) if (!sk) return ERR_PTR(-ENOENT); - sock_gen_cookie(sk); + __sock_gen_cookie(sk); return &sk->sk_cookie; } @@ -1128,7 +1233,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key) struct sock *sk; sk = __sock_hash_lookup_elem(map, key); - if (!sk || !sk_fullsock(sk)) + if (!sk) return NULL; if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt)) return NULL; @@ -1217,12 +1322,128 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = { .arg4_type = ARG_ANYTHING, }; +struct sock_hash_seq_info { + struct bpf_map *map; + struct bpf_shtab *htab; + u32 bucket_id; +}; + +static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info, + struct bpf_shtab_elem *prev_elem) +{ + const struct bpf_shtab *htab = info->htab; + struct bpf_shtab_bucket *bucket; + struct bpf_shtab_elem *elem; + struct hlist_node *node; + + /* try to find next elem in the same bucket */ + if (prev_elem) { + node = rcu_dereference(hlist_next_rcu(&prev_elem->node)); + elem = hlist_entry_safe(node, struct bpf_shtab_elem, node); + if (elem) + return elem; + + /* no more elements, continue in the next bucket */ + info->bucket_id++; + } + + for (; info->bucket_id < htab->buckets_num; info->bucket_id++) { + bucket = &htab->buckets[info->bucket_id]; + node = rcu_dereference(hlist_first_rcu(&bucket->head)); + elem = hlist_entry_safe(node, struct bpf_shtab_elem, node); + if (elem) + return elem; + } + + return NULL; +} + +static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) +{ + struct sock_hash_seq_info *info = seq->private; + + if (*pos == 0) + ++*pos; + + /* pairs with sock_hash_seq_stop */ + rcu_read_lock(); + return sock_hash_seq_find_next(info, NULL); +} + +static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos) + __must_hold(rcu) +{ + struct sock_hash_seq_info *info = seq->private; + + ++*pos; + return sock_hash_seq_find_next(info, v); +} + +static int sock_hash_seq_show(struct seq_file *seq, void *v) + __must_hold(rcu) +{ + struct sock_hash_seq_info *info = seq->private; + struct bpf_iter__sockmap ctx = {}; + struct bpf_shtab_elem *elem = v; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, !elem); + if (!prog) + return 0; + + ctx.meta = &meta; + ctx.map = info->map; + if (elem) { + ctx.key = elem->key; + ctx.sk = elem->sk; + } + + return bpf_iter_run_prog(prog, &ctx); +} + +static void sock_hash_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) +{ + if (!v) + (void)sock_hash_seq_show(seq, NULL); + + /* pairs with sock_hash_seq_start */ + rcu_read_unlock(); +} + +static const struct seq_operations sock_hash_seq_ops = { + .start = sock_hash_seq_start, + .next = sock_hash_seq_next, + .stop = sock_hash_seq_stop, + .show = sock_hash_seq_show, +}; + +static int sock_hash_init_seq_private(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct sock_hash_seq_info *info = priv_data; + + info->map = aux->map; + info->htab = container_of(aux->map, struct bpf_shtab, map); + return 0; +} + +static const struct bpf_iter_seq_info sock_hash_iter_seq_info = { + .seq_ops = &sock_hash_seq_ops, + .init_seq_private = sock_hash_init_seq_private, + .seq_priv_size = sizeof(struct sock_hash_seq_info), +}; + static int sock_hash_map_btf_id; const struct bpf_map_ops sock_hash_ops = { + .map_meta_equal = bpf_map_meta_equal, .map_alloc = sock_hash_alloc, .map_free = sock_hash_free, .map_get_next_key = sock_hash_get_next_key, - .map_update_elem = sock_hash_update_elem, + .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_hash_delete_elem, .map_lookup_elem = sock_hash_lookup, .map_lookup_elem_sys_only = sock_hash_lookup_sys, @@ -1230,6 +1451,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_check_btf = map_check_no_btf, .map_btf_name = "bpf_shtab", .map_btf_id = &sock_hash_map_btf_id, + .iter_seq_info = &sock_hash_iter_seq_info, }; static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) @@ -1340,3 +1562,62 @@ void sock_map_close(struct sock *sk, long timeout) release_sock(sk); saved_close(sk, timeout); } + +static int sock_map_iter_attach_target(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux) +{ + struct bpf_map *map; + int err = -EINVAL; + + if (!linfo->map.map_fd) + return -EBADF; + + map = bpf_map_get_with_uref(linfo->map.map_fd); + if (IS_ERR(map)) + return PTR_ERR(map); + + if (map->map_type != BPF_MAP_TYPE_SOCKMAP && + map->map_type != BPF_MAP_TYPE_SOCKHASH) + goto put_map; + + if (prog->aux->max_rdonly_access > map->key_size) { + err = -EACCES; + goto put_map; + } + + aux->map = map; + return 0; + +put_map: + bpf_map_put_with_uref(map); + return err; +} + +static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux) +{ + bpf_map_put_with_uref(aux->map); +} + +static struct bpf_iter_reg sock_map_iter_reg = { + .target = "sockmap", + .attach_target = sock_map_iter_attach_target, + .detach_target = sock_map_iter_detach_target, + .show_fdinfo = bpf_iter_map_show_fdinfo, + .fill_link_info = bpf_iter_map_fill_link_info, + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__sockmap, key), + PTR_TO_RDONLY_BUF_OR_NULL }, + { offsetof(struct bpf_iter__sockmap, sk), + PTR_TO_BTF_ID_OR_NULL }, + }, +}; + +static int __init bpf_sockmap_iter_init(void) +{ + sock_map_iter_reg.ctx_arg_info[1].btf_id = + btf_sock_ids[BTF_SOCK_TYPE_SOCK]; + return bpf_iter_reg_target(&sock_map_iter_reg); +} +late_initcall(bpf_sockmap_iter_init); |