Diffstat (limited to 'net/core/sock_map.c')
-rw-r--r--  net/core/sock_map.c  582
1 file changed, 456 insertions(+), 126 deletions(-)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4059f94e9bb5..ddc899e83313 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
#include <linux/bpf.h>
+#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <linux/errno.h>
#include <linux/file.h>
@@ -70,11 +71,49 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
struct fd f;
int ret;
+ if (attr->attach_flags || attr->replace_bpf_fd)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
+ fdput(f);
+ return ret;
+}
+
+int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+ u32 ufd = attr->target_fd;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ int ret;
+
+ if (attr->attach_flags || attr->replace_bpf_fd)
+ return -EINVAL;
+
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
- ret = sock_map_prog_update(map, prog, attr->attach_type);
+
+ prog = bpf_prog_get(attr->attach_bpf_fd);
+ if (IS_ERR(prog)) {
+ ret = PTR_ERR(prog);
+ goto put_map;
+ }
+
+ if (prog->type != ptype) {
+ ret = -EINVAL;
+ goto put_prog;
+ }
+
+ ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
+put_prog:
+ bpf_prog_put(prog);
+put_map:
fdput(f);
return ret;
}
@@ -109,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock,
static void sock_map_del_link(struct sock *sk,
struct sk_psock *psock, void *link_raw)
{
+ bool strp_stop = false, verdict_stop = false;
struct sk_psock_link *link, *tmp;
- bool strp_stop = false;
spin_lock_bh(&psock->link_lock);
list_for_each_entry_safe(link, tmp, &psock->link, list) {
@@ -120,14 +159,19 @@ static void sock_map_del_link(struct sock *sk,
map);
if (psock->parser.enabled && stab->progs.skb_parser)
strp_stop = true;
+ if (psock->parser.enabled && stab->progs.skb_verdict)
+ verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
}
}
spin_unlock_bh(&psock->link_lock);
- if (strp_stop) {
+ if (strp_stop || verdict_stop) {
write_lock_bh(&sk->sk_callback_lock);
- sk_psock_stop_strp(sk, psock);
+ if (strp_stop)
+ sk_psock_stop_strp(sk, psock);
+ else
+ sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
}
}
@@ -146,8 +190,6 @@ static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
{
struct proto *prot;
- sock_owned_by_me(sk);
-
switch (sk->sk_type) {
case SOCK_STREAM:
prot = tcp_bpf_get_proto(sk, psock);
@@ -193,20 +235,21 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
{
struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
struct sk_psock *psock;
- bool skb_progs;
int ret;
skb_verdict = READ_ONCE(progs->skb_verdict);
- skb_parser = READ_ONCE(progs->skb_parser);
- skb_progs = skb_parser && skb_verdict;
- if (skb_progs) {
+ if (skb_verdict) {
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
if (IS_ERR(skb_verdict))
return PTR_ERR(skb_verdict);
+ }
+
+ skb_parser = READ_ONCE(progs->skb_parser);
+ if (skb_parser) {
skb_parser = bpf_prog_inc_not_zero(skb_parser);
if (IS_ERR(skb_parser)) {
- bpf_prog_put(skb_verdict);
- return PTR_ERR(skb_parser);
+ ret = PTR_ERR(skb_parser);
+ goto out_put_skb_verdict;
}
}
@@ -215,7 +258,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
msg_parser = bpf_prog_inc_not_zero(msg_parser);
if (IS_ERR(msg_parser)) {
ret = PTR_ERR(msg_parser);
- goto out;
+ goto out_put_skb_parser;
}
}
@@ -227,15 +270,16 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
if (psock) {
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
- (skb_progs && READ_ONCE(psock->progs.skb_parser))) {
+ (skb_parser && READ_ONCE(psock->progs.skb_parser)) ||
+ (skb_verdict && READ_ONCE(psock->progs.skb_verdict))) {
sk_psock_put(sk, psock);
ret = -EBUSY;
goto out_progs;
}
} else {
psock = sk_psock_init(sk, map->numa_node);
- if (!psock) {
- ret = -ENOMEM;
+ if (IS_ERR(psock)) {
+ ret = PTR_ERR(psock);
goto out_progs;
}
}
@@ -248,28 +292,32 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
goto out_drop;
write_lock_bh(&sk->sk_callback_lock);
- if (skb_progs && !psock->parser.enabled) {
+ if (skb_parser && skb_verdict && !psock->parser.enabled) {
ret = sk_psock_init_strp(sk, psock);
- if (ret) {
- write_unlock_bh(&sk->sk_callback_lock);
- goto out_drop;
- }
+ if (ret)
+ goto out_unlock_drop;
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
psock_set_prog(&psock->progs.skb_parser, skb_parser);
sk_psock_start_strp(sk, psock);
+ } else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
+ psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+ sk_psock_start_verdict(sk, psock);
}
write_unlock_bh(&sk->sk_callback_lock);
return 0;
+out_unlock_drop:
+ write_unlock_bh(&sk->sk_callback_lock);
out_drop:
sk_psock_put(sk, psock);
out_progs:
if (msg_parser)
bpf_prog_put(msg_parser);
-out:
- if (skb_progs) {
- bpf_prog_put(skb_verdict);
+out_put_skb_parser:
+ if (skb_parser)
bpf_prog_put(skb_parser);
- }
+out_put_skb_verdict:
+ if (skb_verdict)
+ bpf_prog_put(skb_verdict);
return ret;
}
@@ -284,8 +332,8 @@ static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
if (!psock) {
psock = sk_psock_init(sk, map->numa_node);
- if (!psock)
- return -ENOMEM;
+ if (IS_ERR(psock))
+ return PTR_ERR(psock);
}
ret = sock_map_init_proto(sk, psock);
@@ -346,7 +394,7 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
struct sock *sk;
sk = __sock_map_lookup_elem(map, *(u32 *)key);
- if (!sk || !sk_fullsock(sk))
+ if (!sk)
return NULL;
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
@@ -364,7 +412,7 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
if (!sk)
return ERR_PTR(-ENOENT);
- sock_gen_cookie(sk);
+ __sock_gen_cookie(sk);
return &sk->sk_cookie;
}
@@ -440,8 +488,6 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
link = sk_psock_init_link();
if (!link)
@@ -525,10 +571,12 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
return false;
}
-static int sock_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
+static int sock_hash_update_common(struct bpf_map *map, void *key,
+ struct sock *sk, u64 flags);
+
+int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
+ u64 flags)
{
- u32 idx = *(u32 *)key;
struct socket *sock;
struct sock *sk;
int ret;
@@ -557,14 +605,41 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
sock_map_sk_acquire(sk);
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
+ else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
+ ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
else
- ret = sock_map_update_common(map, idx, sk, flags);
+ ret = sock_hash_update_common(map, key, sk, flags);
sock_map_sk_release(sk);
out:
fput(sock->file);
return ret;
}
+static int sock_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ struct sock *sk = (struct sock *)value;
+ int ret;
+
+ if (unlikely(!sk || !sk_fullsock(sk)))
+ return -EINVAL;
+
+ if (!sock_map_sk_is_suitable(sk))
+ return -EOPNOTSUPP;
+
+ local_bh_disable();
+ bh_lock_sock(sk);
+ if (!sock_map_sk_state_allowed(sk))
+ ret = -EOPNOTSUPP;
+ else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
+ ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
+ else
+ ret = sock_hash_update_common(map, key, sk, flags);
+ bh_unlock_sock(sk);
+ local_bh_enable();
+ return ret;
+}
+
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
struct bpf_map *, map, void *, key, u64, flags)
{
@@ -643,7 +718,116 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_map_seq_info {
+ struct bpf_map *map;
+ struct sock *sk;
+ u32 index;
+};
+
+struct bpf_iter__sockmap {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(void *, key);
+ __bpf_md_ptr(struct sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta,
+ struct bpf_map *map, void *key,
+ struct sock *sk)
+
+static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
+{
+ if (unlikely(info->index >= info->map->max_entries))
+ return NULL;
+
+ info->sk = __sock_map_lookup_elem(info->map, info->index);
+
+ /* can't return sk directly, since that might be NULL */
+ return info;
+}
+
+static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_map_seq_stop */
+ rcu_read_lock();
+ return sock_map_seq_lookup_elem(info);
+}
+
+static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ __must_hold(rcu)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ ++*pos;
+ ++info->index;
+
+ return sock_map_seq_lookup_elem(info);
+}
+
+static int sock_map_seq_show(struct seq_file *seq, void *v)
+ __must_hold(rcu)
+{
+ struct sock_map_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !v);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (v) {
+ ctx.key = &info->index;
+ ctx.sk = info->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_map_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ if (!v)
+ (void)sock_map_seq_show(seq, NULL);
+
+ /* pairs with sock_map_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_map_seq_ops = {
+ .start = sock_map_seq_start,
+ .next = sock_map_seq_next,
+ .stop = sock_map_seq_stop,
+ .show = sock_map_seq_show,
+};
+
+static int sock_map_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_map_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
+ .seq_ops = &sock_map_seq_ops,
+ .init_seq_private = sock_map_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_map_seq_info),
+};
+
+static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
.map_get_next_key = sock_map_get_next_key,
@@ -653,9 +837,12 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_stab",
+ .map_btf_id = &sock_map_btf_id,
+ .iter_seq_info = &sock_map_iter_seq_info,
};
-struct bpf_htab_elem {
+struct bpf_shtab_elem {
struct rcu_head rcu;
u32 hash;
struct sock *sk;
@@ -663,14 +850,14 @@ struct bpf_htab_elem {
u8 key[];
};
-struct bpf_htab_bucket {
+struct bpf_shtab_bucket {
struct hlist_head head;
raw_spinlock_t lock;
};
-struct bpf_htab {
+struct bpf_shtab {
struct bpf_map map;
- struct bpf_htab_bucket *buckets;
+ struct bpf_shtab_bucket *buckets;
u32 buckets_num;
u32 elem_size;
struct sk_psock_progs progs;
@@ -682,17 +869,17 @@ static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
return jhash(key, len, 0);
}
-static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab,
- u32 hash)
+static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab,
+ u32 hash)
{
return &htab->buckets[hash & (htab->buckets_num - 1)];
}
-static struct bpf_htab_elem *
+static struct bpf_shtab_elem *
sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
u32 key_size)
{
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
hlist_for_each_entry_rcu(elem, head, node) {
if (elem->hash == hash &&
@@ -705,10 +892,10 @@ sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -719,8 +906,8 @@ static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
return elem ? elem->sk : NULL;
}
-static void sock_hash_free_elem(struct bpf_htab *htab,
- struct bpf_htab_elem *elem)
+static void sock_hash_free_elem(struct bpf_shtab *htab,
+ struct bpf_shtab_elem *elem)
{
atomic_dec(&htab->count);
kfree_rcu(elem, rcu);
@@ -729,9 +916,9 @@ static void sock_hash_free_elem(struct bpf_htab *htab,
static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
void *link_raw)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem_probe, *elem = link_raw;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem_probe, *elem = link_raw;
+ struct bpf_shtab_bucket *bucket;
WARN_ON_ONCE(!rcu_read_lock_held());
bucket = sock_hash_select_bucket(htab, elem->hash);
@@ -753,10 +940,10 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
static int sock_hash_delete_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 hash, key_size = map->key_size;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
int ret = -ENOENT;
hash = sock_hash_bucket_hash(key, key_size);
@@ -774,12 +961,12 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
return ret;
}
-static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
- void *key, u32 key_size,
- u32 hash, struct sock *sk,
- struct bpf_htab_elem *old)
+static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab,
+ void *key, u32 key_size,
+ u32 hash, struct sock *sk,
+ struct bpf_shtab_elem *old)
{
- struct bpf_htab_elem *new;
+ struct bpf_shtab_elem *new;
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
if (!old) {
@@ -803,10 +990,10 @@ static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_elem *elem, *elem_new;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab_elem *elem, *elem_new;
+ struct bpf_shtab_bucket *bucket;
struct sk_psock_link *link;
struct sk_psock *psock;
int ret;
@@ -814,8 +1001,6 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
WARN_ON_ONCE(!rcu_read_lock_held());
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
link = sk_psock_init_link();
if (!link)
@@ -874,50 +1059,11 @@ out_free:
return ret;
}
-static int sock_hash_update_elem(struct bpf_map *map, void *key,
- void *value, u64 flags)
-{
- struct socket *sock;
- struct sock *sk;
- int ret;
- u64 ufd;
-
- if (map->value_size == sizeof(u64))
- ufd = *(u64 *)value;
- else
- ufd = *(u32 *)value;
- if (ufd > S32_MAX)
- return -EINVAL;
-
- sock = sockfd_lookup(ufd, &ret);
- if (!sock)
- return ret;
- sk = sock->sk;
- if (!sk) {
- ret = -EINVAL;
- goto out;
- }
- if (!sock_map_sk_is_suitable(sk)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
- sock_map_sk_acquire(sk);
- if (!sock_map_sk_state_allowed(sk))
- ret = -EOPNOTSUPP;
- else
- ret = sock_hash_update_common(map, key, sk, flags);
- sock_map_sk_release(sk);
-out:
- fput(sock->file);
- return ret;
-}
-
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem, *elem_next;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem, *elem_next;
u32 hash, key_size = map->key_size;
struct hlist_head *head;
int i = 0;
@@ -930,8 +1076,8 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
if (!elem)
goto find_first_elem;
- elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
- struct bpf_htab_elem, node);
+ elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)),
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -942,8 +1088,8 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
find_first_elem:
for (; i < htab->buckets_num; i++) {
head = &sock_hash_select_bucket(htab, i)->head;
- elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
- struct bpf_htab_elem, node);
+ elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)),
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -955,7 +1101,7 @@ find_first_elem:
static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
{
- struct bpf_htab *htab;
+ struct bpf_shtab *htab;
int i, err;
u64 cost;
@@ -977,15 +1123,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
bpf_map_init_from_attr(&htab->map, attr);
htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
- htab->elem_size = sizeof(struct bpf_htab_elem) +
+ htab->elem_size = sizeof(struct bpf_shtab_elem) +
round_up(htab->map.key_size, 8);
if (htab->buckets_num == 0 ||
- htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) {
+ htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) {
err = -EINVAL;
goto free_htab;
}
- cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) +
+ cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) +
(u64) htab->elem_size * htab->map.max_entries;
if (cost >= U32_MAX - PAGE_SIZE) {
err = -EINVAL;
@@ -996,7 +1142,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
- sizeof(struct bpf_htab_bucket),
+ sizeof(struct bpf_shtab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
bpf_map_charge_finish(&htab->map.memory);
@@ -1017,10 +1163,10 @@ free_htab:
static void sock_hash_free(struct bpf_map *map)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_bucket *bucket;
struct hlist_head unlink_list;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
struct hlist_node *node;
int i;
@@ -1078,7 +1224,7 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
if (!sk)
return ERR_PTR(-ENOENT);
- sock_gen_cookie(sk);
+ __sock_gen_cookie(sk);
return &sk->sk_cookie;
}
@@ -1087,7 +1233,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key)
struct sock *sk;
sk = __sock_hash_lookup_elem(map, key);
- if (!sk || !sk_fullsock(sk))
+ if (!sk)
return NULL;
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
return NULL;
@@ -1096,7 +1242,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key)
static void sock_hash_release_progs(struct bpf_map *map)
{
- psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs);
+ psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs);
}
BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
@@ -1176,16 +1322,136 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_hash_seq_info {
+ struct bpf_map *map;
+ struct bpf_shtab *htab;
+ u32 bucket_id;
+};
+
+static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
+ struct bpf_shtab_elem *prev_elem)
+{
+ const struct bpf_shtab *htab = info->htab;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
+ struct hlist_node *node;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ node = rcu_dereference(hlist_next_rcu(&prev_elem->node));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+
+ /* no more elements, continue in the next bucket */
+ info->bucket_id++;
+ }
+
+ for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
+ bucket = &htab->buckets[info->bucket_id];
+ node = rcu_dereference(hlist_first_rcu(&bucket->head));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+ }
+
+ return NULL;
+}
+
+static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_hash_seq_stop */
+ rcu_read_lock();
+ return sock_hash_seq_find_next(info, NULL);
+}
+
+static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ __must_hold(rcu)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ ++*pos;
+ return sock_hash_seq_find_next(info, v);
+}
+
+static int sock_hash_seq_show(struct seq_file *seq, void *v)
+ __must_hold(rcu)
+{
+ struct sock_hash_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_shtab_elem *elem = v;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !elem);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ ctx.key = elem->key;
+ ctx.sk = elem->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static void sock_hash_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ if (!v)
+ (void)sock_hash_seq_show(seq, NULL);
+
+ /* pairs with sock_hash_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_hash_seq_ops = {
+ .start = sock_hash_seq_start,
+ .next = sock_hash_seq_next,
+ .stop = sock_hash_seq_stop,
+ .show = sock_hash_seq_show,
+};
+
+static int sock_hash_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_hash_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ info->htab = container_of(aux->map, struct bpf_shtab, map);
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
+ .seq_ops = &sock_hash_seq_ops,
+ .init_seq_private = sock_hash_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_hash_seq_info),
+};
+
+static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
.map_get_next_key = sock_hash_get_next_key,
- .map_update_elem = sock_hash_update_elem,
+ .map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_hash_delete_elem,
.map_lookup_elem = sock_hash_lookup,
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_shtab",
+ .map_btf_id = &sock_hash_map_btf_id,
+ .iter_seq_info = &sock_hash_iter_seq_info,
};
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1194,7 +1460,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
case BPF_MAP_TYPE_SOCKMAP:
return &container_of(map, struct bpf_stab, map)->progs;
case BPF_MAP_TYPE_SOCKHASH:
- return &container_of(map, struct bpf_htab, map)->progs;
+ return &container_of(map, struct bpf_shtab, map)->progs;
default:
break;
}
@@ -1203,27 +1469,32 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
}
int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- u32 which)
+ struct bpf_prog *old, u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
+ struct bpf_prog **pprog;
if (!progs)
return -EOPNOTSUPP;
switch (which) {
case BPF_SK_MSG_VERDICT:
- psock_set_prog(&progs->msg_parser, prog);
+ pprog = &progs->msg_parser;
break;
case BPF_SK_SKB_STREAM_PARSER:
- psock_set_prog(&progs->skb_parser, prog);
+ pprog = &progs->skb_parser;
break;
case BPF_SK_SKB_STREAM_VERDICT:
- psock_set_prog(&progs->skb_verdict, prog);
+ pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}
+ if (old)
+ return psock_replace_prog(pprog, prog, old);
+
+ psock_set_prog(pprog, prog);
return 0;
}
@@ -1291,3 +1562,62 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+
+static int sock_map_iter_attach_target(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_map *map;
+ int err = -EINVAL;
+
+ if (!linfo->map.map_fd)
+ return -EBADF;
+
+ map = bpf_map_get_with_uref(linfo->map.map_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP &&
+ map->map_type != BPF_MAP_TYPE_SOCKHASH)
+ goto put_map;
+
+ if (prog->aux->max_rdonly_access > map->key_size) {
+ err = -EACCES;
+ goto put_map;
+ }
+
+ aux->map = map;
+ return 0;
+
+put_map:
+ bpf_map_put_with_uref(map);
+ return err;
+}
+
+static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
+{
+ bpf_map_put_with_uref(aux->map);
+}
+
+static struct bpf_iter_reg sock_map_iter_reg = {
+ .target = "sockmap",
+ .attach_target = sock_map_iter_attach_target,
+ .detach_target = sock_map_iter_detach_target,
+ .show_fdinfo = bpf_iter_map_show_fdinfo,
+ .fill_link_info = bpf_iter_map_fill_link_info,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__sockmap, key),
+ PTR_TO_RDONLY_BUF_OR_NULL },
+ { offsetof(struct bpf_iter__sockmap, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+};
+
+static int __init bpf_sockmap_iter_init(void)
+{
+ sock_map_iter_reg.ctx_arg_info[1].btf_id =
+ btf_sock_ids[BTF_SOCK_TYPE_SOCK];
+ return bpf_iter_reg_target(&sock_map_iter_reg);
+}
+late_initcall(bpf_sockmap_iter_init);
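
The hunks above register a BPF iterator target named "sockmap" (via bpf_iter_reg_target() in bpf_sockmap_iter_init()) that walks both sockmap and sockhash entries, passing each element to a BPF program through struct bpf_iter__sockmap. As a rough illustration of how that target is consumed, here is a minimal iterator program. This is a sketch, not part of the patch: it assumes a BTF-enabled kernel with a generated vmlinux.h plus libbpf's bpf_helpers.h, and the program name count_elems and the elems counter are illustrative.

/* SPDX-License-Identifier: GPL-2.0 */
/* Sketch only: count the sockets in a sockmap/sockhash using the new
 * "sockmap" iterator target. struct bpf_iter__sockmap matches the
 * context struct defined in the diff above (meta, map, key, sk).
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

__u64 elems;

SEC("iter/sockmap")
int count_elems(struct bpf_iter__sockmap *ctx)
{
	/* ctx->key and ctx->sk are NULL on the final invocation, made
	 * with !v from sock_map_seq_stop()/sock_hash_seq_stop().
	 */
	if (ctx->sk)
		elems++;
	return 0;
}

Such a program would be attached by passing the map fd in union bpf_iter_link_info (the field checked by sock_map_iter_attach_target() above), for example through libbpf's bpf_program__attach_iter(), and then read by creating and draining the resulting bpf_iter fd.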