diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/Kconfig | 7 | ||||
-rw-r--r-- | kernel/bpf/Makefile | 2 | ||||
-rw-r--r-- | kernel/bpf/bloom_filter.c | 204 | ||||
-rw-r--r-- | kernel/bpf/bpf_struct_ops.c | 32 | ||||
-rw-r--r-- | kernel/bpf/bpf_struct_ops_types.h | 3 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 103 | ||||
-rw-r--r-- | kernel/bpf/core.c | 4 | ||||
-rw-r--r-- | kernel/bpf/preload/.gitignore | 4 | ||||
-rw-r--r-- | kernel/bpf/preload/Makefile | 26 | ||||
-rw-r--r-- | kernel/bpf/preload/iterators/Makefile | 38 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 77 | ||||
-rw-r--r-- | kernel/bpf/trampoline.c | 12 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 250 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 18 |
14 files changed, 660 insertions, 120 deletions
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index a82d6de86522..d24d518ddd63 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -64,6 +64,7 @@ config BPF_JIT_DEFAULT_ON config BPF_UNPRIV_DEFAULT_OFF bool "Disable unprivileged BPF by default" + default y depends on BPF_SYSCALL help Disables unprivileged BPF by default by setting the corresponding @@ -72,6 +73,12 @@ config BPF_UNPRIV_DEFAULT_OFF disable it by setting it to 1 (from which no other transition to 0 is possible anymore). + Unprivileged BPF could be used to exploit certain potential + speculative execution side-channel vulnerabilities on unmitigated + affected hardware. + + If you are unsure how to answer this question, answer Y. + source "kernel/bpf/preload/Kconfig" config BPF_LSM diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7f33098ca63f..cf6ca339f3cd 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -7,7 +7,7 @@ endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c new file mode 100644 index 000000000000..277a05e9c984 --- /dev/null +++ b/kernel/bpf/bloom_filter.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bitmap.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/err.h> +#include <linux/jhash.h> +#include <linux/random.h> + +#define BLOOM_CREATE_FLAG_MASK \ + (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) + +struct bpf_bloom_filter { + struct bpf_map map; + u32 bitset_mask; + u32 hash_seed; + /* If the size of the values in the bloom filter is u32 aligned, + * then it is more performant to use jhash2 as the underlying hash + * function, else we use jhash. This tracks the number of u32s + * in an u32-aligned value size. If the value size is not u32 aligned, + * this will be 0. + */ + u32 aligned_u32_count; + u32 nr_hash_funcs; + unsigned long bitset[]; +}; + +static u32 hash(struct bpf_bloom_filter *bloom, void *value, + u32 value_size, u32 index) +{ + u32 h; + + if (bloom->aligned_u32_count) + h = jhash2(value, bloom->aligned_u32_count, + bloom->hash_seed + index); + else + h = jhash(value, value_size, bloom->hash_seed + index); + + return h & bloom->bitset_mask; +} + +static int bloom_map_peek_elem(struct bpf_map *map, void *value) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + if (!test_bit(h, bloom->bitset)) + return -ENOENT; + } + + return 0; +} + +static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + u32 i, h; + + if (flags != BPF_ANY) + return -EINVAL; + + for (i = 0; i < bloom->nr_hash_funcs; i++) { + h = hash(bloom, value, map->value_size, i); + set_bit(h, bloom->bitset); + } + + return 0; +} + +static int bloom_map_pop_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static int bloom_map_delete_elem(struct bpf_map *map, void *value) +{ + return -EOPNOTSUPP; +} + +static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) +{ + u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; + int numa_node = bpf_map_attr_numa_node(attr); + struct bpf_bloom_filter *bloom; + + if (!bpf_capable()) + return ERR_PTR(-EPERM); + + if (attr->key_size != 0 || attr->value_size == 0 || + attr->max_entries == 0 || + attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || + !bpf_map_flags_access_ok(attr->map_flags) || + /* The lower 4 bits of map_extra (0xF) specify the number + * of hash functions + */ + (attr->map_extra & ~0xF)) + return ERR_PTR(-EINVAL); + + nr_hash_funcs = attr->map_extra; + if (nr_hash_funcs == 0) + /* Default to using 5 hash functions if unspecified */ + nr_hash_funcs = 5; + + /* For the bloom filter, the optimal bit array size that minimizes the + * false positive probability is n * k / ln(2) where n is the number of + * expected entries in the bloom filter and k is the number of hash + * functions. We use 7 / 5 to approximate 1 / ln(2). + * + * We round this up to the nearest power of two to enable more efficient + * hashing using bitmasks. The bitmask will be the bit array size - 1. + * + * If this overflows a u32, the bit array size will have 2^32 (4 + * GB) bits. + */ + if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) || + check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) || + nr_bits > (1UL << 31)) { + /* The bit array size is 2^32 bits but to avoid overflowing the + * u32, we use U32_MAX, which will round up to the equivalent + * number of bytes + */ + bitset_bytes = BITS_TO_BYTES(U32_MAX); + bitset_mask = U32_MAX; + } else { + if (nr_bits <= BITS_PER_LONG) + nr_bits = BITS_PER_LONG; + else + nr_bits = roundup_pow_of_two(nr_bits); + bitset_bytes = BITS_TO_BYTES(nr_bits); + bitset_mask = nr_bits - 1; + } + + bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); + bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node); + + if (!bloom) + return ERR_PTR(-ENOMEM); + + bpf_map_init_from_attr(&bloom->map, attr); + + bloom->nr_hash_funcs = nr_hash_funcs; + bloom->bitset_mask = bitset_mask; + + /* Check whether the value size is u32-aligned */ + if ((attr->value_size & (sizeof(u32) - 1)) == 0) + bloom->aligned_u32_count = + attr->value_size / sizeof(u32); + + if (!(attr->map_flags & BPF_F_ZERO_SEED)) + bloom->hash_seed = get_random_int(); + + return &bloom->map; +} + +static void bloom_map_free(struct bpf_map *map) +{ + struct bpf_bloom_filter *bloom = + container_of(map, struct bpf_bloom_filter, map); + + bpf_map_area_free(bloom); +} + +static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) +{ + /* The eBPF program should use map_peek_elem instead */ + return ERR_PTR(-EINVAL); +} + +static int bloom_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) +{ + /* The eBPF program should use map_push_elem instead */ + return -EINVAL; +} + +static int bloom_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + /* Bloom filter maps are keyless */ + return btf_type_is_void(key_type) ? 0 : -EINVAL; +} + +static int bpf_bloom_map_btf_id; +const struct bpf_map_ops bloom_filter_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc = bloom_map_alloc, + .map_free = bloom_map_free, + .map_push_elem = bloom_map_push_elem, + .map_peek_elem = bloom_map_peek_elem, + .map_pop_elem = bloom_map_pop_elem, + .map_lookup_elem = bloom_map_lookup_elem, + .map_update_elem = bloom_map_update_elem, + .map_delete_elem = bloom_map_delete_elem, + .map_check_btf = bloom_map_check_btf, + .map_btf_name = "bpf_bloom_filter", + .map_btf_id = &bpf_bloom_map_btf_id, +}; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 9abcc33f02cf..8ecfe4752769 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -93,6 +93,9 @@ const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = { }; const struct bpf_prog_ops bpf_struct_ops_prog_ops = { +#ifdef CONFIG_NET + .test_run = bpf_struct_ops_test_run, +#endif }; static const struct btf_type *module_type; @@ -312,6 +315,20 @@ static int check_zero_holes(const struct btf_type *t, void *data) return 0; } +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, + struct bpf_prog *prog, + const struct btf_func_model *model, + void *image, void *image_end) +{ + u32 flags; + + tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; + tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0; + return arch_prepare_bpf_trampoline(NULL, image, image_end, + model, flags, tprogs, NULL); +} + static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { @@ -323,7 +340,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_tramp_progs *tprogs = NULL; void *udata, *kdata; int prog_fd, err = 0; - void *image; + void *image, *image_end; u32 i; if (flags) @@ -363,12 +380,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, udata = &uvalue->data; kdata = &kvalue->data; image = st_map->image; + image_end = st_map->image + PAGE_SIZE; for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; - u32 flags; moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); @@ -430,14 +447,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto reset_unlock; } - tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; - tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; - flags = st_ops->func_models[i].ret_size > 0 ? - BPF_TRAMP_F_RET_FENTRY_RET : 0; - err = arch_prepare_bpf_trampoline(NULL, image, - st_map->image + PAGE_SIZE, - &st_ops->func_models[i], - flags, tprogs, NULL); + err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + &st_ops->func_models[i], + image, image_end); if (err < 0) goto reset_unlock; diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h index 066d83ea1c99..5678a9ddf817 100644 --- a/kernel/bpf/bpf_struct_ops_types.h +++ b/kernel/bpf/bpf_struct_ops_types.h @@ -2,6 +2,9 @@ /* internal file - do not include directly */ #ifdef CONFIG_BPF_JIT +#ifdef CONFIG_NET +BPF_STRUCT_OPS_TYPE(bpf_dummy_ops) +#endif #ifdef CONFIG_INET #include <net/tcp.h> BPF_STRUCT_OPS_TYPE(tcp_congestion_ops) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c3d605b22473..dbc3ad07e21b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -281,7 +281,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", - [BTF_KIND_TAG] = "TAG", + [BTF_KIND_DECL_TAG] = "DECL_TAG", }; const char *btf_type_str(const struct btf_type *t) @@ -460,15 +460,15 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } -static bool btf_type_is_tag(const struct btf_type *t) +static bool btf_type_is_decl_tag(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_TAG; + return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; } -static bool btf_type_is_tag_target(const struct btf_type *t) +static bool btf_type_is_decl_tag_target(const struct btf_type *t) { return btf_type_is_func(t) || btf_type_is_struct(t) || - btf_type_is_var(t); + btf_type_is_var(t) || btf_type_is_typedef(t); } u32 btf_nr_types(const struct btf *btf) @@ -549,7 +549,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, static bool btf_type_is_resolve_source_only(const struct btf_type *t) { return btf_type_is_var(t) || - btf_type_is_tag(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -576,7 +576,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || - btf_type_is_tag(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -630,9 +630,9 @@ static const struct btf_var *btf_type_var(const struct btf_type *t) return (const struct btf_var *)(t + 1); } -static const struct btf_tag *btf_type_tag(const struct btf_type *t) +static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) { - return (const struct btf_tag *)(t + 1); + return (const struct btf_decl_tag *)(t + 1); } static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) @@ -3820,11 +3820,11 @@ static const struct btf_kind_operations float_ops = { .show = btf_df_show, }; -static s32 btf_tag_check_meta(struct btf_verifier_env *env, +static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { - const struct btf_tag *tag; + const struct btf_decl_tag *tag; u32 meta_needed = sizeof(*tag); s32 component_idx; const char *value; @@ -3852,7 +3852,7 @@ static s32 btf_tag_check_meta(struct btf_verifier_env *env, return -EINVAL; } - component_idx = btf_type_tag(t)->component_idx; + component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx < -1) { btf_verifier_log_type(env, t, "Invalid component_idx"); return -EINVAL; @@ -3863,7 +3863,7 @@ static s32 btf_tag_check_meta(struct btf_verifier_env *env, return meta_needed; } -static int btf_tag_resolve(struct btf_verifier_env *env, +static int btf_decl_tag_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { const struct btf_type *next_type; @@ -3874,7 +3874,7 @@ static int btf_tag_resolve(struct btf_verifier_env *env, u32 vlen; next_type = btf_type_by_id(btf, next_type_id); - if (!next_type || !btf_type_is_tag_target(next_type)) { + if (!next_type || !btf_type_is_decl_tag_target(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } @@ -3883,9 +3883,9 @@ static int btf_tag_resolve(struct btf_verifier_env *env, !env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id); - component_idx = btf_type_tag(t)->component_idx; + component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx != -1) { - if (btf_type_is_var(next_type)) { + if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) { btf_verifier_log_type(env, v->t, "Invalid component_idx"); return -EINVAL; } @@ -3909,18 +3909,18 @@ static int btf_tag_resolve(struct btf_verifier_env *env, return 0; } -static void btf_tag_log(struct btf_verifier_env *env, const struct btf_type *t) +static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t) { btf_verifier_log(env, "type=%u component_idx=%d", t->type, - btf_type_tag(t)->component_idx); + btf_type_decl_tag(t)->component_idx); } -static const struct btf_kind_operations tag_ops = { - .check_meta = btf_tag_check_meta, - .resolve = btf_tag_resolve, +static const struct btf_kind_operations decl_tag_ops = { + .check_meta = btf_decl_tag_check_meta, + .resolve = btf_decl_tag_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, - .log_details = btf_tag_log, + .log_details = btf_decl_tag_log, .show = btf_df_show, }; @@ -4058,7 +4058,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_VAR] = &var_ops, [BTF_KIND_DATASEC] = &datasec_ops, [BTF_KIND_FLOAT] = &float_ops, - [BTF_KIND_TAG] = &tag_ops, + [BTF_KIND_DECL_TAG] = &decl_tag_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -4143,7 +4143,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return !btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); - if (btf_type_is_tag(t)) + if (btf_type_is_decl_tag(t)) return btf_resolved_type_id(btf, type_id) && !btf_resolved_type_size(btf, type_id); @@ -6343,3 +6343,58 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { }; BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct) + +/* BTF ID set registration API for modules */ + +struct kfunc_btf_id_list { + struct list_head list; + struct mutex mutex; +}; + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + +void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ + mutex_lock(&l->mutex); + list_add(&s->list, &l->list); + mutex_unlock(&l->mutex); +} +EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); + +void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, + struct kfunc_btf_id_set *s) +{ + mutex_lock(&l->mutex); + list_del_init(&s->list); + mutex_unlock(&l->mutex); +} +EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); + +bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, + struct module *owner) +{ + struct kfunc_btf_id_set *s; + + if (!owner) + return false; + mutex_lock(&klist->mutex); + list_for_each_entry(s, &klist->list, list) { + if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { + mutex_unlock(&klist->mutex); + return true; + } + } + mutex_unlock(&klist->mutex); + return false; +} + +#endif + +#define DEFINE_KFUNC_BTF_ID_LIST(name) \ + struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ + __MUTEX_INITIALIZER(name.mutex) }; \ + EXPORT_SYMBOL_GPL(name) + +DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); +DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ded9163185d1..327e3996eadb 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,7 @@ #include <linux/perf_event.h> #include <linux/extable.h> #include <linux/log2.h> +#include <linux/bpf_verifier.h> #include <asm/barrier.h> #include <asm/unaligned.h> @@ -2263,6 +2264,9 @@ static void bpf_prog_free_deferred(struct work_struct *work) int i; aux = container_of(work, struct bpf_prog_aux, work); +#ifdef CONFIG_BPF_SYSCALL + bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab); +#endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); if (bpf_prog_is_dev_bound(aux)) diff --git a/kernel/bpf/preload/.gitignore b/kernel/bpf/preload/.gitignore index 856a4c5ad0dd..9452322902a5 100644 --- a/kernel/bpf/preload/.gitignore +++ b/kernel/bpf/preload/.gitignore @@ -1,4 +1,2 @@ -/FEATURE-DUMP.libbpf -/bpf_helper_defs.h -/feature +/libbpf /bpf_preload_umd diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 1951332dd15f..1400ac58178e 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -1,21 +1,35 @@ # SPDX-License-Identifier: GPL-2.0 LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ -LIBBPF_A = $(obj)/libbpf.a -LIBBPF_OUT = $(abspath $(obj)) +LIBBPF_OUT = $(abspath $(obj))/libbpf +LIBBPF_A = $(LIBBPF_OUT)/libbpf.a +LIBBPF_DESTDIR = $(LIBBPF_OUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include # Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test # in tools/scripts/Makefile.include always succeeds when building the kernel # with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". -$(LIBBPF_A): - $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a +$(LIBBPF_A): | $(LIBBPF_OUT) + $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \ + DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $(LIBBPF_OUT)/libbpf.a install_headers + +libbpf_hdrs: $(LIBBPF_A) + +.PHONY: libbpf_hdrs + +$(LIBBPF_OUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $@ userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ - -I $(srctree)/tools/lib/ -Wno-unused-result + -I $(LIBBPF_INCLUDE) -Wno-unused-result userprogs := bpf_preload_umd -clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/ +clean-files := libbpf/ + +$(obj)/iterators/iterators.o: | libbpf_hdrs bpf_preload_umd-objs := iterators/iterators.o bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile index 28fa8c1440f4..b8bd60511227 100644 --- a/kernel/bpf/preload/iterators/Makefile +++ b/kernel/bpf/preload/iterators/Makefile @@ -1,18 +1,26 @@ # SPDX-License-Identifier: GPL-2.0 OUTPUT := .output +abs_out := $(abspath $(OUTPUT)) + CLANG ?= clang LLC ?= llc LLVM_STRIP ?= llvm-strip + +TOOLS_PATH := $(abspath ../../../../tools) +BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool +BPFTOOL_OUTPUT := $(abs_out)/bpftool DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) -LIBBPF_SRC := $(abspath ../../../../tools/lib/bpf) -BPFOBJ := $(OUTPUT)/libbpf.a -BPF_INCLUDE := $(OUTPUT) -INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../../../tools/lib) \ - -I$(abspath ../../../../tools/include/uapi) + +LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf +LIBBPF_OUTPUT := $(abs_out)/libbpf +LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include +BPFOBJ := $(LIBBPF_OUTPUT)/libbpf.a + +INCLUDES := -I$(OUTPUT) -I$(LIBBPF_INCLUDE) -I$(TOOLS_PATH)/include/uapi CFLAGS := -g -Wall -abs_out := $(abspath $(OUTPUT)) ifeq ($(V),1) Q = msg = @@ -44,14 +52,18 @@ $(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT) -c $(filter %.c,$^) -o $@ && \ $(LLVM_STRIP) -g $@ -$(OUTPUT): +$(OUTPUT) $(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): $(call msg,MKDIR,$@) - $(Q)mkdir -p $(OUTPUT) + $(Q)mkdir -p $@ -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ - OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + OUTPUT=$(abspath $(dir $@))/ prefix= \ + DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers -$(DEFAULT_BPFTOOL): - $(Q)$(MAKE) $(submake_extras) -C ../../../../tools/bpf/bpftool \ - prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install +$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \ + OUTPUT=$(BPFTOOL_OUTPUT)/ \ + LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \ + LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \ + prefix= DESTDIR=$(abs_out)/ install-bin diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1cad6979a0d0..50f96ea4452a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -199,7 +199,8 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, err = bpf_fd_reuseport_array_update_elem(map, key, value, flags); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_push_elem(map, value, flags); } else { rcu_read_lock(); @@ -238,7 +239,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { err = bpf_fd_reuseport_array_lookup_elem(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_QUEUE || - map->map_type == BPF_MAP_TYPE_STACK) { + map->map_type == BPF_MAP_TYPE_STACK || + map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { err = map->ops->map_peek_elem(map, value); } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { /* struct_ops map requires directly updating "value" */ @@ -348,6 +350,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->max_entries = attr->max_entries; map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); map->numa_node = bpf_map_attr_numa_node(attr); + map->map_extra = attr->map_extra; } static int bpf_map_alloc_id(struct bpf_map *map) @@ -555,6 +558,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" + "map_extra:\t%#llx\n" "memlock:\t%lu\n" "map_id:\t%u\n" "frozen:\t%u\n", @@ -563,6 +567,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->value_size, map->max_entries, map->map_flags, + (unsigned long long)map->map_extra, bpf_map_memory_footprint(map), map->id, READ_ONCE(map->frozen)); @@ -812,7 +817,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, return ret; } -#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id +#define BPF_MAP_CREATE_LAST_FIELD map_extra /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -833,6 +838,10 @@ static int map_create(union bpf_attr *attr) return -EINVAL; } + if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && + attr->map_extra != 0) + return -EINVAL; + f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) return f_flags; @@ -1082,6 +1091,14 @@ static int map_lookup_elem(union bpf_attr *attr) if (!value) goto free_key; + if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) { + if (copy_from_user(value, uvalue, value_size)) + err = -EFAULT; + else + err = bpf_map_copy_value(map, key, value, attr->flags); + goto free_value; + } + err = bpf_map_copy_value(map, key, value, attr->flags); if (err) goto free_value; @@ -1807,8 +1824,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) return 0; } +struct bpf_prog_kstats { + u64 nsecs; + u64 cnt; + u64 misses; +}; + static void bpf_prog_get_stats(const struct bpf_prog *prog, - struct bpf_prog_stats *stats) + struct bpf_prog_kstats *stats) { u64 nsecs = 0, cnt = 0, misses = 0; int cpu; @@ -1821,9 +1844,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, st = per_cpu_ptr(prog->stats, cpu); do { start = u64_stats_fetch_begin_irq(&st->syncp); - tnsecs = st->nsecs; - tcnt = st->cnt; - tmisses = st->misses; + tnsecs = u64_stats_read(&st->nsecs); + tcnt = u64_stats_read(&st->cnt); + tmisses = u64_stats_read(&st->misses); } while (u64_stats_fetch_retry_irq(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; @@ -1839,7 +1862,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_prog *prog = filp->private_data; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; bpf_prog_get_stats(prog, &stats); bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); @@ -1851,7 +1874,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) "prog_id:\t%u\n" "run_time_ns:\t%llu\n" "run_cnt:\t%llu\n" - "recursion_misses:\t%llu\n", + "recursion_misses:\t%llu\n" + "verified_insns:\t%u\n", prog->type, prog->jited, prog_tag, @@ -1859,7 +1883,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) prog->aux->id, stats.nsecs, stats.cnt, - stats.misses); + stats.misses, + prog->aux->verified_insns); } #endif @@ -3578,7 +3603,7 @@ static int bpf_prog_get_info_by_fd(struct file *file, struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_prog_info info; u32 info_len = attr->info.info_len; - struct bpf_prog_stats stats; + struct bpf_prog_kstats stats; char __user *uinsns; u32 ulen; int err; @@ -3628,6 +3653,8 @@ static int bpf_prog_get_info_by_fd(struct file *file, info.run_cnt = stats.cnt; info.recursion_misses = stats.misses; + info.verified_insns = prog->aux->verified_insns; + if (!bpf_capable()) { info.jited_prog_len = 0; info.xlated_prog_len = 0; @@ -3874,6 +3901,7 @@ static int bpf_map_get_info_by_fd(struct file *file, info.value_size = map->value_size; info.max_entries = map->max_entries; info.map_flags = map->map_flags; + info.map_extra = map->map_extra; memcpy(info.name, map->name, sizeof(map->name)); if (map->btf) { @@ -4756,6 +4784,31 @@ static const struct bpf_func_proto bpf_sys_close_proto = { .arg1_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res) +{ + if (flags) + return -EINVAL; + + if (name_sz <= 1 || name[name_sz - 1]) + return -EINVAL; + + if (!bpf_dump_raw_ok(current_cred())) + return -EPERM; + + *res = kallsyms_lookup_name(name); + return *res ? 0 : -ENOENT; +} + +const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = { + .func = bpf_kallsyms_lookup_name, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +}; + static const struct bpf_func_proto * syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -4766,6 +4819,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: return &bpf_sys_close_proto; + case BPF_FUNC_kallsyms_lookup_name: + return &bpf_kallsyms_lookup_name_proto; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 39eaaff81953..e98de5e73ba5 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -545,7 +545,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog) stats = this_cpu_ptr(prog->stats); u64_stats_update_begin(&stats->syncp); - stats->misses++; + u64_stats_inc(&stats->misses); u64_stats_update_end(&stats->syncp); } @@ -586,11 +586,13 @@ static void notrace update_prog_stats(struct bpf_prog *prog, * Hence check that 'start' is valid. */ start > NO_START_TIME) { + unsigned long flags; + stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); - stats->cnt++; - stats->nsecs += sched_clock() - start; - u64_stats_update_end(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); + u64_stats_inc(&stats->cnt); + u64_stats_add(&stats->nsecs, sched_clock() - start); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1433752db740..a4b48bd4e3ca 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1420,12 +1420,12 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) static bool __reg64_bound_s32(s64 a) { - return a > S32_MIN && a < S32_MAX; + return a >= S32_MIN && a <= S32_MAX; } static bool __reg64_bound_u32(u64 a) { - return a > U32_MIN && a < U32_MAX; + return a >= U32_MIN && a <= U32_MAX; } static void __reg_combine_64_into_32(struct bpf_reg_state *reg) @@ -1640,52 +1640,168 @@ static int add_subprog(struct bpf_verifier_env *env, int off) return env->subprog_cnt - 1; } +#define MAX_KFUNC_DESCS 256 +#define MAX_KFUNC_BTFS 256 + struct bpf_kfunc_desc { struct btf_func_model func_model; u32 func_id; s32 imm; + u16 offset; +}; + +struct bpf_kfunc_btf { + struct btf *btf; + struct module *module; + u16 offset; }; -#define MAX_KFUNC_DESCS 256 struct bpf_kfunc_desc_tab { struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS]; u32 nr_descs; }; -static int kfunc_desc_cmp_by_id(const void *a, const void *b) +struct bpf_kfunc_btf_tab { + struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS]; + u32 nr_descs; +}; + +static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) { const struct bpf_kfunc_desc *d0 = a; const struct bpf_kfunc_desc *d1 = b; /* func_id is not greater than BTF_MAX_TYPE */ - return d0->func_id - d1->func_id; + return d0->func_id - d1->func_id ?: d0->offset - d1->offset; +} + +static int kfunc_btf_cmp_by_off(const void *a, const void *b) +{ + const struct bpf_kfunc_btf *d0 = a; + const struct bpf_kfunc_btf *d1 = b; + + return d0->offset - d1->offset; } static const struct bpf_kfunc_desc * -find_kfunc_desc(const struct bpf_prog *prog, u32 func_id) +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset) { struct bpf_kfunc_desc desc = { .func_id = func_id, + .offset = offset, }; struct bpf_kfunc_desc_tab *tab; tab = prog->aux->kfunc_tab; return bsearch(&desc, tab->descs, tab->nr_descs, - sizeof(tab->descs[0]), kfunc_desc_cmp_by_id); + sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off); } -static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) +static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, + s16 offset, struct module **btf_modp) +{ + struct bpf_kfunc_btf kf_btf = { .offset = offset }; + struct bpf_kfunc_btf_tab *tab; + struct bpf_kfunc_btf *b; + struct module *mod; + struct btf *btf; + int btf_fd; + + tab = env->prog->aux->kfunc_btf_tab; + b = bsearch(&kf_btf, tab->descs, tab->nr_descs, + sizeof(tab->descs[0]), kfunc_btf_cmp_by_off); + if (!b) { + if (tab->nr_descs == MAX_KFUNC_BTFS) { + verbose(env, "too many different module BTFs\n"); + return ERR_PTR(-E2BIG); + } + + if (bpfptr_is_null(env->fd_array)) { + verbose(env, "kfunc offset > 0 without fd_array is invalid\n"); + return ERR_PTR(-EPROTO); + } + + if (copy_from_bpfptr_offset(&btf_fd, env->fd_array, + offset * sizeof(btf_fd), + sizeof(btf_fd))) + return ERR_PTR(-EFAULT); + + btf = btf_get_by_fd(btf_fd); + if (IS_ERR(btf)) { + verbose(env, "invalid module BTF fd specified\n"); + return btf; + } + + if (!btf_is_module(btf)) { + verbose(env, "BTF fd for kfunc is not a module BTF\n"); + btf_put(btf); + return ERR_PTR(-EINVAL); + } + + mod = btf_try_get_module(btf); + if (!mod) { + btf_put(btf); + return ERR_PTR(-ENXIO); + } + + b = &tab->descs[tab->nr_descs++]; + b->btf = btf; + b->module = mod; + b->offset = offset; + + sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), + kfunc_btf_cmp_by_off, NULL); + } + if (btf_modp) + *btf_modp = b->module; + return b->btf; +} + +void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) +{ + if (!tab) + return; + + while (tab->nr_descs--) { + module_put(tab->descs[tab->nr_descs].module); + btf_put(tab->descs[tab->nr_descs].btf); + } + kfree(tab); +} + +static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, + u32 func_id, s16 offset, + struct module **btf_modp) +{ + if (offset) { + if (offset < 0) { + /* In the future, this can be allowed to increase limit + * of fd index into fd_array, interpreted as u16. + */ + verbose(env, "negative offset disallowed for kernel module function call\n"); + return ERR_PTR(-EINVAL); + } + + return __find_kfunc_desc_btf(env, offset, btf_modp); + } + return btf_vmlinux ?: ERR_PTR(-ENOENT); +} + +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) { const struct btf_type *func, *func_proto; + struct bpf_kfunc_btf_tab *btf_tab; struct bpf_kfunc_desc_tab *tab; struct bpf_prog_aux *prog_aux; struct bpf_kfunc_desc *desc; const char *func_name; + struct btf *desc_btf; unsigned long addr; int err; prog_aux = env->prog->aux; tab = prog_aux->kfunc_tab; + btf_tab = prog_aux->kfunc_btf_tab; if (!tab) { if (!btf_vmlinux) { verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n"); @@ -1713,7 +1829,29 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) prog_aux->kfunc_tab = tab; } - if (find_kfunc_desc(env->prog, func_id)) + /* func_id == 0 is always invalid, but instead of returning an error, be + * conservative and wait until the code elimination pass before returning + * error, so that invalid calls that get pruned out can be in BPF programs + * loaded from userspace. It is also required that offset be untouched + * for such calls. + */ + if (!func_id && !offset) + return 0; + + if (!btf_tab && offset) { + btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL); + if (!btf_tab) + return -ENOMEM; + prog_aux->kfunc_btf_tab = btf_tab; + } + + desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); + if (IS_ERR(desc_btf)) { + verbose(env, "failed to find BTF for kernel function\n"); + return PTR_ERR(desc_btf); + } + + if (find_kfunc_desc(env->prog, func_id, offset)) return 0; if (tab->nr_descs == MAX_KFUNC_DESCS) { @@ -1721,20 +1859,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) return -E2BIG; } - func = btf_type_by_id(btf_vmlinux, func_id); + func = btf_type_by_id(desc_btf, func_id); if (!func || !btf_type_is_func(func)) { verbose(env, "kernel btf_id %u is not a function\n", func_id); return -EINVAL; } - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func_proto = btf_type_by_id(desc_btf, func->type); if (!func_proto || !btf_type_is_func_proto(func_proto)) { verbose(env, "kernel function btf_id %u does not have a valid func_proto\n", func_id); return -EINVAL; } - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); + func_name = btf_name_by_offset(desc_btf, func->name_off); addr = kallsyms_lookup_name(func_name); if (!addr) { verbose(env, "cannot find address for kernel function %s\n", @@ -1745,12 +1883,13 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id) desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; desc->imm = BPF_CALL_IMM(addr); - err = btf_distill_func_proto(&env->log, btf_vmlinux, + desc->offset = offset; + err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, &desc->func_model); if (!err) sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), - kfunc_desc_cmp_by_id, NULL); + kfunc_desc_cmp_by_id_off, NULL); return err; } @@ -1829,7 +1968,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env) } else if (bpf_pseudo_call(insn)) { ret = add_subprog(env, i + insn->imm + 1); } else { - ret = add_kfunc_call(env, insn->imm); + ret = add_kfunc_call(env, insn->imm, insn->off); } if (ret < 0) @@ -2166,12 +2305,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) { const struct btf_type *func; + struct btf *desc_btf; if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - func = btf_type_by_id(btf_vmlinux, insn->imm); - return btf_name_by_offset(btf_vmlinux, func->name_off); + desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); + if (IS_ERR(desc_btf)) + return "<error>"; + + func = btf_type_by_id(desc_btf, insn->imm); + return btf_name_by_offset(desc_btf, func->name_off); } /* For given verifier state backtrack_insn() is called from the last insn to @@ -4858,7 +5002,10 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env, return -EINVAL; } break; - + case BPF_MAP_TYPE_BLOOM_FILTER: + if (meta->func_id == BPF_FUNC_map_peek_elem) + *arg_type = ARG_PTR_TO_MAP_VALUE; + break; default: break; } @@ -5433,6 +5580,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; + case BPF_MAP_TYPE_BLOOM_FILTER: + if (func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_map_push_elem) + goto error; + break; default: break; } @@ -5500,13 +5652,18 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_SOCKHASH) goto error; break; - case BPF_FUNC_map_peek_elem: case BPF_FUNC_map_pop_elem: - case BPF_FUNC_map_push_elem: if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) goto error; break; + case BPF_FUNC_map_peek_elem: + case BPF_FUNC_map_push_elem: + if (map->map_type != BPF_MAP_TYPE_QUEUE && + map->map_type != BPF_MAP_TYPE_STACK && + map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) @@ -6530,23 +6687,33 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; u32 i, nargs, func_id, ptr_type_id; + struct module *btf_mod = NULL; const struct btf_param *args; + struct btf *desc_btf; int err; + /* skip for now, but return error when we find this in fixup_kfunc_call */ + if (!insn->imm) + return 0; + + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); + if (IS_ERR(desc_btf)) + return PTR_ERR(desc_btf); + func_id = insn->imm; - func = btf_type_by_id(btf_vmlinux, func_id); - func_name = btf_name_by_offset(btf_vmlinux, func->name_off); - func_proto = btf_type_by_id(btf_vmlinux, func->type); + func = btf_type_by_id(desc_btf, func_id); + func_name = btf_name_by_offset(desc_btf, func->name_off); + func_proto = btf_type_by_id(desc_btf, func->type); if (!env->ops->check_kfunc_call || - !env->ops->check_kfunc_call(func_id)) { + !env->ops->check_kfunc_call(func_id, btf_mod)) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs); + err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); if (err) return err; @@ -6554,15 +6721,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) mark_reg_not_init(env, regs, caller_saved[i]); /* Check return type */ - t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL); + t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); if (btf_type_is_scalar(t)) { mark_reg_unknown(env, regs, BPF_REG_0); mark_btf_func_reg_size(env, BPF_REG_0, t->size); } else if (btf_type_is_ptr(t)) { - ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type, + ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id); if (!btf_type_is_struct(ptr_type)) { - ptr_type_name = btf_name_by_offset(btf_vmlinux, + ptr_type_name = btf_name_by_offset(desc_btf, ptr_type->name_off); verbose(env, "kernel function %s returns pointer type %s %s is not supported\n", func_name, btf_type_str(ptr_type), @@ -6570,7 +6737,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].btf = btf_vmlinux; + regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); @@ -6581,7 +6748,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) for (i = 0; i < nargs; i++) { u32 regno = i + 1; - t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL); + t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL); if (btf_type_is_ptr(t)) mark_btf_func_reg_size(env, regno, sizeof(void *)); else @@ -11121,7 +11288,8 @@ static int do_check(struct bpf_verifier_env *env) env->jmps_processed++; if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || - insn->off != 0 || + (insn->src_reg != BPF_PSEUDO_KFUNC_CALL + && insn->off != 0) || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && insn->src_reg != BPF_PSEUDO_KFUNC_CALL) || @@ -12477,6 +12645,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; + func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab; func[i]->aux->linfo = prog->aux->linfo; func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; @@ -12662,10 +12831,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, { const struct bpf_kfunc_desc *desc; + if (!insn->imm) { + verbose(env, "invalid kernel function call not eliminated in verifier pass\n"); + return -EINVAL; + } + /* insn->imm has the btf func_id. Replace it with * an address (relative to __bpf_base_call). */ - desc = find_kfunc_desc(env->prog, insn->imm); + desc = find_kfunc_desc(env->prog, insn->imm, insn->off); if (!desc) { verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n", insn->imm); @@ -12946,7 +13120,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || - insn->imm == BPF_FUNC_redirect_map)) { + insn->imm == BPF_FUNC_redirect_map || + insn->imm == BPF_FUNC_for_each_map_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -12990,6 +13165,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_redirect, (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_for_each_callback, + (int (*)(struct bpf_map *map, + bpf_callback_t callback_fn, + void *callback_ctx, + u64 flags))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13014,6 +13194,9 @@ patch_map_ops_generic: case BPF_FUNC_redirect_map: insn->imm = BPF_CALL_IMM(ops->map_redirect); continue; + case BPF_FUNC_for_each_map_elem: + insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); + continue; } goto patch_call_imm; @@ -13863,6 +14046,7 @@ skip_full_check: env->verification_time = ktime_get_ns() - start_time; print_verification_stats(env); + env->prog->aux->verified_insns = env->insn_processed; if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6b3153841a33..7396488793ff 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1608,6 +1608,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_tcp_request_sock_proto; case BPF_FUNC_skc_to_udp6_sock: return &bpf_skc_to_udp6_sock_proto; + case BPF_FUNC_skc_to_unix_sock: + return &bpf_skc_to_unix_sock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: @@ -1644,13 +1646,7 @@ static bool raw_tp_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - return true; + return bpf_tracing_ctx_access(off, size, type); } static bool tracing_prog_is_valid_access(int off, int size, @@ -1658,13 +1654,7 @@ static bool tracing_prog_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS) - return false; - if (type != BPF_READ) - return false; - if (off % size != 0) - return false; - return btf_ctx_access(off, size, type, prog, info); + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); } int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog, |