diff options
96 files changed, 3203 insertions, 640 deletions
diff --git a/Documentation/bpf/map_cgrp_storage.rst b/Documentation/bpf/map_cgrp_storage.rst new file mode 100644 index 000000000000..5d3f603efffa --- /dev/null +++ b/Documentation/bpf/map_cgrp_storage.rst @@ -0,0 +1,109 @@ +.. SPDX-License-Identifier: GPL-2.0-only +.. Copyright (C) 2022 Meta Platforms, Inc. and affiliates. + +========================= +BPF_MAP_TYPE_CGRP_STORAGE +========================= + +The ``BPF_MAP_TYPE_CGRP_STORAGE`` map type represents a local fixed-size +storage for cgroups. It is only available with ``CONFIG_CGROUPS``. +The programs are made available by the same Kconfig. The +data for a particular cgroup can be retrieved by looking up the map +with that cgroup. + +This document describes the usage and semantics of the +``BPF_MAP_TYPE_CGRP_STORAGE`` map type. + +Usage +===== + +The map key must be ``sizeof(int)`` representing a cgroup fd. +To access the storage in a program, use ``bpf_cgrp_storage_get``:: + + void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + +``flags`` could be 0 or ``BPF_LOCAL_STORAGE_GET_F_CREATE`` which indicates that +a new local storage will be created if one does not exist. + +The local storage can be removed with ``bpf_cgrp_storage_delete``:: + + long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + +The map is available to all program types. 
+ +Examples +======== + +A BPF program example with BPF_MAP_TYPE_CGRP_STORAGE:: + + #include <vmlinux.h> + #include <bpf/bpf_helpers.h> + #include <bpf/bpf_tracing.h> + + struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); + } cgrp_storage SEC(".maps"); + + SEC("tp_btf/sys_enter") + int BPF_PROG(on_enter, struct pt_regs *regs, long id) + { + struct task_struct *task = bpf_get_current_task_btf(); + long *ptr; + + ptr = bpf_cgrp_storage_get(&cgrp_storage, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + __sync_fetch_and_add(ptr, 1); + + return 0; + } + +Userspace accessing map declared above:: + + #include <linux/bpf.h> + #include <linux/libbpf.h> + + __u32 map_lookup(struct bpf_map *map, int cgrp_fd) + { + __u32 *value; + value = bpf_map_lookup_elem(bpf_map__fd(map), &cgrp_fd); + if (value) + return *value; + return 0; + } + +Difference Between BPF_MAP_TYPE_CGRP_STORAGE and BPF_MAP_TYPE_CGROUP_STORAGE +============================================================================ + +The old cgroup storage map ``BPF_MAP_TYPE_CGROUP_STORAGE`` has been marked as +deprecated (renamed to ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``). The new +``BPF_MAP_TYPE_CGRP_STORAGE`` map should be used instead. The following +illustrates the main difference between ``BPF_MAP_TYPE_CGRP_STORAGE`` and +``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``. + +(1). ``BPF_MAP_TYPE_CGRP_STORAGE`` can be used by all program types while + ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` is available only to cgroup program types + like BPF_CGROUP_INET_INGRESS or BPF_CGROUP_SOCK_OPS, etc. + +(2). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports local storage for more than one + cgroup while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only supports one cgroup + which is attached by a BPF program. + +(3). 
``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` allocates local storage at attach time so + ``bpf_get_local_storage()`` always returns non-NULL local storage. + ``BPF_MAP_TYPE_CGRP_STORAGE`` allocates local storage at runtime so + it is possible that ``bpf_cgrp_storage_get()`` may return null local storage. + To avoid such null local storage issue, user space can do + ``bpf_map_update_elem()`` to pre-allocate local storage before a BPF program + is attached. + +(4). ``BPF_MAP_TYPE_CGRP_STORAGE`` supports deleting local storage by a BPF program + while ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` only deletes storage during + prog detach time. + +So overall, ``BPF_MAP_TYPE_CGRP_STORAGE`` supports all ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED`` +functionality and beyond. It is recommended to use ``BPF_MAP_TYPE_CGRP_STORAGE`` +instead of ``BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED``. diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst index f41619e312ac..4906ff0f8382 100644 --- a/Documentation/bpf/maps.rst +++ b/Documentation/bpf/maps.rst @@ -1,52 +1,81 @@ -========= -eBPF maps +======== +BPF maps +======== + +BPF 'maps' provide generic storage of different types for sharing data between +kernel and user space. There are several storage types available, including +hash, array, bloom filter and radix-tree. Several of the map types exist to +support specific BPF helpers that perform actions based on the map contents. The +maps are accessed from BPF programs via BPF helpers which are documented in the +`man-pages`_ for `bpf-helpers(7)`_. + +BPF maps are accessed from user space via the ``bpf`` syscall, which provides +commands to create maps, lookup elements, update elements and delete +elements. More details of the BPF syscall are available in +:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_. + +Map Types ========= -'maps' is a generic storage of different types for sharing data between kernel -and userspace. +.. 
toctree:: + :maxdepth: 1 + :glob: -The maps are accessed from user space via BPF syscall, which has commands: + map_* -- create a map with given type and attributes - ``map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)`` - using attr->map_type, attr->key_size, attr->value_size, attr->max_entries - returns process-local file descriptor or negative error +Usage Notes +=========== -- lookup key in a given map - ``err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)`` - using attr->map_fd, attr->key, attr->value - returns zero and stores found elem into value or negative error +.. c:function:: + int bpf(int command, union bpf_attr *attr, u32 size) -- create or update key/value pair in a given map - ``err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)`` - using attr->map_fd, attr->key, attr->value - returns zero or negative error +Use the ``bpf()`` system call to perform the operation specified by +``command``. The operation takes parameters provided in ``attr``. The ``size`` +argument is the size of the ``union bpf_attr`` in ``attr``. -- find and delete element by key in a given map - ``err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)`` - using attr->map_fd, attr->key +**BPF_MAP_CREATE** -- to delete map: close(fd) - Exiting process will delete maps automatically +Create a map with the desired type and attributes in ``attr``: -userspace programs use this syscall to create/access maps that eBPF programs -are concurrently updating. +.. code-block:: c -maps can have different types: hash, array, bloom filter, radix-tree, etc. 
+ int fd; + union bpf_attr attr = { + .map_type = BPF_MAP_TYPE_ARRAY; /* mandatory */ + .key_size = sizeof(__u32); /* mandatory */ + .value_size = sizeof(__u32); /* mandatory */ + .max_entries = 256; /* mandatory */ + .map_flags = BPF_F_MMAPABLE; + .map_name = "example_array"; + }; -The map is defined by: + fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); - - type - - max number of elements - - key size in bytes - - value size in bytes +Returns a process-local file descriptor on success, or negative error in case of +failure. The map can be deleted by calling ``close(fd)``. Maps held by open +file descriptors will be deleted automatically when a process exits. -Map Types -========= +.. note:: Valid characters for ``map_name`` are ``A-Z``, ``a-z``, ``0-9``, + ``'_'`` and ``'.'``. -.. toctree:: - :maxdepth: 1 - :glob: +**BPF_MAP_LOOKUP_ELEM** + +Lookup key in a given map using ``attr->map_fd``, ``attr->key``, +``attr->value``. Returns zero and stores found elem into ``attr->value`` on +success, or negative error on failure. + +**BPF_MAP_UPDATE_ELEM** + +Create or update key/value pair in a given map using ``attr->map_fd``, ``attr->key``, +``attr->value``. Returns zero on success or negative error on failure. + +**BPF_MAP_DELETE_ELEM** + +Find and delete element by key in a given map using ``attr->map_fd``, +``attr->key``. Returns zero on success or negative error on failure. - map_*
\ No newline at end of file +.. Links: +.. _man-pages: https://www.kernel.org/doc/man-pages/ +.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html +.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 30f76178608b..62f805f427b7 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1649,13 +1649,8 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, struct bpf_prog *p = l->link.prog; int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); - if (p->aux->sleepable) { - enter_prog = (u64)__bpf_prog_enter_sleepable; - exit_prog = (u64)__bpf_prog_exit_sleepable; - } else { - enter_prog = (u64)__bpf_prog_enter; - exit_prog = (u64)__bpf_prog_exit; - } + enter_prog = (u64)bpf_trampoline_enter(p); + exit_prog = (u64)bpf_trampoline_exit(p); if (l->cookie == 0) { /* if cookie is zero, one instruction is enough to store it */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 51afd6d0c05f..cec5195602bc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -904,6 +904,65 @@ static void emit_nops(u8 **pprog, int len) *pprog = prog; } +/* emit the 3-byte VEX prefix + * + * r: same as rex.r, extra bit for ModRM reg field + * x: same as rex.x, extra bit for SIB index field + * b: same as rex.b, extra bit for ModRM r/m, or SIB base + * m: opcode map select, encoding escape bytes e.g. 
0x0f38 + * w: same as rex.w (32 bit or 64 bit) or opcode specific + * src_reg2: additional source reg (encoded as BPF reg) + * l: vector length (128 bit or 256 bit) or reserved + * pp: opcode prefix (none, 0x66, 0xf2 or 0xf3) + */ +static void emit_3vex(u8 **pprog, bool r, bool x, bool b, u8 m, + bool w, u8 src_reg2, bool l, u8 pp) +{ + u8 *prog = *pprog; + const u8 b0 = 0xc4; /* first byte of 3-byte VEX prefix */ + u8 b1, b2; + u8 vvvv = reg2hex[src_reg2]; + + /* reg2hex gives only the lower 3 bit of vvvv */ + if (is_ereg(src_reg2)) + vvvv |= 1 << 3; + + /* + * 2nd byte of 3-byte VEX prefix + * ~ means bit inverted encoding + * + * 7 0 + * +---+---+---+---+---+---+---+---+ + * |~R |~X |~B | m | + * +---+---+---+---+---+---+---+---+ + */ + b1 = (!r << 7) | (!x << 6) | (!b << 5) | (m & 0x1f); + /* + * 3rd byte of 3-byte VEX prefix + * + * 7 0 + * +---+---+---+---+---+---+---+---+ + * | W | ~vvvv | L | pp | + * +---+---+---+---+---+---+---+---+ + */ + b2 = (w << 7) | ((~vvvv & 0xf) << 3) | (l << 2) | (pp & 3); + + EMIT3(b0, b1, b2); + *pprog = prog; +} + +/* emit BMI2 shift instruction */ +static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op) +{ + u8 *prog = *pprog; + bool r = is_ereg(dst_reg); + u8 m = 2; /* escape code 0f38 */ + + emit_3vex(&prog, r, false, r, m, is64, src_reg, false, op); + EMIT2(0xf7, add_2reg(0xC0, dst_reg, dst_reg)); + *pprog = prog; +} + #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, @@ -1150,17 +1209,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image case BPF_ALU64 | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: + /* BMI2 shifts aren't better when shift count is already in rcx */ + if (boot_cpu_has(X86_FEATURE_BMI2) && src_reg != BPF_REG_4) { + /* shrx/sarx/shlx dst_reg, dst_reg, src_reg */ + bool w = (BPF_CLASS(insn->code) == BPF_ALU64); + u8 
op; + + switch (BPF_OP(insn->code)) { + case BPF_LSH: + op = 1; /* prefix 0x66 */ + break; + case BPF_RSH: + op = 3; /* prefix 0xf2 */ + break; + case BPF_ARSH: + op = 2; /* prefix 0xf3 */ + break; + } - /* Check for bad case when dst_reg == rcx */ - if (dst_reg == BPF_REG_4) { - /* mov r11, dst_reg */ - EMIT_mov(AUX_REG, dst_reg); - dst_reg = AUX_REG; + emit_shiftx(&prog, dst_reg, src_reg, w, op); + + break; } if (src_reg != BPF_REG_4) { /* common case */ - EMIT1(0x51); /* push rcx */ - + /* Check for bad case when dst_reg == rcx */ + if (dst_reg == BPF_REG_4) { + /* mov r11, dst_reg */ + EMIT_mov(AUX_REG, dst_reg); + dst_reg = AUX_REG; + } else { + EMIT1(0x51); /* push rcx */ + } /* mov rcx, src_reg */ EMIT_mov(BPF_REG_4, src_reg); } @@ -1172,12 +1252,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image b3 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(0xD3, add_1reg(b3, dst_reg)); - if (src_reg != BPF_REG_4) - EMIT1(0x59); /* pop rcx */ + if (src_reg != BPF_REG_4) { + if (insn->dst_reg == BPF_REG_4) + /* mov dst_reg, r11 */ + EMIT_mov(insn->dst_reg, AUX_REG); + else + EMIT1(0x59); /* pop rcx */ + } - if (insn->dst_reg == BPF_REG_4) - /* mov dst_reg, r11 */ - EMIT_mov(insn->dst_reg, AUX_REG); break; case BPF_ALU | BPF_END | BPF_FROM_BE: @@ -1825,10 +1907,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_link *l, int stack_size, int run_ctx_off, bool save_ret) { - void (*exit)(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit; - u64 (*enter)(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter; u8 *prog = *pprog; u8 *jmp_insn; int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); @@ -1847,23 +1925,12 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off); - if (p->aux->sleepable) { - enter = 
__bpf_prog_enter_sleepable; - exit = __bpf_prog_exit_sleepable; - } else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) { - enter = __bpf_prog_enter_struct_ops; - exit = __bpf_prog_exit_struct_ops; - } else if (p->expected_attach_type == BPF_LSM_CGROUP) { - enter = __bpf_prog_enter_lsm_cgroup; - exit = __bpf_prog_exit_lsm_cgroup; - } - /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: lea rsi, [rbp - ctx_cookie_off] */ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); - if (emit_call(&prog, enter, prog)) + if (emit_call(&prog, bpf_trampoline_enter(p), prog)) return -EINVAL; /* remember prog start time returned by __bpf_prog_enter */ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); @@ -1908,7 +1975,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); /* arg3: lea rdx, [rbp - run_ctx_off] */ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); - if (emit_call(&prog, exit, prog)) + if (emit_call(&prog, bpf_trampoline_exit(p), prog)) return -EINVAL; *pprog = prog; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0566705c1d4e..8d948bfcb984 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -855,22 +855,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *orig_call); -/* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace 
__bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); +typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); +bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); +bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); struct bpf_ksym { unsigned long start; @@ -2057,6 +2053,7 @@ struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); +void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, @@ -2311,6 +2308,10 @@ static inline bool has_current_bpf_ctx(void) static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) { } + +static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2535,7 +2536,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto 
bpf_get_socket_ptr_cookie_proto; +extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; @@ -2549,6 +2552,8 @@ extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_get_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 7ea18d4da84b..6d37a40cd90e 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -116,21 +116,22 @@ static struct bpf_local_storage_cache name = { \ .idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \ } -u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache); -void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, - u16 idx); - /* Helper functions for bpf_local_storage */ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); -struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr); +struct bpf_map * +bpf_local_storage_map_alloc(union bpf_attr *attr, + struct bpf_local_storage_cache *cache); struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, +bool 
bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage); + +void bpf_local_storage_map_free(struct bpf_map *map, + struct bpf_local_storage_cache *cache, int __percpu *busy_counter); int bpf_local_storage_map_check_btf(const struct bpf_map *map, @@ -141,10 +142,6 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem, - bool uncharge_omem, bool use_trace_rcu); - void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2c6a4f2562a7..d4ee3ccd3753 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -86,6 +86,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CGRP_STORAGE, cgrp_storage_map_ops) #endif #ifdef CONFIG_CGROUP_BPF BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9e1e6965f407..1a32baa78ce2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -642,10 +642,23 @@ static inline u32 type_flag(u32 type) } /* only use after check_attach_btf_id() */ -static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { return prog->type == BPF_PROG_TYPE_EXT ? 
prog->aux->dst_prog->type : prog->type; } +static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) +{ + switch (resolve_prog_type(prog)) { + case BPF_PROG_TYPE_TRACING: + return prog->expected_attach_type != BPF_TRACE_ITER; + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_LSM: + return false; + default: + return true; + } +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 2aea877d644f..c9744efd202f 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -265,5 +265,6 @@ MAX_BTF_TRACING_TYPE, }; extern u32 btf_tracing_ids[]; +extern u32 bpf_cgroup_btf_id[]; #endif diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6e01f10f0d88..8a0d5466c7be 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -507,6 +507,10 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_local_storage __rcu *bpf_cgrp_storage; +#endif + /* All ancestors including self */ struct cgroup *ancestors[]; }; diff --git a/include/linux/module.h b/include/linux/module.h index ec61fb53979a..35876e89eb93 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -879,8 +879,17 @@ static inline bool module_sig_ok(struct module *module) } #endif /* CONFIG_MODULE_SIG */ +#if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS) int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); +#else +static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ #endif /* _LINUX_MODULE_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 17f61338f8f8..94659f6b3395 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -922,7 +922,14 @@ enum bpf_map_type 
{ BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -935,6 +942,7 @@ enum bpf_map_type { BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -5435,6 +5443,44 @@ union bpf_attr { * **-E2BIG** if user-space has tried to publish a sample which is * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. 
+ * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5647,6 +5693,8 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ FN(ktime_get_tai_ns, 208, ##ctx) \ FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 341c94f208f4..3a12e6b400a2 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -25,7 +25,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o endif ifeq ($(CONFIG_CGROUPS),y) -obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o +obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o bpf_cgrp_storage.o endif obj-$(CONFIG_CGROUP_BPF) += cgroup.o ifeq ($(CONFIG_INET),y) diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c new file mode 100644 index 000000000000..309403800f82 --- /dev/null +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
+ */ + +#include <linux/types.h> +#include <linux/bpf.h> +#include <linux/bpf_local_storage.h> +#include <uapi/linux/btf.h> +#include <linux/btf_ids.h> + +DEFINE_BPF_STORAGE_CACHE(cgroup_cache); + +static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy); + +static void bpf_cgrp_storage_lock(void) +{ + migrate_disable(); + this_cpu_inc(bpf_cgrp_storage_busy); +} + +static void bpf_cgrp_storage_unlock(void) +{ + this_cpu_dec(bpf_cgrp_storage_busy); + migrate_enable(); +} + +static bool bpf_cgrp_storage_trylock(void) +{ + migrate_disable(); + if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) { + this_cpu_dec(bpf_cgrp_storage_busy); + migrate_enable(); + return false; + } + return true; +} + +static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner) +{ + struct cgroup *cg = owner; + + return &cg->bpf_cgrp_storage; +} + +void bpf_cgrp_storage_free(struct cgroup *cgroup) +{ + struct bpf_local_storage *local_storage; + bool free_cgroup_storage = false; + unsigned long flags; + + rcu_read_lock(); + local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); + if (!local_storage) { + rcu_read_unlock(); + return; + } + + bpf_cgrp_storage_lock(); + raw_spin_lock_irqsave(&local_storage->lock, flags); + free_cgroup_storage = bpf_local_storage_unlink_nolock(local_storage); + raw_spin_unlock_irqrestore(&local_storage->lock, flags); + bpf_cgrp_storage_unlock(); + rcu_read_unlock(); + + if (free_cgroup_storage) + kfree_rcu(local_storage, rcu); +} + +static struct bpf_local_storage_data * +cgroup_storage_lookup(struct cgroup *cgroup, struct bpf_map *map, bool cacheit_lockit) +{ + struct bpf_local_storage *cgroup_storage; + struct bpf_local_storage_map *smap; + + cgroup_storage = rcu_dereference_check(cgroup->bpf_cgrp_storage, + bpf_rcu_lock_held()); + if (!cgroup_storage) + return NULL; + + smap = (struct bpf_local_storage_map *)map; + return bpf_local_storage_lookup(cgroup_storage, smap, cacheit_lockit); +} + +static void *bpf_cgrp_storage_lookup_elem(struct 
bpf_map *map, void *key) +{ + struct bpf_local_storage_data *sdata; + struct cgroup *cgroup; + int fd; + + fd = *(int *)key; + cgroup = cgroup_get_from_fd(fd); + if (IS_ERR(cgroup)) + return ERR_CAST(cgroup); + + bpf_cgrp_storage_lock(); + sdata = cgroup_storage_lookup(cgroup, map, true); + bpf_cgrp_storage_unlock(); + cgroup_put(cgroup); + return sdata ? sdata->data : NULL; +} + +static int bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_local_storage_data *sdata; + struct cgroup *cgroup; + int fd; + + fd = *(int *)key; + cgroup = cgroup_get_from_fd(fd); + if (IS_ERR(cgroup)) + return PTR_ERR(cgroup); + + bpf_cgrp_storage_lock(); + sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, + value, map_flags, GFP_ATOMIC); + bpf_cgrp_storage_unlock(); + cgroup_put(cgroup); + return PTR_ERR_OR_ZERO(sdata); +} + +static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) +{ + struct bpf_local_storage_data *sdata; + + sdata = cgroup_storage_lookup(cgroup, map, false); + if (!sdata) + return -ENOENT; + + bpf_selem_unlink(SELEM(sdata), true); + return 0; +} + +static int bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) +{ + struct cgroup *cgroup; + int err, fd; + + fd = *(int *)key; + cgroup = cgroup_get_from_fd(fd); + if (IS_ERR(cgroup)) + return PTR_ERR(cgroup); + + bpf_cgrp_storage_lock(); + err = cgroup_storage_delete(cgroup, map); + bpf_cgrp_storage_unlock(); + cgroup_put(cgroup); + return err; +} + +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + +static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) +{ + return bpf_local_storage_map_alloc(attr, &cgroup_cache); +} + +static void cgroup_storage_map_free(struct bpf_map *map) +{ + bpf_local_storage_map_free(map, &cgroup_cache, NULL); +} + +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_cgrp_storage_get, 
struct bpf_map *, map, struct cgroup *, cgroup, + void *, value, u64, flags, gfp_t, gfp_flags) +{ + struct bpf_local_storage_data *sdata; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + if (!cgroup) + return (unsigned long)NULL; + + if (!bpf_cgrp_storage_trylock()) + return (unsigned long)NULL; + + sdata = cgroup_storage_lookup(cgroup, map, true); + if (sdata) + goto unlock; + + /* only allocate new storage, when the cgroup is refcounted */ + if (!percpu_ref_is_dying(&cgroup->self.refcnt) && + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) + sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, + value, BPF_NOEXIST, gfp_flags); + +unlock: + bpf_cgrp_storage_unlock(); + return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data; +} + +BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup) +{ + int ret; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!cgroup) + return -EINVAL; + + if (!bpf_cgrp_storage_trylock()) + return -EBUSY; + + ret = cgroup_storage_delete(cgroup, map); + bpf_cgrp_storage_unlock(); + return ret; +} + +BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, bpf_local_storage_map) +const struct bpf_map_ops cgrp_storage_map_ops = { + .map_meta_equal = bpf_map_meta_equal, + .map_alloc_check = bpf_local_storage_map_alloc_check, + .map_alloc = cgroup_storage_map_alloc, + .map_free = cgroup_storage_map_free, + .map_get_next_key = notsupp_get_next_key, + .map_lookup_elem = bpf_cgrp_storage_lookup_elem, + .map_update_elem = bpf_cgrp_storage_update_elem, + .map_delete_elem = bpf_cgrp_storage_delete_elem, + .map_check_btf = bpf_local_storage_map_check_btf, + .map_btf_id = &cgroup_storage_map_btf_ids[0], + .map_owner_storage_ptr = cgroup_storage_ptr, +}; + +const struct bpf_func_proto bpf_cgrp_storage_get_proto = { + .func = bpf_cgrp_storage_get, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + 
.arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_cgroup_btf_id[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + +const struct bpf_func_proto bpf_cgrp_storage_delete_proto = { + .func = bpf_cgrp_storage_delete, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &bpf_cgroup_btf_id[0], +}; diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 5f7683b19199..6a1d4d22816a 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -56,11 +56,9 @@ static struct bpf_local_storage_data *inode_storage_lookup(struct inode *inode, void bpf_inode_storage_free(struct inode *inode) { - struct bpf_local_storage_elem *selem; struct bpf_local_storage *local_storage; bool free_inode_storage = false; struct bpf_storage_blob *bsb; - struct hlist_node *n; bsb = bpf_inode(inode); if (!bsb) @@ -74,30 +72,11 @@ void bpf_inode_storage_free(struct inode *inode) return; } - /* Neither the bpf_prog nor the bpf-map's syscall - * could be modifying the local_storage->list now. - * Thus, no elem can be added-to or deleted-from the - * local_storage->list by the bpf_prog or by the bpf-map's syscall. - * - * It is racing with bpf_local_storage_map_free() alone - * when unlinking elem from the local_storage->list and - * the map's bucket->list. - */ raw_spin_lock_bh(&local_storage->lock); - hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { - /* Always unlink from map before unlinking from - * local_storage. - */ - bpf_selem_unlink_map(selem); - free_inode_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, false, false); - } + free_inode_storage = bpf_local_storage_unlink_nolock(local_storage); raw_spin_unlock_bh(&local_storage->lock); rcu_read_unlock(); - /* free_inoode_storage should always be true as long as - * local_storage->list was non-empty. 
- */ if (free_inode_storage) kfree_rcu(local_storage, rcu); } @@ -226,23 +205,12 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) { - struct bpf_local_storage_map *smap; - - smap = bpf_local_storage_map_alloc(attr); - if (IS_ERR(smap)) - return ERR_CAST(smap); - - smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache); - return &smap->map; + return bpf_local_storage_map_alloc(attr, &inode_cache); } static void inode_storage_map_free(struct bpf_map *map) { - struct bpf_local_storage_map *smap; - - smap = (struct bpf_local_storage_map *)map; - bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx); - bpf_local_storage_map_free(smap, NULL); + bpf_local_storage_map_free(map, &inode_cache, NULL); } BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct, diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 9dc6de1cf185..93d9b1b17bc8 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -113,9 +113,9 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. 
*/ -bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem, - bool uncharge_mem, bool use_trace_rcu) +static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem, + bool uncharge_mem, bool use_trace_rcu) { struct bpf_local_storage_map *smap; bool free_local_storage; @@ -242,6 +242,7 @@ void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu) __bpf_selem_unlink_storage(selem, use_trace_rcu); } +/* If cacheit_lockit is false, this lookup function is lockless */ struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, @@ -500,7 +501,7 @@ unlock_err: return ERR_PTR(err); } -u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache) +static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache) { u64 min_usage = U64_MAX; u16 i, res = 0; @@ -524,76 +525,14 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache) return res; } -void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, - u16 idx) +static void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, + u16 idx) { spin_lock(&cache->idx_lock); cache->idx_usage_counts[idx]--; spin_unlock(&cache->idx_lock); } -void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, - int __percpu *busy_counter) -{ - struct bpf_local_storage_elem *selem; - struct bpf_local_storage_map_bucket *b; - unsigned int i; - - /* Note that this map might be concurrently cloned from - * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone - * RCU read section to finish before proceeding. New RCU - * read sections should be prevented via bpf_map_inc_not_zero. - */ - synchronize_rcu(); - - /* bpf prog and the userspace can no longer access this map - * now. 
No new selem (of this map) can be added - * to the owner->storage or to the map bucket's list. - * - * The elem of this map can be cleaned up here - * or when the storage is freed e.g. - * by bpf_sk_storage_free() during __sk_destruct(). - */ - for (i = 0; i < (1U << smap->bucket_log); i++) { - b = &smap->buckets[i]; - - rcu_read_lock(); - /* No one is adding to b->list now */ - while ((selem = hlist_entry_safe( - rcu_dereference_raw(hlist_first_rcu(&b->list)), - struct bpf_local_storage_elem, map_node))) { - if (busy_counter) { - migrate_disable(); - this_cpu_inc(*busy_counter); - } - bpf_selem_unlink(selem, false); - if (busy_counter) { - this_cpu_dec(*busy_counter); - migrate_enable(); - } - cond_resched_rcu(); - } - rcu_read_unlock(); - } - - /* While freeing the storage we may still need to access the map. - * - * e.g. when bpf_sk_storage_free() has unlinked selem from the map - * which then made the above while((selem = ...)) loop - * exit immediately. - * - * However, while freeing the storage one still needs to access the - * smap->elem_size to do the uncharging in - * bpf_selem_unlink_storage_nolock(). - * - * Hence, wait another rcu grace period for the storage to be freed. 
- */ - synchronize_rcu(); - - kvfree(smap->buckets); - bpf_map_area_free(smap); -} - int bpf_local_storage_map_alloc_check(union bpf_attr *attr) { if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK || @@ -613,7 +552,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) +static struct bpf_local_storage_map *__bpf_local_storage_map_alloc(union bpf_attr *attr) { struct bpf_local_storage_map *smap; unsigned int i; @@ -663,3 +602,117 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, return 0; } + +bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage) +{ + struct bpf_local_storage_elem *selem; + bool free_storage = false; + struct hlist_node *n; + + /* Neither the bpf_prog nor the bpf_map's syscall + * could be modifying the local_storage->list now. + * Thus, no elem can be added to or deleted from the + * local_storage->list by the bpf_prog or by the bpf_map's syscall. + * + * It is racing with bpf_local_storage_map_free() alone + * when unlinking elem from the local_storage->list and + * the map's bucket->list. + */ + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { + /* Always unlink from map before unlinking from + * local_storage. + */ + bpf_selem_unlink_map(selem); + /* If local_storage list has only one element, the + * bpf_selem_unlink_storage_nolock() will return true. + * Otherwise, it will return false. The current loop iteration + * intends to remove all local storage. So the last iteration + * of the loop will set the free_cgroup_storage to true. 
+ */ + free_storage = bpf_selem_unlink_storage_nolock( + local_storage, selem, false, false); + } + + return free_storage; +} + +struct bpf_map * +bpf_local_storage_map_alloc(union bpf_attr *attr, + struct bpf_local_storage_cache *cache) +{ + struct bpf_local_storage_map *smap; + + smap = __bpf_local_storage_map_alloc(attr); + if (IS_ERR(smap)) + return ERR_CAST(smap); + + smap->cache_idx = bpf_local_storage_cache_idx_get(cache); + return &smap->map; +} + +void bpf_local_storage_map_free(struct bpf_map *map, + struct bpf_local_storage_cache *cache, + int __percpu *busy_counter) +{ + struct bpf_local_storage_map_bucket *b; + struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; + unsigned int i; + + smap = (struct bpf_local_storage_map *)map; + bpf_local_storage_cache_idx_free(cache, smap->cache_idx); + + /* Note that this map might be concurrently cloned from + * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone + * RCU read section to finish before proceeding. New RCU + * read sections should be prevented via bpf_map_inc_not_zero. + */ + synchronize_rcu(); + + /* bpf prog and the userspace can no longer access this map + * now. No new selem (of this map) can be added + * to the owner->storage or to the map bucket's list. + * + * The elem of this map can be cleaned up here + * or when the storage is freed e.g. + * by bpf_sk_storage_free() during __sk_destruct(). 
+ */ + for (i = 0; i < (1U << smap->bucket_log); i++) { + b = &smap->buckets[i]; + + rcu_read_lock(); + /* No one is adding to b->list now */ + while ((selem = hlist_entry_safe( + rcu_dereference_raw(hlist_first_rcu(&b->list)), + struct bpf_local_storage_elem, map_node))) { + if (busy_counter) { + migrate_disable(); + this_cpu_inc(*busy_counter); + } + bpf_selem_unlink(selem, false); + if (busy_counter) { + this_cpu_dec(*busy_counter); + migrate_enable(); + } + cond_resched_rcu(); + } + rcu_read_unlock(); + } + + /* While freeing the storage we may still need to access the map. + * + * e.g. when bpf_sk_storage_free() has unlinked selem from the map + * which then made the above while((selem = ...)) loop + * exit immediately. + * + * However, while freeing the storage one still needs to access the + * smap->elem_size to do the uncharging in + * bpf_selem_unlink_storage_nolock(). + * + * Hence, wait another rcu grace period for the storage to be freed. + */ + synchronize_rcu(); + + kvfree(smap->buckets); + bpf_map_area_free(smap); +} diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 6f290623347e..8e832db8151a 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -71,10 +71,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map, void bpf_task_storage_free(struct task_struct *task) { - struct bpf_local_storage_elem *selem; struct bpf_local_storage *local_storage; bool free_task_storage = false; - struct hlist_node *n; unsigned long flags; rcu_read_lock(); @@ -85,32 +83,13 @@ void bpf_task_storage_free(struct task_struct *task) return; } - /* Neither the bpf_prog nor the bpf-map's syscall - * could be modifying the local_storage->list now. - * Thus, no elem can be added-to or deleted-from the - * local_storage->list by the bpf_prog or by the bpf-map's syscall. 
- * - * It is racing with bpf_local_storage_map_free() alone - * when unlinking elem from the local_storage->list and - * the map's bucket->list. - */ bpf_task_storage_lock(); raw_spin_lock_irqsave(&local_storage->lock, flags); - hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { - /* Always unlink from map before unlinking from - * local_storage. - */ - bpf_selem_unlink_map(selem); - free_task_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, false, false); - } + free_task_storage = bpf_local_storage_unlink_nolock(local_storage); raw_spin_unlock_irqrestore(&local_storage->lock, flags); bpf_task_storage_unlock(); rcu_read_unlock(); - /* free_task_storage should always be true as long as - * local_storage->list was non-empty. - */ if (free_task_storage) kfree_rcu(local_storage, rcu); } @@ -184,7 +163,8 @@ out: return err; } -static int task_storage_delete(struct task_struct *task, struct bpf_map *map) +static int task_storage_delete(struct task_struct *task, struct bpf_map *map, + bool nobusy) { struct bpf_local_storage_data *sdata; @@ -192,6 +172,9 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map) if (!sdata) return -ENOENT; + if (!nobusy) + return -EBUSY; + bpf_selem_unlink(SELEM(sdata), true); return 0; @@ -220,63 +203,108 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) } bpf_task_storage_lock(); - err = task_storage_delete(task, map); + err = task_storage_delete(task, map, true); bpf_task_storage_unlock(); out: put_pid(pid); return err; } -/* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, - task, void *, value, u64, flags, gfp_t, gfp_flags) +/* Called by bpf_task_storage_get*() helpers */ +static void *__bpf_task_storage_get(struct bpf_map *map, + struct task_struct *task, void *value, + u64 flags, gfp_t gfp_flags, bool nobusy) { struct bpf_local_storage_data *sdata; - 
WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) - return (unsigned long)NULL; - - if (!task) - return (unsigned long)NULL; - - if (!bpf_task_storage_trylock()) - return (unsigned long)NULL; - - sdata = task_storage_lookup(task, map, true); + sdata = task_storage_lookup(task, map, nobusy); if (sdata) - goto unlock; + return sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, gfp_flags); + return IS_ERR(sdata) ? NULL : sdata->data; + } + + return NULL; +} -unlock: +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags, gfp_t, gfp_flags) +{ + bool nobusy; + void *data; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) + return (unsigned long)NULL; + + nobusy = bpf_task_storage_trylock(); + data = __bpf_task_storage_get(map, task, value, flags, + gfp_flags, nobusy); + if (nobusy) + bpf_task_storage_unlock(); + return (unsigned long)data; +} + +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags, gfp_t, gfp_flags) +{ + void *data; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) + return (unsigned long)NULL; + + bpf_task_storage_lock(); + data = __bpf_task_storage_get(map, task, value, flags, + gfp_flags, true); bpf_task_storage_unlock(); - return IS_ERR_OR_NULL(sdata) ? 
(unsigned long)NULL : - (unsigned long)sdata->data; + return (unsigned long)data; } -BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, +BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, task) { + bool nobusy; int ret; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; - if (!bpf_task_storage_trylock()) - return -EBUSY; + nobusy = bpf_task_storage_trylock(); + /* This helper must only be called from places where the lifetime of the task + * is guaranteed. Either by being refcounted or by being protected + * by an RCU read-side critical section. + */ + ret = task_storage_delete(task, map, nobusy); + if (nobusy) + bpf_task_storage_unlock(); + return ret; +} + +BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, + task) +{ + int ret; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!task) + return -EINVAL; + bpf_task_storage_lock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. 
*/ - ret = task_storage_delete(task, map); + ret = task_storage_delete(task, map, true); bpf_task_storage_unlock(); return ret; } @@ -288,23 +316,12 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) { - struct bpf_local_storage_map *smap; - - smap = bpf_local_storage_map_alloc(attr); - if (IS_ERR(smap)) - return ERR_CAST(smap); - - smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache); - return &smap->map; + return bpf_local_storage_map_alloc(attr, &task_cache); } static void task_storage_map_free(struct bpf_map *map) { - struct bpf_local_storage_map *smap; - - smap = (struct bpf_local_storage_map *)map; - bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx); - bpf_local_storage_map_free(smap, &bpf_task_storage_busy); + bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy); } BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map) @@ -322,6 +339,17 @@ const struct bpf_map_ops task_storage_map_ops = { .map_owner_storage_ptr = task_storage_ptr, }; +const struct bpf_func_proto bpf_task_storage_get_recur_proto = { + .func = bpf_task_storage_get_recur, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_task_storage_get_proto = { .func = bpf_task_storage_get, .gpl_only = false, @@ -333,6 +361,15 @@ const struct bpf_func_proto bpf_task_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; +const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { + .func = bpf_task_storage_delete_recur, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], +}; + 
const struct bpf_func_proto bpf_task_storage_delete_proto = { .func = bpf_task_storage_delete, .gpl_only = false, diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index 9fcf09f2ef00..fbc6167c3599 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -157,7 +157,7 @@ static const struct seq_operations cgroup_iter_seq_ops = { .show = cgroup_iter_seq_show, }; -BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup) +BTF_ID_LIST_GLOBAL_SINGLE(bpf_cgroup_btf_id, struct, cgroup) static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) { diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b5ba34ddd4b6..bb03fdba73bb 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -85,7 +85,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; struct bpf_cpu_map *cmap; - int err = -ENOMEM; if (!bpf_capable()) return ERR_PTR(-EPERM); @@ -97,29 +96,26 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) attr->map_flags & ~BPF_F_NUMA_NODE) return ERR_PTR(-EINVAL); + /* Pre-limit array size based on NR_CPUS, not final CPU check */ + if (attr->max_entries > NR_CPUS) + return ERR_PTR(-E2BIG); + cmap = bpf_map_area_alloc(sizeof(*cmap), NUMA_NO_NODE); if (!cmap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&cmap->map, attr); - /* Pre-limit array size based on NR_CPUS, not final CPU check */ - if (cmap->map.max_entries > NR_CPUS) { - err = -E2BIG; - goto free_cmap; - } - /* Alloc array for possible remote "destination" CPUs */ cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *), cmap->map.numa_node); - if (!cmap->cpu_map) - goto free_cmap; + if (!cmap->cpu_map) { + bpf_map_area_free(cmap); + return ERR_PTR(-ENOMEM); + } return &cmap->map; -free_cmap: - bpf_map_area_free(cmap); - return ERR_PTR(err); } static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 
a6b04faed282..124fd199ce5c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1663,6 +1663,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_dynptr_write_proto; case BPF_FUNC_dynptr_data: return &bpf_dynptr_data_proto; +#ifdef CONFIG_CGROUPS + case BPF_FUNC_cgrp_storage_get: + return &bpf_cgrp_storage_get_proto; + case BPF_FUNC_cgrp_storage_delete: + return &bpf_cgrp_storage_delete_proto; +#endif default: break; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7b373a5e861f..5887592eeb93 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1016,7 +1016,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE && map->map_type != BPF_MAP_TYPE_INODE_STORAGE && - map->map_type != BPF_MAP_TYPE_TASK_STORAGE) + map->map_type != BPF_MAP_TYPE_TASK_STORAGE && + map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) return -ENOTSUPP; if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { @@ -2117,11 +2118,11 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, st = per_cpu_ptr(prog->stats, cpu); do { - start = u64_stats_fetch_begin_irq(&st->syncp); + start = u64_stats_fetch_begin(&st->syncp); tnsecs = u64_stats_read(&st->nsecs); tcnt = u64_stats_read(&st->cnt); tmisses = u64_stats_read(&st->misses); - } while (u64_stats_fetch_retry_irq(&st->syncp, start)); + } while (u64_stats_fetch_retry(&st->syncp, start)); nsecs += tnsecs; cnt += tcnt; misses += tmisses; @@ -5133,13 +5134,14 @@ int kern_sys_bpf(int cmd, union bpf_attr *attr, unsigned int size) run_ctx.bpf_cookie = 0; run_ctx.saved_run_ctx = NULL; - if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) { + if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) { /* recursion detected */ bpf_prog_put(prog); return -EBUSY; } attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in); - __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does 
runtime stats */, &run_ctx); + __bpf_prog_exit_sleepable_recur(prog, 0 /* bpf_prog_run does runtime stats */, + &run_ctx); bpf_prog_put(prog); return 0; #endif diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index bf0906e1e2b9..d6395215b849 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -864,7 +864,7 @@ static __always_inline u64 notrace bpf_prog_start_time(void) * [2..MAX_U64] - execute bpf prog and record execution time. * This is start time. */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) +static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { rcu_read_lock(); @@ -901,7 +901,8 @@ static void notrace update_prog_stats(struct bpf_prog *prog, } } -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx) +static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -912,8 +913,8 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_ rcu_read_unlock(); } -u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx) +static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { /* Runtime stats are exported via actual BPF_LSM_CGROUP @@ -927,8 +928,8 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, return NO_START_TIME; } -void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx) +static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -937,7 +938,8 @@ void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, rcu_read_unlock(); } -u64 
notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) +u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) { rcu_read_lock_trace(); migrate_disable(); @@ -953,8 +955,8 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r return bpf_prog_start_time(); } -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx) +void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -964,8 +966,30 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, rcu_read_unlock_trace(); } -u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx) +static u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) +{ + rcu_read_lock_trace(); + migrate_disable(); + might_fault(); + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + + return bpf_prog_start_time(); +} + +static void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) +{ + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + + update_prog_stats(prog, start); + migrate_enable(); + rcu_read_unlock_trace(); +} + +static u64 notrace __bpf_prog_enter(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { rcu_read_lock(); @@ -976,8 +1000,8 @@ u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, return bpf_prog_start_time(); } -void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx) +static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { bpf_reset_run_ctx(run_ctx->saved_run_ctx); @@ -997,6 +1021,36 @@ void notrace __bpf_tramp_exit(struct 
bpf_tramp_image *tr) percpu_ref_put(&tr->pcref); } +bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog) +{ + bool sleepable = prog->aux->sleepable; + + if (bpf_prog_check_recur(prog)) + return sleepable ? __bpf_prog_enter_sleepable_recur : + __bpf_prog_enter_recur; + + if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM && + prog->expected_attach_type == BPF_LSM_CGROUP) + return __bpf_prog_enter_lsm_cgroup; + + return sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter; +} + +bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog) +{ + bool sleepable = prog->aux->sleepable; + + if (bpf_prog_check_recur(prog)) + return sleepable ? __bpf_prog_exit_sleepable_recur : + __bpf_prog_exit_recur; + + if (resolve_prog_type(prog) == BPF_PROG_TYPE_LSM && + prog->expected_attach_type == BPF_LSM_CGROUP) + return __bpf_prog_exit_lsm_cgroup; + + return sleepable ? __bpf_prog_exit_sleepable : __bpf_prog_exit; +} + int __weak arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7f0a9f6cb889..82c07fe0bfb1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5634,16 +5634,6 @@ struct bpf_reg_types { u32 *btf_id; }; -static const struct bpf_reg_types map_key_value_types = { - .types = { - PTR_TO_STACK, - PTR_TO_PACKET, - PTR_TO_PACKET_META, - PTR_TO_MAP_KEY, - PTR_TO_MAP_VALUE, - }, -}; - static const struct bpf_reg_types sock_types = { .types = { PTR_TO_SOCK_COMMON, @@ -5710,8 +5700,8 @@ static const struct bpf_reg_types dynptr_types = { }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { - [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, - [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, + [ARG_PTR_TO_MAP_KEY] = &mem_types, + [ARG_PTR_TO_MAP_VALUE] = &mem_types, [ARG_CONST_SIZE] = &scalar_types, [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_ALLOC_SIZE_OR_ZERO] = 
&scalar_types, @@ -6360,6 +6350,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_task_storage_delete) goto error; break; + case BPF_MAP_TYPE_CGRP_STORAGE: + if (func_id != BPF_FUNC_cgrp_storage_get && + func_id != BPF_FUNC_cgrp_storage_delete) + goto error; + break; case BPF_MAP_TYPE_BLOOM_FILTER: if (func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_map_push_elem) @@ -6472,6 +6467,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) goto error; break; + case BPF_FUNC_cgrp_storage_get: + case BPF_FUNC_cgrp_storage_delete: + if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) + goto error; + break; default: break; } @@ -10671,7 +10671,7 @@ static int check_return_code(struct bpf_verifier_env *env) * 3 let S be a stack * 4 S.push(v) * 5 while S is not empty - * 6 t <- S.pop() + * 6 t <- S.peek() * 7 if t is what we're looking for: * 8 return t * 9 for all edges e in G.adjacentEdges(t) do @@ -14150,7 +14150,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) if (insn->imm == BPF_FUNC_task_storage_get || insn->imm == BPF_FUNC_sk_storage_get || - insn->imm == BPF_FUNC_inode_storage_get) { + insn->imm == BPF_FUNC_inode_storage_get || + insn->imm == BPF_FUNC_cgrp_storage_get) { if (env->prog->aux->sleepable) insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL); else diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2319946715e0..f1e6058089f5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5349,6 +5349,7 @@ static void css_free_rwork_fn(struct work_struct *work) atomic_dec(&cgrp->root->nr_cgrps); cgroup1_pidlist_destroy_all(cgrp); cancel_work_sync(&cgrp->release_agent_work); + bpf_cgrp_storage_free(cgrp); if (cgroup_parent(cgrp)) { /* diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index f5c5c9175333..4523f99b0358 100644 --- a/kernel/module/kallsyms.c +++ 
b/kernel/module/kallsyms.c @@ -494,7 +494,6 @@ unsigned long module_kallsyms_lookup_name(const char *name) return ret; } -#ifdef CONFIG_LIVEPATCH int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data) @@ -531,4 +530,3 @@ out: mutex_unlock(&module_mutex); return ret; } -#endif /* CONFIG_LIVEPATCH */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1ed08967fb97..f2d8d070d024 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/bpf.h> +#include <linux/bpf_verifier.h> #include <linux/bpf_perf_event.h> #include <linux/btf.h> #include <linux/filter.h> @@ -1456,6 +1457,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_current_cgroup_id_proto; case BPF_FUNC_get_current_ancestor_cgroup_id: return &bpf_get_current_ancestor_cgroup_id_proto; + case BPF_FUNC_cgrp_storage_get: + return &bpf_cgrp_storage_get_proto; + case BPF_FUNC_cgrp_storage_delete: + return &bpf_cgrp_storage_delete_proto; #endif case BPF_FUNC_send_signal: return &bpf_send_signal_proto; @@ -1490,8 +1495,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; case BPF_FUNC_task_storage_get: + if (bpf_prog_check_recur(prog)) + return &bpf_task_storage_get_recur_proto; return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: + if (bpf_prog_check_recur(prog)) + return &bpf_task_storage_delete_recur_proto; return &bpf_task_storage_delete_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; @@ -2452,6 +2461,8 @@ struct bpf_kprobe_multi_link { unsigned long *addrs; u64 *cookies; u32 cnt; + u32 mods_cnt; + struct module **mods; }; struct bpf_kprobe_multi_run_ctx { @@ -2507,6 +2518,14 @@ error: return err; } +static void kprobe_multi_put_modules(struct module **mods, u32 
cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) + module_put(mods[i]); +} + static void free_user_syms(struct user_syms *us) { kvfree(us->syms); @@ -2519,6 +2538,7 @@ static void bpf_kprobe_multi_link_release(struct bpf_link *link) kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); unregister_fprobe(&kmulti_link->fp); + kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt); } static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) @@ -2528,6 +2548,7 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); kvfree(kmulti_link->addrs); kvfree(kmulti_link->cookies); + kfree(kmulti_link->mods); kfree(kmulti_link); } @@ -2550,7 +2571,7 @@ static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void swap(*cookie_a, *cookie_b); } -static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b) +static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b) { const unsigned long *addr_a = a, *addr_b = b; @@ -2561,7 +2582,7 @@ static int __bpf_kprobe_multi_cookie_cmp(const void *a, const void *b) static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv) { - return __bpf_kprobe_multi_cookie_cmp(a, b); + return bpf_kprobe_multi_addrs_cmp(a, b); } static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) @@ -2579,7 +2600,7 @@ static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) return 0; entry_ip = run_ctx->entry_ip; addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip), - __bpf_kprobe_multi_cookie_cmp); + bpf_kprobe_multi_addrs_cmp); if (!addr) return 0; cookie = link->cookies + (addr - link->addrs); @@ -2663,6 +2684,71 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv) } } +struct module_addr_args { + unsigned long *addrs; + u32 addrs_cnt; + struct module **mods; + int mods_cnt; + int mods_cap; +}; + +static int module_callback(void *data, const char 
*name, + struct module *mod, unsigned long addr) +{ + struct module_addr_args *args = data; + struct module **mods; + + /* We iterate all modules symbols and for each we: + * - search for it in provided addresses array + * - if found we check if we already have the module pointer stored + * (we iterate modules sequentially, so we can check just the last + * module pointer) + * - take module reference and store it + */ + if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr), + bpf_kprobe_multi_addrs_cmp)) + return 0; + + if (args->mods && args->mods[args->mods_cnt - 1] == mod) + return 0; + + if (args->mods_cnt == args->mods_cap) { + args->mods_cap = max(16, args->mods_cap * 3 / 2); + mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL); + if (!mods) + return -ENOMEM; + args->mods = mods; + } + + if (!try_module_get(mod)) + return -EINVAL; + + args->mods[args->mods_cnt] = mod; + args->mods_cnt++; + return 0; +} + +static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) +{ + struct module_addr_args args = { + .addrs = addrs, + .addrs_cnt = addrs_cnt, + }; + int err; + + /* We return either err < 0 in case of error, ... */ + err = module_kallsyms_on_each_symbol(module_callback, &args); + if (err) { + kprobe_multi_put_modules(args.mods, args.mods_cnt); + kfree(args.mods); + return err; + } + + /* or number of modules found if everything is ok. */ + *mods = args.mods; + return args.mods_cnt; +} + int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_kprobe_multi_link *link = NULL; @@ -2773,10 +2859,25 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr bpf_kprobe_multi_cookie_cmp, bpf_kprobe_multi_cookie_swap, link); + } else { + /* + * We need to sort addrs array even if there are no cookies + * provided, to allow bsearch in get_modules_for_addrs. 
+ */ + sort(addrs, cnt, sizeof(*addrs), + bpf_kprobe_multi_addrs_cmp, NULL); + } + + err = get_modules_for_addrs(&link->mods, addrs, cnt); + if (err < 0) { + bpf_link_cleanup(&link_primer); + return err; } + link->mods_cnt = err; err = register_fprobe_ips(&link->fp, addrs, cnt); if (err) { + kprobe_multi_put_modules(link->mods, link->mods_cnt); bpf_link_cleanup(&link_primer); return err; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fbf2543111c0..72de9009a6a0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8267,6 +8267,10 @@ struct kallsyms_data { size_t found; }; +/* This function gets called for all kernel and module symbols + * and returns 1 in case we resolved all the requested symbols, + * 0 otherwise. + */ static int kallsyms_callback(void *data, const char *name, struct module *mod, unsigned long addr) { @@ -8309,17 +8313,19 @@ static int kallsyms_callback(void *data, const char *name, int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs) { struct kallsyms_data args; - int err; + int found_all; memset(addrs, 0, sizeof(*addrs) * cnt); args.addrs = addrs; args.syms = sorted_syms; args.cnt = cnt; args.found = 0; - err = kallsyms_on_each_symbol(kallsyms_callback, &args); - if (err < 0) - return err; - return args.found == args.cnt ? 0 : -ESRCH; + + found_all = kallsyms_on_each_symbol(kallsyms_callback, &args); + if (found_all) + return 0; + found_all = module_kallsyms_on_each_symbol(kallsyms_callback, &args); + return found_all ? 
0 : -ESRCH; } #ifdef CONFIG_SYSCTL diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 94374d529ea4..49884e7de080 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -48,10 +48,8 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) /* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { - struct bpf_local_storage_elem *selem; struct bpf_local_storage *sk_storage; bool free_sk_storage = false; - struct hlist_node *n; rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); @@ -60,24 +58,8 @@ void bpf_sk_storage_free(struct sock *sk) return; } - /* Netiher the bpf_prog nor the bpf-map's syscall - * could be modifying the sk_storage->list now. - * Thus, no elem can be added-to or deleted-from the - * sk_storage->list by the bpf_prog or by the bpf-map's syscall. - * - * It is racing with bpf_local_storage_map_free() alone - * when unlinking elem from the sk_storage->list and - * the map's bucket->list. - */ raw_spin_lock_bh(&sk_storage->lock); - hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) { - /* Always unlink from map before unlinking from - * sk_storage. 
- */ - bpf_selem_unlink_map(selem); - free_sk_storage = bpf_selem_unlink_storage_nolock( - sk_storage, selem, true, false); - } + free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage); raw_spin_unlock_bh(&sk_storage->lock); rcu_read_unlock(); @@ -87,23 +69,12 @@ void bpf_sk_storage_free(struct sock *sk) static void bpf_sk_storage_map_free(struct bpf_map *map) { - struct bpf_local_storage_map *smap; - - smap = (struct bpf_local_storage_map *)map; - bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx); - bpf_local_storage_map_free(smap, NULL); + bpf_local_storage_map_free(map, &sk_cache, NULL); } static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) { - struct bpf_local_storage_map *smap; - - smap = bpf_local_storage_map_alloc(attr); - if (IS_ERR(smap)) - return ERR_CAST(smap); - - smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache); - return &smap->map; + return bpf_local_storage_map_alloc(attr, &sk_cache); } static int notsupp_get_next_key(struct bpf_map *map, void *key, diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index 60c6494adb1b..57f93edd1957 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -37,8 +37,8 @@ user, simply call:: make headers_install -This will creates a local "usr/include" directory in the git/build top -level directory, that the make system automatically pickup first. +This will create a local "usr/include" directory in the git/build top +level directory, that the make system will automatically pick up first. Compiling ========= @@ -87,7 +87,7 @@ Cross compiling samples ----------------------- In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH environment variables before calling make. But do this before clean, -cofiguration and header install steps described above. This will direct make to +configuration and header install steps described above. 
This will direct make to build samples for the cross target:: export ARCH=arm64 diff --git a/samples/bpf/hbm_edt_kern.c b/samples/bpf/hbm_edt_kern.c index a65b677acdb0..6294f1d716c0 100644 --- a/samples/bpf/hbm_edt_kern.c +++ b/samples/bpf/hbm_edt_kern.c @@ -35,7 +35,7 @@ * * If the credit is below the drop threshold, the packet is dropped. If it * is a TCP packet, then it also calls tcp_cwr since packets dropped by - * by a cgroup skb BPF program do not automatically trigger a call to + * a cgroup skb BPF program do not automatically trigger a call to * tcp_cwr in the current kernel code. * * This BPF program actually uses 2 drop thresholds, one threshold diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index ac370e638fa3..281dc964de8d 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -51,7 +51,7 @@ static void poll_stats(int map_fd, int interval) sleep(interval); - while (bpf_map_get_next_key(map_fd, &key, &key) != -1) { + while (bpf_map_get_next_key(map_fd, &key, &key) == 0) { __u64 sum = 0; assert(bpf_map_lookup_elem(map_fd, &key, values) == 0); diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index 3332ba6bb95f..67804ecf7ce3 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -112,6 +112,10 @@ int xdp_prog1(struct xdp_md *ctx) if (ipproto == IPPROTO_UDP) { swap_src_dst_mac(data); + + if (bpf_xdp_store_bytes(ctx, 0, pkt, sizeof(pkt))) + return rc; + rc = XDP_TX; } diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index c0e6690be82a..fdb0aff8cb5a 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -685,6 +685,7 @@ class PrinterHelpers(Printer): 'struct udp6_sock', 'struct unix_sock', 'struct task_struct', + 'struct cgroup', 'struct __sk_buff', 'struct sk_msg_md', @@ -742,6 +743,7 @@ class PrinterHelpers(Printer): 'struct udp6_sock', 'struct unix_sock', 'struct task_struct', + 'struct cgroup', 'struct path', 'struct btf_ptr', 'struct inode', diff --git 
a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 7f3b67a8b48f..11250c4734fe 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -55,7 +55,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** -| | **task_storage** | **bloom_filter** | **user_ringbuf** } +| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index eb1b2a254eb1..14de72544995 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -31,7 +31,7 @@ PROG COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] +| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** @@ -131,7 +131,7 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. 
- **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog loadall** pins all programs @@ -150,6 +150,17 @@ DESCRIPTION Optional **pinmaps** argument can be provided to pin all maps under *MAP_DIR* directory. + If **autoattach** is specified program will be attached + before pin. In that case, only the link (representing the + program attached to its hook) is pinned, not the program as + such, so the path won't show in **bpftool prog show -f**, + only show in **bpftool link show -f**. Also, this only works + when bpftool (libbpf) is able to infer all necessary + information from the object file, in particular, it's not + supported for all program types. If a program does not + support autoattach, bpftool falls back to regular pinning + for that program instead. + Note: *PATH* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future extensions of *bpffs*. diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 4107a586b68b..05350a1aadf9 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -7,10 +7,10 @@ Print bpftool's version number (similar to **bpftool version**), the number of the libbpf version in use, and optional features that were included when bpftool was compiled. 
Optional features include linking - against libbfd to provide the disassembler for JIT-ted programs - (**bpftool prog dump jited**) and usage of BPF skeletons (some - features like **bpftool prog profile** or showing pids associated to - BPF objects may rely on it). + against LLVM or libbfd to provide the disassembler for JIT-ted + programs (**bpftool prog dump jited**) and usage of BPF skeletons + (some features like **bpftool prog profile** or showing pids + associated to BPF objects may rely on it). -j, --json Generate JSON output. For commands that cannot produce JSON, this diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 4a95c017ad4c..787b857d3fb5 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -93,11 +93,22 @@ INSTALL ?= install RM ?= rm -f FEATURE_USER = .bpftool -FEATURE_TESTS = libbfd libbfd-liberty libbfd-liberty-z \ - disassembler-four-args disassembler-init-styled libcap \ - clang-bpf-co-re -FEATURE_DISPLAY = libbfd libbfd-liberty libbfd-liberty-z \ - libcap clang-bpf-co-re + +FEATURE_TESTS := clang-bpf-co-re +FEATURE_TESTS += llvm +FEATURE_TESTS += libcap +FEATURE_TESTS += libbfd +FEATURE_TESTS += libbfd-liberty +FEATURE_TESTS += libbfd-liberty-z +FEATURE_TESTS += disassembler-four-args +FEATURE_TESTS += disassembler-init-styled + +FEATURE_DISPLAY := clang-bpf-co-re +FEATURE_DISPLAY += llvm +FEATURE_DISPLAY += libcap +FEATURE_DISPLAY += libbfd +FEATURE_DISPLAY += libbfd-liberty +FEATURE_DISPLAY += libbfd-liberty-z check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -115,13 +126,6 @@ include $(FEATURES_DUMP) endif endif -ifeq ($(feature-disassembler-four-args), 1) -CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE -endif -ifeq ($(feature-disassembler-init-styled), 1) - CFLAGS += -DDISASM_INIT_STYLED -endif - LIBS = $(LIBBPF) -lelf -lz LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz ifeq ($(feature-libcap), 1) @@ -133,21 +137,41 @@ include $(wildcard $(OUTPUT)*.d) 
all: $(OUTPUT)bpftool -BFD_SRCS = jit_disasm.c +SRCS := $(wildcard *.c) -SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) - -ifeq ($(feature-libbfd),1) - LIBS += -lbfd -ldl -lopcodes -else ifeq ($(feature-libbfd-liberty),1) - LIBS += -lbfd -ldl -lopcodes -liberty -else ifeq ($(feature-libbfd-liberty-z),1) - LIBS += -lbfd -ldl -lopcodes -liberty -lz +ifeq ($(feature-llvm),1) + # If LLVM is available, use it for JIT disassembly + CFLAGS += -DHAVE_LLVM_SUPPORT + LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets + CFLAGS += $(shell $(LLVM_CONFIG) --cflags --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) +else + # Fall back on libbfd + ifeq ($(feature-libbfd),1) + LIBS += -lbfd -ldl -lopcodes + else ifeq ($(feature-libbfd-liberty),1) + LIBS += -lbfd -ldl -lopcodes -liberty + else ifeq ($(feature-libbfd-liberty-z),1) + LIBS += -lbfd -ldl -lopcodes -liberty -lz + endif + + # If one of the above feature combinations is set, we support libbfd + ifneq ($(filter -lbfd,$(LIBS)),) + CFLAGS += -DHAVE_LIBBFD_SUPPORT + + # Libbfd interface changed over time, figure out what we need + ifeq ($(feature-disassembler-four-args), 1) + CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE + endif + ifeq ($(feature-disassembler-init-styled), 1) + CFLAGS += -DDISASM_INIT_STYLED + endif + endif endif - -ifneq ($(filter -lbfd,$(LIBS)),) -CFLAGS += -DHAVE_LIBBFD_SUPPORT -SRCS += $(BFD_SRCS) +ifeq ($(filter -DHAVE_LLVM_SUPPORT -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) + # No support for JIT disassembly + SRCS := $(filter-out jit_disasm.c,$(SRCS)) endif HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index dc1641e3670e..2957b42cab67 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -505,6 +505,7 @@ _bpftool() 
_bpftool_once_attr 'type' _bpftool_once_attr 'dev' _bpftool_once_attr 'pinmaps' + _bpftool_once_attr 'autoattach' return 0 ;; esac diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 8727765add88..e4d33bc8bbbf 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <ctype.h> #include <errno.h> #include <fcntl.h> @@ -625,12 +627,11 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) } const char * -ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, - const char **opt) +ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt) { + __maybe_unused int device_id; char devname[IF_NAMESIZE]; int vendor_id; - int device_id; if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) { p_err("Can't get net device name for ifindex %d: %s", ifindex, @@ -645,6 +646,7 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, } switch (vendor_id) { +#ifdef HAVE_LIBBFD_SUPPORT case 0x19ee: device_id = read_sysfs_netdev_hex_int(devname, "device"); if (device_id != 0x4000 && @@ -653,8 +655,10 @@ ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); *opt = "ctx4"; return "NFP-6xxx"; +#endif /* HAVE_LIBBFD_SUPPORT */ + /* No NFP support in LLVM, we have no valid triple to return. 
*/ default: - p_err("Can't get bfd arch name for device vendor id 0x%04x", + p_err("Can't get arch name for device vendor id 0x%04x", vendor_id); return NULL; } diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index f88fdc820d23..a3e6b167153d 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2020 Facebook +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <unistd.h> #include <linux/err.h> #include <bpf/libbpf.h> diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index aaf99a0168c9..7b8d9ec89ebd 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -11,35 +11,151 @@ * Licensed under the GNU General Public License, version 2.0 (GPLv2) */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdio.h> #include <stdarg.h> #include <stdint.h> #include <stdlib.h> -#include <assert.h> #include <unistd.h> #include <string.h> -#include <bfd.h> -#include <dis-asm.h> #include <sys/stat.h> #include <limits.h> #include <bpf/libbpf.h> + +#ifdef HAVE_LLVM_SUPPORT +#include <llvm-c/Core.h> +#include <llvm-c/Disassembler.h> +#include <llvm-c/Target.h> +#include <llvm-c/TargetMachine.h> +#endif + +#ifdef HAVE_LIBBFD_SUPPORT +#include <bfd.h> +#include <dis-asm.h> #include <tools/dis-asm-compat.h> +#endif #include "json_writer.h" #include "main.h" -static void get_exec_path(char *tpath, size_t size) +static int oper_count; + +#ifdef HAVE_LLVM_SUPPORT +#define DISASM_SPACER + +typedef LLVMDisasmContextRef disasm_ctx_t; + +static int printf_json(char *s) +{ + s = strtok(s, " \t"); + jsonw_string_field(json_wtr, "operation", s); + + jsonw_name(json_wtr, "operands"); + jsonw_start_array(json_wtr); + oper_count = 1; + + while ((s = strtok(NULL, " \t,()")) != 0) { + jsonw_string(json_wtr, s); + oper_count++; + } + return 0; +} + +/* This callback to set the ref_type is necessary to have the LLVM 
disassembler + * print PC-relative addresses instead of byte offsets for branch instruction + * targets. + */ +static const char * +symbol_lookup_callback(__maybe_unused void *disasm_info, + __maybe_unused uint64_t ref_value, + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, + __maybe_unused const char **ref_name) +{ + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; + return NULL; +} + +static int +init_context(disasm_ctx_t *ctx, const char *arch, + __maybe_unused const char *disassembler_options, + __maybe_unused unsigned char *image, __maybe_unused ssize_t len) +{ + char *triple; + + if (arch) + triple = LLVMNormalizeTargetTriple(arch); + else + triple = LLVMGetDefaultTargetTriple(); + if (!triple) { + p_err("Failed to retrieve triple"); + return -1; + } + *ctx = LLVMCreateDisasm(triple, NULL, 0, NULL, symbol_lookup_callback); + LLVMDisposeMessage(triple); + + if (!*ctx) { + p_err("Failed to create disassembler"); + return -1; + } + + return 0; +} + +static void destroy_context(disasm_ctx_t *ctx) +{ + LLVMDisposeMessage(*ctx); +} + +static int +disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) +{ + char buf[256]; + int count; + + count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc, + buf, sizeof(buf)); + if (json_output) + printf_json(buf); + else + printf("%s", buf); + + return count; +} + +int disasm_init(void) +{ + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllDisassemblers(); + return 0; +} +#endif /* HAVE_LLVM_SUPPORT */ + +#ifdef HAVE_LIBBFD_SUPPORT +#define DISASM_SPACER "\t" + +typedef struct { + struct disassemble_info *info; + disassembler_ftype disassemble; + bfd *bfdf; +} disasm_ctx_t; + +static int get_exec_path(char *tpath, size_t size) { const char *path = "/proc/self/exe"; ssize_t len; len = readlink(path, tpath, size - 1); - assert(len > 0); + if (len <= 0) + return -1; + tpath[len] = 0; + + return 0; } -static int oper_count; static int printf_json(void *out, 
const char *fmt, va_list ap) { char *s; @@ -97,37 +213,44 @@ static int fprintf_json_styled(void *out, return r; } -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum) +static int init_context(disasm_ctx_t *ctx, const char *arch, + const char *disassembler_options, + unsigned char *image, ssize_t len) { - const struct bpf_line_info *linfo = NULL; - disassembler_ftype disassemble; - struct disassemble_info info; - unsigned int nr_skip = 0; - int count, i, pc = 0; + struct disassemble_info *info; char tpath[PATH_MAX]; bfd *bfdf; - if (!len) - return; - memset(tpath, 0, sizeof(tpath)); - get_exec_path(tpath, sizeof(tpath)); + if (get_exec_path(tpath, sizeof(tpath))) { + p_err("failed to create disassembler (get_exec_path)"); + return -1; + } - bfdf = bfd_openr(tpath, NULL); - assert(bfdf); - assert(bfd_check_format(bfdf, bfd_object)); + ctx->bfdf = bfd_openr(tpath, NULL); + if (!ctx->bfdf) { + p_err("failed to create disassembler (bfd_openr)"); + return -1; + } + if (!bfd_check_format(ctx->bfdf, bfd_object)) { + p_err("failed to create disassembler (bfd_check_format)"); + goto err_close; + } + bfdf = ctx->bfdf; + + ctx->info = malloc(sizeof(struct disassemble_info)); + if (!ctx->info) { + p_err("mem alloc failed"); + goto err_close; + } + info = ctx->info; if (json_output) - init_disassemble_info_compat(&info, stdout, + init_disassemble_info_compat(info, stdout, (fprintf_ftype) fprintf_json, fprintf_json_styled); else - init_disassemble_info_compat(&info, stdout, + init_disassemble_info_compat(info, stdout, (fprintf_ftype) fprintf, fprintf_styled); @@ -139,28 +262,77 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, bfdf->arch_info = inf; } else { p_err("No libbfd support for %s", arch); - return; + goto err_free; } } - info.arch = bfd_get_arch(bfdf); 
- info.mach = bfd_get_mach(bfdf); + info->arch = bfd_get_arch(bfdf); + info->mach = bfd_get_mach(bfdf); if (disassembler_options) - info.disassembler_options = disassembler_options; - info.buffer = image; - info.buffer_length = len; + info->disassembler_options = disassembler_options; + info->buffer = image; + info->buffer_length = len; - disassemble_init_for_target(&info); + disassemble_init_for_target(info); #ifdef DISASM_FOUR_ARGS_SIGNATURE - disassemble = disassembler(info.arch, - bfd_big_endian(bfdf), - info.mach, - bfdf); + ctx->disassemble = disassembler(info->arch, + bfd_big_endian(bfdf), + info->mach, + bfdf); #else - disassemble = disassembler(bfdf); + ctx->disassemble = disassembler(bfdf); #endif - assert(disassemble); + if (!ctx->disassemble) { + p_err("failed to create disassembler"); + goto err_free; + } + return 0; + +err_free: + free(info); +err_close: + bfd_close(ctx->bfdf); + return -1; +} + +static void destroy_context(disasm_ctx_t *ctx) +{ + free(ctx->info); + bfd_close(ctx->bfdf); +} + +static int +disassemble_insn(disasm_ctx_t *ctx, __maybe_unused unsigned char *image, + __maybe_unused ssize_t len, int pc) +{ + return ctx->disassemble(pc, ctx->info); +} + +int disasm_init(void) +{ + bfd_init(); + return 0; +} +#endif /* HAVE_LIBBPFD_SUPPORT */ + +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) +{ + const struct bpf_line_info *linfo = NULL; + unsigned int nr_skip = 0; + int count, i, pc = 0; + disasm_ctx_t ctx; + + if (!len) + return -1; + + if (init_context(&ctx, arch, disassembler_options, image, len)) + return -1; if (json_output) jsonw_start_array(json_wtr); @@ -185,10 +357,11 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (linfo) btf_dump_linfo_plain(btf, linfo, "; ", linum); - printf("%4x:\t", pc); + 
printf("%4x:" DISASM_SPACER, pc); } - count = disassemble(pc, &info); + count = disassemble_insn(&ctx, image, len, pc); + if (json_output) { /* Operand array, was started in fprintf_json. Before * that, make sure we have a _null_ value if no operand @@ -224,11 +397,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (json_output) jsonw_end_array(json_wtr); - bfd_close(bfdf); -} + destroy_context(&ctx); -int disasm_init(void) -{ - bfd_init(); return 0; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index ccd7457f92bf..741e50ee0b6c 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -71,6 +71,27 @@ static int do_help(int argc, char **argv) return 0; } +static int do_batch(int argc, char **argv); +static int do_version(int argc, char **argv); + +static const struct cmd commands[] = { + { "help", do_help }, + { "batch", do_batch }, + { "prog", do_prog }, + { "map", do_map }, + { "link", do_link }, + { "cgroup", do_cgroup }, + { "perf", do_perf }, + { "net", do_net }, + { "feature", do_feature }, + { "btf", do_btf }, + { "gen", do_gen }, + { "struct_ops", do_struct_ops }, + { "iter", do_iter }, + { "version", do_version }, + { 0 } +}; + #ifndef BPFTOOL_VERSION /* bpftool's major and minor version numbers are aligned on libbpf's. There is * an offset of 6 for the version number, because bpftool's version was higher @@ -82,6 +103,15 @@ static int do_help(int argc, char **argv) #define BPFTOOL_PATCH_VERSION 0 #endif +static void +print_feature(const char *feature, bool state, unsigned int *nb_features) +{ + if (state) { + printf("%s %s", *nb_features ? 
"," : "", feature); + *nb_features = *nb_features + 1; + } +} + static int do_version(int argc, char **argv) { #ifdef HAVE_LIBBFD_SUPPORT @@ -89,11 +119,28 @@ static int do_version(int argc, char **argv) #else const bool has_libbfd = false; #endif +#ifdef HAVE_LLVM_SUPPORT + const bool has_llvm = true; +#else + const bool has_llvm = false; +#endif #ifdef BPFTOOL_WITHOUT_SKELETONS const bool has_skeletons = false; #else const bool has_skeletons = true; #endif + bool bootstrap = false; + int i; + + for (i = 0; commands[i].cmd; i++) { + if (!strcmp(commands[i].cmd, "prog")) { + /* Assume we run a bootstrap version if "bpftool prog" + * is not available. + */ + bootstrap = !commands[i].func; + break; + } + } if (json_output) { jsonw_start_object(json_wtr); /* root object */ @@ -112,8 +159,10 @@ static int do_version(int argc, char **argv) jsonw_name(json_wtr, "features"); jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); + jsonw_bool_field(json_wtr, "llvm", has_llvm); jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); jsonw_bool_field(json_wtr, "skeletons", has_skeletons); + jsonw_bool_field(json_wtr, "bootstrap", bootstrap); jsonw_end_object(json_wtr); /* features */ jsonw_end_object(json_wtr); /* root object */ @@ -128,16 +177,11 @@ static int do_version(int argc, char **argv) #endif printf("using libbpf %s\n", libbpf_version_string()); printf("features:"); - if (has_libbfd) { - printf(" libbfd"); - nb_features++; - } - if (!legacy_libbpf) { - printf("%s libbpf_strict", nb_features++ ? "," : ""); - nb_features++; - } - if (has_skeletons) - printf("%s skeletons", nb_features++ ? 
"," : ""); + print_feature("libbfd", has_libbfd, &nb_features); + print_feature("llvm", has_llvm, &nb_features); + print_feature("libbpf_strict", !legacy_libbpf, &nb_features); + print_feature("skeletons", has_skeletons, &nb_features); + print_feature("bootstrap", bootstrap, &nb_features); printf("\n"); } return 0; @@ -279,26 +323,6 @@ static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb) return n_argc; } -static int do_batch(int argc, char **argv); - -static const struct cmd cmds[] = { - { "help", do_help }, - { "batch", do_batch }, - { "prog", do_prog }, - { "map", do_map }, - { "link", do_link }, - { "cgroup", do_cgroup }, - { "perf", do_perf }, - { "net", do_net }, - { "feature", do_feature }, - { "btf", do_btf }, - { "gen", do_gen }, - { "struct_ops", do_struct_ops }, - { "iter", do_iter }, - { "version", do_version }, - { 0 } -}; - static int do_batch(int argc, char **argv) { char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX]; @@ -386,7 +410,7 @@ static int do_batch(int argc, char **argv) jsonw_name(json_wtr, "output"); } - err = cmd_select(cmds, n_argc, n_argv, do_help); + err = cmd_select(commands, n_argc, n_argv, do_help); if (json_output) jsonw_end_object(json_wtr); @@ -450,7 +474,7 @@ int main(int argc, char **argv) json_output = false; show_pinned = false; block_mount = false; - bin_name = argv[0]; + bin_name = "bpftool"; opterr = 0; while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", @@ -528,7 +552,7 @@ int main(int argc, char **argv) if (version_requested) return do_version(argc, argv); - ret = cmd_select(cmds, argc, argv, do_help); + ret = cmd_select(commands, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 5e5060c2ac04..467d8472df0c 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -172,27 +172,28 @@ int map_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd_and_info(int *argc, char ***argv, void 
*info, __u32 *info_len); struct bpf_prog_linfo; -#ifdef HAVE_LIBBFD_SUPPORT -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum); +#if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT) +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum); int disasm_init(void); #else static inline -void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options, - const struct btf *btf, - const struct bpf_prog_linfo *prog_linfo, - __u64 func_ksym, unsigned int func_idx, - bool linum) +int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) { + return 0; } static inline int disasm_init(void) { - p_err("No libbfd support"); + p_err("No JIT disassembly support"); return -1; } #endif @@ -202,8 +203,7 @@ void print_hex_data_json(uint8_t *data, size_t len); unsigned int get_page_size(void); unsigned int get_possible_cpus(void); const char * -ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, - const char **opt); +ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt); struct btf_dumper { const struct btf *btf; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 9a6ca9f31133..f941ac5c7b73 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. 
*/ -#include <assert.h> #include <errno.h> #include <fcntl.h> #include <linux/err.h> @@ -1459,7 +1458,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage | bloom_filter | user_ringbuf }\n" + " task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 526a332c48e6..c40e44c938ae 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <errno.h> #include <fcntl.h> #include <stdlib.h> diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index 226ec2c39052..91743445e4c7 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -2,7 +2,9 @@ // Copyright (C) 2018 Facebook // Author: Yonghong Song <yhs@fb.com> +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <ctype.h> #include <errno.h> #include <fcntl.h> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index c81362a001ba..a858b907da16 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2017-2018 Netronome Systems, Inc. 
*/ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <errno.h> #include <fcntl.h> #include <signal.h> @@ -762,10 +764,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, const char *name = NULL; if (info->ifindex) { - name = ifindex_to_bfd_params(info->ifindex, - info->netns_dev, - info->netns_ino, - &disasm_opt); + name = ifindex_to_arch(info->ifindex, info->netns_dev, + info->netns_ino, &disasm_opt); if (!name) goto exit_free; } @@ -820,10 +820,11 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, - name, disasm_opt, btf, - prog_linfo, ksyms[i], i, - linum); + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum)) + goto exit_free; img += lens[i]; @@ -836,8 +837,10 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (json_output) jsonw_end_array(json_wtr); } else { - disasm_print_insn(buf, member_len, opcodes, name, - disasm_opt, btf, NULL, 0, 0, false); + if (disasm_print_insn(buf, member_len, opcodes, name, + disasm_opt, btf, NULL, 0, 0, + false)) + goto exit_free; } } else if (visual) { if (json_output) @@ -1453,6 +1456,67 @@ get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return ret; } +static int +auto_attach_program(struct bpf_program *prog, const char *path) +{ + struct bpf_link *link; + int err; + + link = bpf_program__attach(prog); + if (!link) { + p_info("Program %s does not support autoattach, falling back to pinning", + bpf_program__name(prog)); + return bpf_obj_pin(bpf_program__fd(prog), path); + } + + err = bpf_link__pin(link, path); + bpf_link__destroy(link); + return err; +} + +static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); + if (len < 0) + return -EINVAL; + if ((size_t)len >= buf_sz) + return -ENAMETOOLONG; + + return 0; +} + +static int 
+auto_attach_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + char buf[PATH_MAX]; + int err; + + bpf_object__for_each_program(prog, obj) { + err = pathname_concat(buf, sizeof(buf), path, bpf_program__name(prog)); + if (err) + goto err_unpin_programs; + + err = auto_attach_program(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_object__prev_program(obj, prog))) { + if (pathname_concat(buf, sizeof(buf), path, bpf_program__name(prog))) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; @@ -1464,6 +1528,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; const char *pinmaps = NULL; + bool auto_attach = false; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; @@ -1583,6 +1648,9 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_free_reuse_maps; pinmaps = GET_ARG(); + } else if (is_prefix(*argv, "autoattach")) { + auto_attach = true; + NEXT_ARG(); } else { p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", *argv); @@ -1692,14 +1760,20 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = bpf_obj_pin(bpf_program__fd(prog), pinfile); + if (auto_attach) + err = auto_attach_program(prog, pinfile); + else + err = bpf_obj_pin(bpf_program__fd(prog), pinfile); if (err) { p_err("failed to pin program %s", bpf_program__section_name(prog)); goto err_close_obj; } } else { - err = bpf_object__pin_programs(obj, pinfile); + if (auto_attach) + err = auto_attach_programs(obj, pinfile); + else + err = bpf_object__pin_programs(obj, pinfile); if (err) { p_err("failed to pin all programs"); goto err_close_obj; @@ -2338,6 
+2412,7 @@ static int do_help(int argc, char **argv) " [type TYPE] [dev NAME] \\\n" " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" + " [autoattach]\n" " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n" " %1$s %2$s run PROG \\\n" diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 2d9cd6a7b3c8..6fe3134ae45d 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdarg.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 17f61338f8f8..94659f6b3395 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -922,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -935,6 +942,7 @@ enum bpf_map_type { BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -5435,6 +5443,44 @@ union bpf_attr { * **-E2BIG** if user-space has tried to publish a sample which is * larger than the size of the ring buffer, or which cannot fit * within a struct bpf_dynptr. 
+ * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ FN(unspec, 0, ##ctx) \ @@ -5647,6 +5693,8 @@ union bpf_attr { FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ FN(ktime_get_tai_ns, 208, ##ctx) \ FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d88647da2c7f..675a0df5c840 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -3887,14 +3887,14 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) } /* Check if given two types are identical ARRAY definitions */ -static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) { struct btf_type *t1, *t2; t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); if (!btf_is_array(t1) || !btf_is_array(t2)) - return 0; + return false; return btf_equal_array(t1, t2); } @@ -3918,7 +3918,9 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id m1 = btf_members(t1); m2 = btf_members(t2); for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { - if (m1->type != m2->type) + if (m1->type != m2->type && + !btf_dedup_identical_arrays(d, m1->type, m2->type) && + !btf_dedup_identical_structs(d, m1->type, m2->type)) return false; } return true; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8c3f236c86e4..5d7819edf074 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -164,6 +164,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", + [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", }; static const char * const prog_type_name[] = { @@ -1461,15 +1462,12 @@ static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 return -ENOENT; } -static int 
find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) +static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) { Elf_Data *symbols = obj->efile.symbols; const char *sname; size_t si; - if (!name || !off) - return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); @@ -1483,15 +1481,13 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ sname = elf_sym_str(obj, sym->st_name); if (!sname) { pr_warn("failed to get sym name string for var %s\n", name); - return -EIO; - } - if (strcmp(name, sname) == 0) { - *off = sym->st_value; - return 0; + return ERR_PTR(-EIO); } + if (strcmp(name, sname) == 0) + return sym; } - return -ENOENT; + return ERR_PTR(-ENOENT); } static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) @@ -1582,7 +1578,38 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) } static int -bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map); +map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map); + +/* Internal BPF map is mmap()'able only if at least one of corresponding + * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL + * variable and it's not marked as __hidden (which turns it into, effectively, + * a STATIC variable). 
+ */ +static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) +{ + const struct btf_type *t, *vt; + struct btf_var_secinfo *vsi; + int i, n; + + if (!map->btf_value_type_id) + return false; + + t = btf__type_by_id(obj->btf, map->btf_value_type_id); + if (!btf_is_datasec(t)) + return false; + + vsi = btf_var_secinfos(t); + for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { + vt = btf__type_by_id(obj->btf, vsi->type); + if (!btf_is_var(vt)) + continue; + + if (btf_var(vt)->linkage != BTF_VAR_STATIC) + return true; + } + + return false; +} static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, @@ -1614,7 +1641,12 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG ? BPF_F_RDONLY_PROG : 0; - def->map_flags |= BPF_F_MMAPABLE; + + /* failures are fine because of maps like .rodata.str1.1 */ + (void) map_fill_btf_type_info(obj, map); + + if (map_is_mmapable(obj, map)) + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); @@ -1631,9 +1663,6 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, return err; } - /* failures are fine because of maps like .rodata.str1.1 */ - (void) bpf_map_find_btf_info(obj, map); - if (data) memcpy(map->mmaped, data, data_sz); @@ -2545,7 +2574,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, fill_map_from_def(map->inner_map, &inner_def); } - err = bpf_map_find_btf_info(obj, map); + err = map_fill_btf_type_info(obj, map); if (err) return err; @@ -2850,57 +2879,89 @@ static int compare_vsi_off(const void *_a, const void *_b) static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, struct btf_type *t) { - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = 
btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; + __u32 size = 0, i, vars = btf_vlen(t); + const char *sec_name = btf__name_by_offset(btf, t->name_off); struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; + bool fixup_offsets = false; + int err; - if (!name) { + if (!sec_name) { pr_debug("No name found in string section for DATASEC kind.\n"); return -ENOENT; } - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables + /* Extern-backing datasecs (.ksyms, .kconfig) have their size and + * variable offsets set at the previous step. Further, not every + * extern BTF VAR has corresponding ELF symbol preserved, so we skip + * all fixups altogether for such sections and go straight to sorting + * VARs within their DATASEC. */ - if (t->size) + if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) goto sort_vars; - ret = find_elf_sec_sz(obj, name, &size); - if (ret || !size) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } + /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to + * fix this up. But BPF static linker already fixes this up and fills + * all the sizes and offsets during static linking. So this step has + * to be optional. But the STV_HIDDEN handling is non-optional for any + * non-extern DATASEC, so the variable fixup loop below handles both + * functions at the same time, paying the cost of BTF VAR <-> ELF + * symbol matching just once. 
+ */ + if (t->size == 0) { + err = find_elf_sec_sz(obj, sec_name, &size); + if (err || !size) { + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", + sec_name, size, err); + return -ENOENT; + } - t->size = size; + t->size = size; + fixup_offsets = true; + } for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + const struct btf_type *t_var; + struct btf_var *var; + const char *var_name; + Elf64_Sym *sym; + t_var = btf__type_by_id(btf, vsi->type); if (!t_var || !btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); + pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); return -EINVAL; } var = btf_var(t_var); - if (var->linkage == BTF_VAR_STATIC) + if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) continue; - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); + var_name = btf__name_by_offset(btf, t_var->name_off); + if (!var_name) { + pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", + sec_name, i); return -ENOENT; } - ret = find_elf_var_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); + sym = find_elf_var_sym(obj, var_name); + if (IS_ERR(sym)) { + pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", + sec_name, var_name); return -ENOENT; } - vsi->offset = off; + if (fixup_offsets) + vsi->offset = sym->st_value; + + /* if variable is a global/weak symbol, but has restricted + * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR + * as static. This follows similar logic for functions (BPF + * subprogs) and influences libbpf's further decisions about + * whether to make global data BPF array maps as + * BPF_F_MMAPABLE. 
+ */ + if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) + var->linkage = BTF_VAR_STATIC; } sort_vars: @@ -2908,13 +2969,16 @@ sort_vars: return 0; } -static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +static int bpf_object_fixup_btf(struct bpf_object *obj) { - int err = 0; - __u32 i, n = btf__type_cnt(btf); + int i, n, err = 0; + if (!obj->btf) + return 0; + + n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { - struct btf_type *t = btf_type_by_id(btf, i); + struct btf_type *t = btf_type_by_id(obj->btf, i); /* Loader needs to fix up some of the things compiler * couldn't get its hands on while emitting BTF. This @@ -2922,28 +2986,12 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) * the info from the ELF itself for this purpose. */ if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); + err = btf_fixup_datasec(obj, obj->btf, t); if (err) - break; + return err; } } - return libbpf_err(err); -} - -static int bpf_object__finalize_btf(struct bpf_object *obj) -{ - int err; - - if (!obj->btf) - return 0; - - err = btf_finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - return err; - } - return 0; } @@ -4235,7 +4283,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat return 0; } -static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) +static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) { int id; @@ -7233,7 +7281,7 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); - err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object_fixup_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); err = err ? 
: bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index f3a8e8e74eb8..bdb83d467f9a 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -221,6 +221,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: btf_key_type_id = 1; btf_value_type_id = 3; value_size = 8; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 49f3c3b7f609..28fa1b2283de 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1225,26 +1225,24 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { - char *reg_name = NULL; + char reg_name[16]; int arg_sz, len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, &reg_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", &arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; - } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1456,16 +1454,15 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { - char *reg_name = NULL; + char reg_name[16]; int arg_sz, len, reg_off; long off; - if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, 
&off, &reg_name, &len) == 3) { + if (sscanf(arg_str, " %d @ %ld ( %15[a-z0-9] ) %n", &arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -8@-88(s0) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; @@ -1474,12 +1471,11 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec arg->arg_type = USDT_ARG_CONST; arg->val_off = off; arg->reg_off = 0; - } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) { + } else if (sscanf(arg_str, " %d @ %15[a-z0-9] %n", &arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -8@a1 */ arg->arg_type = USDT_ARG_REG; arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); - free(reg_name); if (reg_off < 0) return reg_off; arg->reg_off = reg_off; diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 new file mode 100644 index 000000000000..09416d5d2e33 --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -0,0 +1,81 @@ +bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 +bpf_cookie/lsm +bpf_cookie/multi_kprobe_attach_api +bpf_cookie/multi_kprobe_link_api +bpf_cookie/trampoline +bpf_loop/check_callback_fn_stop # link unexpected error: -524 +bpf_loop/check_invalid_flags +bpf_loop/check_nested_calls +bpf_loop/check_non_constant_callback +bpf_loop/check_nr_loops +bpf_loop/check_null_callback_ctx +bpf_loop/check_stack +bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) +bpf_tcp_ca/dctcp_fallback +btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 +cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) +d_path/basic # setup attach failed: -524 +deny_namespace # attach unexpected error: -524 (errno 524) +fentry_fexit # fentry_attach unexpected error: -1 (errno 524) 
+fentry_test # fentry_attach unexpected error: -1 (errno 524) +fexit_sleep # fexit_attach fexit attach failed: -1 +fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 +fexit_test # fexit_attach unexpected error: -1 (errno 524) +get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) +htab_update/reenter_update +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF +kfunc_call/subprog_lskel # skel unexpected error: -2 +kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 +kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 +kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) +ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF +ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 +libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) +lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) +lru_bug # lru_bug__attach unexpected error: -524 (errno 524) +modify_return # modify_return__attach failed unexpected error: -524 (errno 524) +module_attach # skel_attach skeleton attach failed: -524 +mptcp/base # run_test mptcp 
unexpected error: -524 (errno 524) +netcnt # packets unexpected packets: actual 10001 != expected 10000 +recursion # skel_attach unexpected error: -524 (errno 524) +ringbuf # skel_attach skeleton attachment failed: -1 +setget_sockopt # attach_cgroup unexpected error: -524 +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) +socket_cookie # prog_attach unexpected error: -524 +stacktrace_build_id # compare_stack_ips stackmap vs. stack_amap err -1 errno 2 +task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) +task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) +test_bprm_opts # attach attach failed: -524 +test_ima # attach attach failed: -524 +test_local_storage # attach lsm attach failed: -524 +test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) +test_overhead # attach_fentry unexpected error: -524 +timer # timer unexpected error: -524 (errno 524) +timer_crash # timer_crash__attach unexpected error: -524 (errno 524) +timer_mim # timer_mim unexpected error: -524 (errno 524) +trace_printk # trace_printk__attach unexpected error: -1 (errno 524) +trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) +tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) +trampoline_count # attach_prog unexpected error: -524 +unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_post_producer_wrong_offset +user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz +user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) +user_ringbuf/test_user_ringbuf_sample_full_ring_buffer +user_ringbuf/test_user_ringbuf_post_alignment_autoadjust +user_ringbuf/test_user_ringbuf_overfill 
+user_ringbuf/test_user_ringbuf_discards_properly_ignored +user_ringbuf/test_user_ringbuf_loop +user_ringbuf/test_user_ringbuf_msg_protocol +user_ringbuf/test_user_ringbuf_blocking_reserve +verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) +vmlinux # skel_attach skeleton attach failed: -524 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 520f12229b98..be4e3d47ea3e 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -10,6 +10,7 @@ bpf_nf # JIT does not support calling kernel f bpf_tcp_ca # JIT does not support calling kernel function (kfunc) cb_refs # expected error message unexpected error: -524 (trampoline) cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) +cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) core_read_macros # unknown func bpf_probe_read#4 (overlapping) d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e6cf21fad69f..79edef1dbda4 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -359,9 +359,11 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := fentry_test.c fexit_test.c fexit_sleep.c \ - test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ - map_ptr_kern.c core_kern.c core_kern_overflow.c +LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ + trace_printk.c trace_vprintk.c map_ptr_kern.c \ + core_kern.c core_kern_overflow.c test_ringbuf.c \ + test_ringbuf_map_key.c + # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ 
kfunc_call_test_subprog.c diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 822548d0f2ae..cb9b95702ac6 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -6,13 +6,53 @@ General instructions on running selftests can be found in __ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests +============= +BPF CI System +============= + +BPF employs a continuous integration (CI) system to check patch submission in an +automated fashion. The system runs selftests for each patch in a series. Results +are propagated to patchwork, where failures are highlighted similar to +violations of other checks (such as additional warnings being emitted or a +``scripts/checkpatch.pl`` reported deficiency): + + https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173 + +The CI system executes tests on multiple architectures. It uses a kernel +configuration derived from both the generic and architecture specific config +file fragments below ``tools/testing/selftests/bpf/`` (e.g., ``config`` and +``config.x86_64``). + +Denylisting Tests +================= + +It is possible for some architectures to not have support for all BPF features. +In such a case tests in CI may fail. An example of such a shortcoming is BPF +trampoline support on IBM's s390x architecture. For cases like this, an in-tree +deny list file, located at ``tools/testing/selftests/bpf/DENYLIST.<arch>``, can +be used to prevent the test from running on such an architecture. + +In addition to that, the generic ``tools/testing/selftests/bpf/DENYLIST`` is +honored on every architecture running tests. + +These files are organized in three columns. The first column lists the test in +question. This can be the name of a test suite or of an individual test. 
The +remaining two columns provide additional meta data that helps identify and +classify the entry: column two is a copy and paste of the error being reported +when running the test in the setting in question. The third column, if +available, summarizes the underlying problem. A value of ``trampoline``, for +example, indicates that lack of trampoline support is causing the test to fail. +This last entry helps identify tests that can be re-enabled once such support is +added. + ========================= Running Selftests in a VM ========================= It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``. The script tries to ensure that the tests are run with the same environment as they -would be run post-submit in the CI used by the Maintainers. +would be run post-submit in the CI used by the Maintainers, with the exception +that deny lists are not automatically honored. This script uses the in-tree kernel configuration and downloads a VM userspace image from the system used by the CI. 
It builds the kernel (without overwriting diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a6021d6117b5..5085fea3cac5 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -128,6 +128,23 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg) } } +noinline int bpf_testmod_fentry_test1(int a) +{ + return a + 1; +} + +noinline int bpf_testmod_fentry_test2(int a, u64 b) +{ + return a + b; +} + +noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) +{ + return a + b + c; +} + +int bpf_testmod_fentry_ok; + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, @@ -167,6 +184,13 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, return snprintf(buf, len, "%d\n", writable.val); } + if (bpf_testmod_fentry_test1(1) != 2 || + bpf_testmod_fentry_test2(2, 3) != 5 || + bpf_testmod_fentry_test3(4, 5, 6) != 15) + goto out; + + bpf_testmod_fentry_ok = 1; +out: return -EIO; /* always fail */ } EXPORT_SYMBOL(bpf_testmod_test_read); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 9213565c0311..7a99a6728169 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,4 +1,6 @@ CONFIG_BLK_DEV_LOOP=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 new file mode 100644 index 000000000000..1f0437644186 --- /dev/null +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -0,0 +1,181 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_ARM_SMMU_V3=y +CONFIG_ATA=y +CONFIG_AUDIT=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_INITRD=y 
+CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_SD=y +CONFIG_BONDING=y +CONFIG_BPFILTER=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_BRIDGE=m +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CHR_DEV_SG=y +CONFIG_COMPAT=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM=y +CONFIG_DUMMY=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FANOTIFY=y +CONFIG_FB=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_FUSE_FS=y +CONFIG_FW_CFG_SYSFS_CMDLINE=y +CONFIG_FW_CFG_SYSFS=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM_VIRTIO=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET_ESP=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KPROBES=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y 
+CONFIG_MAILBOX=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_NAMESPACES=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NF_TABLES=y +CONFIG_NLMON=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_OVERLAY_FS=y +CONFIG_PACKET_DIAG=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_PL320_MBOX=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SCAN_ASYNC=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SCSI=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASKSTATS=y +CONFIG_TASK_XACCT=y +CONFIG_TCG_TIS=y +CONFIG_TCG_TPM=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_FS=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 
f8a7a258a718..d49f6170e7bd 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -82,9 +82,6 @@ CONFIG_MARCH_Z196_TUNE=y CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULES=y CONFIG_NAMESPACES=y CONFIG_NET=y CONFIG_NET_9P=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 21ce5ea4304e..dd97d61d325c 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -18,7 +18,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y -CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_KPROBE_OVERRIDE=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index c39d40f4b268..6f8ed61fc4b4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -941,10 +941,10 @@ static void test_bpf_array_map(void) { __u64 val, expected_val = 0, res_first_val, first_val = 0; DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); - __u32 expected_key = 0, res_first_key; + __u32 key, expected_key = 0, res_first_key; + int err, i, map_fd, hash_fd, iter_fd; struct bpf_iter_bpf_array_map *skel; union bpf_iter_link_info linfo; - int err, i, map_fd, iter_fd; struct bpf_link *link; char buf[64] = {}; int len, start; @@ -1001,12 +1001,20 @@ static void test_bpf_array_map(void) if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; + hash_fd = bpf_map__fd(skel->maps.hashmap1); for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { err = bpf_map_lookup_elem(map_fd, &i, &val); - if (!ASSERT_OK(err, "map_lookup")) - goto out; - if (!ASSERT_EQ(i, val, "invalid_val")) - goto out; + if (!ASSERT_OK(err, "map_lookup arraymap1")) + goto 
close_iter; + if (!ASSERT_EQ(i, val, "invalid_val arraymap1")) + goto close_iter; + + val = i + 4; + err = bpf_map_lookup_elem(hash_fd, &val, &key); + if (!ASSERT_OK(err, "map_lookup hashmap1")) + goto close_iter; + if (!ASSERT_EQ(key, val - 4, "invalid_val hashmap1")) + goto close_iter; } close_iter: diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c new file mode 100644 index 000000000000..1c30412ba132 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/ + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <test_progs.h> +#include "cgrp_ls_tp_btf.skel.h" +#include "cgrp_ls_recursion.skel.h" +#include "cgrp_ls_attach_cgroup.skel.h" +#include "cgrp_ls_negative.skel.h" +#include "network_helpers.h" + +struct socket_cookie { + __u64 cookie_key; + __u32 cookie_value; +}; + +static void test_tp_btf(int cgroup_fd) +{ + struct cgrp_ls_tp_btf *skel; + long val1 = 1, val2 = 0; + int err; + + skel = cgrp_ls_tp_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + /* populate a value in map_b */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); + if (!ASSERT_OK(err, "map_update_elem")) + goto out; + + /* check value */ + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val2); + if (!ASSERT_OK(err, "map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val2, 1, "map_lookup_elem, invalid val")) + goto out; + + /* delete value */ + err = bpf_map_delete_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd); + if (!ASSERT_OK(err, "map_delete_elem")) + goto out; + + skel->bss->target_pid = syscall(SYS_gettid); + + err = cgrp_ls_tp_btf__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + syscall(SYS_gettid); + 
syscall(SYS_gettid); + + skel->bss->target_pid = 0; + + /* 3x syscalls: 1x attach and 2x gettid */ + ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); + ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt"); + ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt"); +out: + cgrp_ls_tp_btf__destroy(skel); +} + +static void test_attach_cgroup(int cgroup_fd) +{ + int server_fd = 0, client_fd = 0, err = 0; + socklen_t addr_len = sizeof(struct sockaddr_in6); + struct cgrp_ls_attach_cgroup *skel; + __u32 cookie_expected_value; + struct sockaddr_in6 addr; + struct socket_cookie val; + + skel = cgrp_ls_attach_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->links.set_cookie = bpf_program__attach_cgroup( + skel->progs.set_cookie, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach")) + goto out; + + skel->links.update_cookie_sockops = bpf_program__attach_cgroup( + skel->progs.update_cookie_sockops, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach")) + goto out; + + skel->links.update_cookie_tracing = bpf_program__attach( + skel->progs.update_cookie_tracing); + if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach")) + goto out; + + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto close_server_fd; + + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies), + &cgroup_fd, &val); + if (!ASSERT_OK(err, "map_lookup(socket_cookies)")) + goto close_client_fd; + + err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len); + if (!ASSERT_OK(err, "getsockname")) + goto close_client_fd; + + cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF; + ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value"); + +close_client_fd: + close(client_fd); +close_server_fd: + close(server_fd); +out: + 
cgrp_ls_attach_cgroup__destroy(skel); +} + +static void test_recursion(int cgroup_fd) +{ + struct cgrp_ls_recursion *skel; + int err; + + skel = cgrp_ls_recursion__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + err = cgrp_ls_recursion__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + /* trigger sys_enter, make sure it does not cause deadlock */ + syscall(SYS_gettid); + +out: + cgrp_ls_recursion__destroy(skel); +} + +static void test_negative(void) +{ + struct cgrp_ls_negative *skel; + + skel = cgrp_ls_negative__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "skel_open_and_load")) { + cgrp_ls_negative__destroy(skel); + return; + } +} + +void test_cgrp_local_storage(void) +{ + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/cgrp_local_storage"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) + return; + + if (test__start_subtest("tp_btf")) + test_tp_btf(cgroup_fd); + if (test__start_subtest("attach_cgroup")) + test_attach_cgroup(cgroup_fd); + if (test__start_subtest("recursion")) + test_recursion(cgroup_fd); + if (test__start_subtest("negative")) + test_negative(); + + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c new file mode 100644 index 000000000000..1fbe7e4ac00a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "kprobe_multi.skel.h" +#include "trace_helpers.h" +#include "bpf/libbpf_internal.h" + +static void kprobe_multi_testmod_check(struct kprobe_multi *skel) +{ + ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test2_result, 1, "kprobe_test2_result"); + ASSERT_EQ(skel->bss->kprobe_testmod_test3_result, 1, "kprobe_test3_result"); + + 
ASSERT_EQ(skel->bss->kretprobe_testmod_test1_result, 1, "kretprobe_test1_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test2_result, 1, "kretprobe_test2_result"); + ASSERT_EQ(skel->bss->kretprobe_testmod_test3_result, 1, "kretprobe_test3_result"); +} + +static void test_testmod_attach_api(struct bpf_kprobe_multi_opts *opts) +{ + struct kprobe_multi *skel = NULL; + + skel = kprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load")) + return; + + skel->bss->pid = getpid(); + + skel->links.test_kprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kprobe_testmod, + NULL, opts); + if (!skel->links.test_kprobe_testmod) + goto cleanup; + + opts->retprobe = true; + skel->links.test_kretprobe_testmod = bpf_program__attach_kprobe_multi_opts( + skel->progs.test_kretprobe_testmod, + NULL, opts); + if (!skel->links.test_kretprobe_testmod) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + kprobe_multi_testmod_check(skel); + +cleanup: + kprobe_multi__destroy(skel); +} + +static void test_testmod_attach_api_addrs(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + unsigned long long addrs[3]; + + addrs[0] = ksym_get_addr("bpf_testmod_fentry_test1"); + ASSERT_NEQ(addrs[0], 0, "ksym_get_addr"); + addrs[1] = ksym_get_addr("bpf_testmod_fentry_test2"); + ASSERT_NEQ(addrs[1], 0, "ksym_get_addr"); + addrs[2] = ksym_get_addr("bpf_testmod_fentry_test3"); + ASSERT_NEQ(addrs[2], 0, "ksym_get_addr"); + + opts.addrs = (const unsigned long *) addrs; + opts.cnt = ARRAY_SIZE(addrs); + + test_testmod_attach_api(&opts); +} + +static void test_testmod_attach_api_syms(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *syms[3] = { + "bpf_testmod_fentry_test1", + "bpf_testmod_fentry_test2", + "bpf_testmod_fentry_test3", + }; + + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + test_testmod_attach_api(&opts); +} + +void serial_test_kprobe_multi_testmod_test(void) +{ + if 
(!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + return; + + if (test__start_subtest("testmod_attach_api_syms")) + test_testmod_attach_api_syms(); + if (test__start_subtest("testmod_attach_api_addrs")) + test_testmod_attach_api_addrs(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 93e9cddaadcf..efb8bd43653c 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -139,6 +139,14 @@ static void test_libbpf_bpf_map_type_str(void) snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); uppercase(buf); + /* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED + * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value + * (map_type). For this enum value, libbpf_bpf_map_type_str() picks + * BPF_MAP_TYPE_CGROUP_STORAGE. + */ + if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0) + continue; + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); } diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d0e50dcf47c..7fc01ff490db 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -103,6 +103,13 @@ void test_module_attach(void) ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); bpf_link__destroy(link); + link = bpf_program__attach(skel->progs.kprobe_multi); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) + goto cleanup; + + ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + bpf_link__destroy(link); + cleanup: test_module_attach__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 9a80fe8a6427..ac104dc652e3 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ 
b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -13,6 +13,7 @@ #include <linux/perf_event.h> #include <linux/ring_buffer.h> #include "test_ringbuf.lskel.h" +#include "test_ringbuf_map_key.lskel.h" #define EDONE 7777 @@ -58,6 +59,7 @@ static int process_sample(void *ctx, void *data, size_t len) } } +static struct test_ringbuf_map_key_lskel *skel_map_key; static struct test_ringbuf_lskel *skel; static struct ring_buffer *ringbuf; @@ -81,7 +83,7 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } -void test_ringbuf(void) +static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; @@ -297,3 +299,65 @@ cleanup: ring_buffer__free(ringbuf); test_ringbuf_lskel__destroy(skel); } + +static int process_map_key_sample(void *ctx, void *data, size_t len) +{ + struct sample *s; + int err, val; + + s = data; + switch (s->seq) { + case 1: + ASSERT_EQ(s->value, 42, "sample_value"); + err = bpf_map_lookup_elem(skel_map_key->maps.hash_map.map_fd, + s, &val); + ASSERT_OK(err, "hash_map bpf_map_lookup_elem"); + ASSERT_EQ(val, 1, "hash_map val"); + return -EDONE; + default: + return 0; + } +} + +static void ringbuf_map_key_subtest(void) +{ + int err; + + skel_map_key = test_ringbuf_map_key_lskel__open(); + if (!ASSERT_OK_PTR(skel_map_key, "test_ringbuf_map_key_lskel__open")) + return; + + skel_map_key->maps.ringbuf.max_entries = getpagesize(); + skel_map_key->bss->pid = getpid(); + + err = test_ringbuf_map_key_lskel__load(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__load")) + goto cleanup; + + ringbuf = ring_buffer__new(skel_map_key->maps.ringbuf.map_fd, + process_map_key_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) + goto cleanup; + + err = test_ringbuf_map_key_lskel__attach(skel_map_key); + if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__attach")) + goto cleanup_ringbuf; + + syscall(__NR_getpgid); + 
ASSERT_EQ(skel_map_key->bss->seq, 1, "skel_map_key->bss->seq"); + err = ring_buffer__poll(ringbuf, -1); + ASSERT_EQ(err, -EDONE, "ring_buffer__poll"); + +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_map_key_lskel__destroy(skel_map_key); +} + +void test_ringbuf(void) +{ + if (test__start_subtest("ringbuf")) + ringbuf_subtest(); + if (test__start_subtest("ringbuf_map_key")) + ringbuf_map_key_subtest(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 99dac5292b41..bc6817aee9aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include <sys/mman.h> struct s { int a; @@ -22,7 +23,8 @@ void test_skeleton(void) struct test_skeleton__kconfig *kcfg; const void *elf_bytes; size_t elf_bytes_sz = 0; - int i; + void *m; + int i, fd; skel = test_skeleton__open(); if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) @@ -124,6 +126,13 @@ void test_skeleton(void) ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + fd = bpf_map__fd(skel->maps.data_non_mmapable); + m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(m, MAP_FAILED, "unexpected_mmap_success")) + munmap(m, getpagesize()); + + ASSERT_EQ(bpf_map__map_flags(skel->maps.data_non_mmapable), 0, "non_mmap_flags"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 035c263aab1b..a176bd75a748 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -3,12 +3,16 @@ #define _GNU_SOURCE /* See feature_test_macros(7) */ #include 
<unistd.h> +#include <sched.h> +#include <pthread.h> #include <sys/syscall.h> /* For SYS_xxx definitions */ #include <sys/types.h> #include <test_progs.h> +#include "task_local_storage_helpers.h" #include "task_local_storage.skel.h" #include "task_local_storage_exit_creds.skel.h" #include "task_ls_recursion.skel.h" +#include "task_storage_nodeadlock.skel.h" static void test_sys_enter_exit(void) { @@ -39,7 +43,8 @@ out: static void test_exit_creds(void) { struct task_local_storage_exit_creds *skel; - int err; + int err, run_count, sync_rcu_calls = 0; + const int MAX_SYNC_RCU_CALLS = 1000; skel = task_local_storage_exit_creds__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) @@ -53,8 +58,19 @@ static void test_exit_creds(void) if (CHECK_FAIL(system("ls > /dev/null"))) goto out; - /* sync rcu to make sure exit_creds() is called for "ls" */ - kern_sync_rcu(); + /* kern_sync_rcu is not enough on its own as the read section we want + * to wait for may start after we enter synchronize_rcu, so our call + * won't wait for the section to finish. Loop on the run counter + * as well to ensure the program has run. 
+ */ + do { + kern_sync_rcu(); + run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST); + } while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS); + + ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS, + "sync_rcu count too high"); + ASSERT_NEQ(run_count, 0, "run_count"); ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count"); ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count"); out: @@ -63,24 +79,160 @@ out: static void test_recursion(void) { + int err, map_fd, prog_fd, task_fd; struct task_ls_recursion *skel; - int err; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + long value; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open")) + return; skel = task_ls_recursion__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) - return; + goto out; err = task_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ + skel->bss->test_pid = getpid(); syscall(SYS_gettid); + skel->bss->test_pid = 0; + task_ls_recursion__detach(skel); + + /* Refer to the comment in BPF_PROG(on_update) for + * the explanation on the value 201 and 100. 
+ */ + map_fd = bpf_map__fd(skel->maps.map_a); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_a"); + ASSERT_EQ(value, 201, "map_a value"); + ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy"); + + map_fd = bpf_map__fd(skel->maps.map_b); + err = bpf_map_lookup_elem(map_fd, &task_fd, &value); + ASSERT_OK(err, "lookup map_b"); + ASSERT_EQ(value, 100, "map_b value"); + + prog_fd = bpf_program__fd(skel->progs.on_lookup); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_GT(info.recursion_misses, 0, "on_lookup prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_update); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion"); + + prog_fd = bpf_program__fd(skel->progs.on_enter); + memset(&info, 0, sizeof(info)); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion"); out: + close(task_fd); task_ls_recursion__destroy(skel); } +static bool stop; + +static void waitall(const pthread_t *tids, int nr) +{ + int i; + + stop = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +static void *sock_create_loop(void *arg) +{ + struct task_storage_nodeadlock *skel = arg; + int fd; + + while (!stop) { + fd = socket(AF_INET, SOCK_STREAM, 0); + close(fd); + if (skel->bss->nr_get_errs || skel->bss->nr_del_errs) + stop = true; + } + + return NULL; +} + +static void test_nodeadlock(void) +{ + struct task_storage_nodeadlock *skel; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + const int nr_threads = 32; + pthread_t tids[nr_threads]; + int i, prog_fd, err; + cpu_set_t old, new; + + /* Pin all threads to one cpu to increase the chance of preemption + * in a sleepable 
bpf prog. + */ + CPU_ZERO(&new); + CPU_SET(0, &new); + err = sched_getaffinity(getpid(), sizeof(old), &old); + if (!ASSERT_OK(err, "getaffinity")) + return; + err = sched_setaffinity(getpid(), sizeof(new), &new); + if (!ASSERT_OK(err, "setaffinity")) + return; + + skel = task_storage_nodeadlock__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto done; + + /* Unnecessary recursion and deadlock detection are reproducible + * in the preemptible kernel. + */ + if (!skel->kconfig->CONFIG_PREEMPT) { + test__skip(); + goto done; + } + + err = task_storage_nodeadlock__attach(skel); + ASSERT_OK(err, "attach prog"); + + for (i = 0; i < nr_threads; i++) { + err = pthread_create(&tids[i], NULL, sock_create_loop, skel); + if (err) { + /* Only assert once here to avoid excessive + * PASS printing during test failure. + */ + ASSERT_OK(err, "pthread_create"); + waitall(tids, i); + goto done; + } + } + + /* With 32 threads, 1s is enough to reproduce the issue */ + sleep(1); + waitall(tids, nr_threads); + + info_len = sizeof(info); + prog_fd = bpf_program__fd(skel->progs.socket_post_create); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + ASSERT_OK(err, "get prog info"); + ASSERT_EQ(info.recursion_misses, 0, "prog recursion"); + + ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy"); + ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); + +done: + task_storage_nodeadlock__destroy(skel); + sched_setaffinity(getpid(), sizeof(old), &old); +} + void test_task_local_storage(void) { if (test__start_subtest("sys_enter_exit")) @@ -89,4 +241,6 @@ void test_task_local_storage(void) test_exit_creds(); if (test__start_subtest("recursion")) test_recursion(); + if (test__start_subtest("nodeadlock")) + test_nodeadlock(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index 6286023fd62b..c5969ca6f26b 100644 --- 
a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -19,13 +19,20 @@ struct { __type(value, __u64); } arraymap1 SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10); + __type(key, __u64); + __type(value, __u32); +} hashmap1 SEC(".maps"); + __u32 key_sum = 0; __u64 val_sum = 0; SEC("iter/bpf_map_elem") int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) { - __u32 *key = ctx->key; + __u32 *hmap_val, *key = ctx->key; __u64 *val = ctx->value; if (key == (void *)0 || val == (void *)0) @@ -35,6 +42,18 @@ int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); key_sum += *key; val_sum += *val; + + /* workaround - It's necessary to do this convoluted (val, key) + * write into hashmap1, instead of simply doing + * bpf_map_update_elem(&hashmap1, val, key, BPF_ANY); + * because key has MEM_RDONLY flag and bpf_map_update elem expects + * types without this flag + */ + bpf_map_update_elem(&hashmap1, val, val, BPF_ANY); + hmap_val = bpf_map_lookup_elem(&hashmap1, val); + if (hmap_val) + *hmap_val = *key; + *val = *key; return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c new file mode 100644 index 000000000000..6652d18465b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +struct socket_cookie { + __u64 cookie_key; + __u64 cookie_value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct socket_cookie); +} socket_cookies SEC(".maps"); + +SEC("cgroup/connect6") +int set_cookie(struct bpf_sock_addr *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!p) + return 1; + + p->cookie_value = 0xF; + p->cookie_key = bpf_get_socket_cookie(ctx); + return 1; +} + +SEC("sockops") +int update_cookie_sockops(struct bpf_sock_ops *ctx) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + struct bpf_sock *sk; + + if (ctx->family != AF_INET6 || ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tcp_sk = bpf_skc_to_tcp_sock(sk); + if (!tcp_sk) + return 1; + + p = bpf_cgrp_storage_get(&socket_cookies, + tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 1; + + if (p->cookie_key != bpf_get_socket_cookie(ctx)) + return 1; + + p->cookie_value |= (ctx->local_port << 8); + return 1; +} + +SEC("fexit/inet_stream_connect") +int BPF_PROG(update_cookie_tracing, struct socket *sock, + struct sockaddr *uaddr, int addr_len, int flags) +{ + struct socket_cookie *p; + struct tcp_sock *tcp_sk; + + if (uaddr->sa_family != AF_INET6) + return 0; + + p = bpf_cgrp_storage_get(&socket_cookies, sock->sk->sk_cgrp_data.cgroup, 0, 0); + if (!p) + return 0; + + if (p->cookie_key != 
bpf_get_socket_cookie(sock->sk)) + return 0; + + p->cookie_value |= 0xF0; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c new file mode 100644 index 000000000000..d41f90e2ab64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + (void)bpf_cgrp_storage_get(&map_a, (struct cgroup *)task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c new file mode 100644 index 000000000000..a043d8fefdac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +SEC("fentry/bpf_local_storage_lookup") +int BPF_PROG(on_lookup) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + return 0; +} + +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr += 1; + + return 0; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 200; + + ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + *ptr = 100; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c new file mode 100644 index 000000000000..9ebb8e2fe541 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_a SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} map_b SEC(".maps"); + +#define MAGIC_VALUE 0xabcd1234 + +pid_t target_pid = 0; +int mismatch_cnt = 0; +int enter_cnt = 0; +int exit_cnt = 0; + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + int err; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* populate value 0 */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + /* delete value 0 */ + err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + if (err) + return 0; + + /* value is not available */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + if (ptr) + return 0; + + /* re-populate the value */ + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + __sync_fetch_and_add(&enter_cnt, 1); + *ptr = MAGIC_VALUE + enter_cnt; + + return 0; +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!ptr) + return 0; + + __sync_fetch_and_add(&exit_cnt, 1); + if (*ptr != MAGIC_VALUE + exit_cnt) + __sync_fetch_and_add(&mismatch_cnt, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c 
b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 98c3399e15c0..9e1ca8e34913 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -110,3 +110,53 @@ int test_kretprobe_manual(struct pt_regs *ctx) kprobe_multi_check(ctx, true); return 0; } + +extern const void bpf_testmod_fentry_test1 __ksym; +extern const void bpf_testmod_fentry_test2 __ksym; +extern const void bpf_testmod_fentry_test3 __ksym; + +__u64 kprobe_testmod_test1_result = 0; +__u64 kprobe_testmod_test2_result = 0; +__u64 kprobe_testmod_test3_result = 0; + +__u64 kretprobe_testmod_test1_result = 0; +__u64 kretprobe_testmod_test2_result = 0; +__u64 kretprobe_testmod_test3_result = 0; + +static void kprobe_multi_testmod_check(void *ctx, bool is_return) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + __u64 addr = bpf_get_func_ip(ctx); + + if (is_return) { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kretprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kretprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kretprobe_testmod_test3_result = 1; + } else { + if ((const void *) addr == &bpf_testmod_fentry_test1) + kprobe_testmod_test1_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test2) + kprobe_testmod_test2_result = 1; + if ((const void *) addr == &bpf_testmod_fentry_test3) + kprobe_testmod_test3_result = 1; + } +} + +SEC("kprobe.multi") +int test_kprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, false); + return 0; +} + +SEC("kretprobe.multi") +int test_kretprobe_testmod(struct pt_regs *ctx) +{ + kprobe_multi_testmod_check(ctx, true); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c index 81758c0aef99..41d88ed222ff 100644 --- 
a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c +++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c @@ -14,6 +14,7 @@ struct { __type(value, __u64); } task_storage SEC(".maps"); +int run_count = 0; int valid_ptr_count = 0; int null_ptr_count = 0; @@ -28,5 +29,7 @@ int BPF_PROG(trace_exit_creds, struct task_struct *task) __sync_fetch_and_add(&valid_ptr_count, 1); else __sync_fetch_and_add(&null_ptr_count, 1); + + __sync_fetch_and_add(&run_count, 1); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c index 564583dca7c8..4542dc683b44 100644 --- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -5,7 +5,13 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#ifndef EBUSY +#define EBUSY 16 +#endif + char _license[] SEC("license") = "GPL"; +int nr_del_errs = 0; +int test_pid = 0; struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); @@ -26,6 +32,13 @@ int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + /* The bpf_task_storage_delete will call + * bpf_local_storage_lookup. The prog->active will + * stop the recursion. + */ bpf_task_storage_delete(&map_a, task); bpf_task_storage_delete(&map_b, task); return 0; @@ -37,11 +50,32 @@ int BPF_PROG(on_update) struct task_struct *task = bpf_get_current_task_btf(); long *ptr; + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + /* ptr will not be NULL when it is called from + * the bpf_task_storage_get(&map_b,...F_CREATE) in + * the BPF_PROG(on_enter) below. It is because + * the value can be found in map_a and the kernel + * does not need to acquire any spin_lock. 
+ */ + if (ptr) { + int err; + *ptr += 1; + err = bpf_task_storage_delete(&map_a, task); + if (err == -EBUSY) + nr_del_errs++; + } + /* This will still fail because map_b is empty and + * this BPF_PROG(on_update) has failed to acquire + * the percpu busy lock => meaning potential + * deadlock is detected and it will fail to create + * new storage. + */ ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) @@ -57,14 +91,17 @@ int BPF_PROG(on_enter, struct pt_regs *regs, long id) long *ptr; task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 200; ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) + if (ptr && !*ptr) *ptr = 100; return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c new file mode 100644 index 000000000000..ea2dbb80f7b3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#ifndef EBUSY +#define EBUSY 16 +#endif + +extern bool CONFIG_PREEMPT __kconfig __weak; +int nr_get_errs = 0; +int nr_del_errs = 0; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_storage SEC(".maps"); + +SEC("lsm.s/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + struct task_struct *task; + int ret, zero = 0; + int *value; + + if (!CONFIG_PREEMPT) + return 0; + + task = bpf_get_current_task_btf(); + value = bpf_task_storage_get(&task_storage, task, &zero, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if 
(!value) + __sync_fetch_and_add(&nr_get_errs, 1); + + ret = bpf_task_storage_delete(&task_storage, + bpf_get_current_task_btf()); + if (ret == -EBUSY) + __sync_fetch_and_add(&nr_del_errs, 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 08628afedb77..8a1b50f3a002 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -110,4 +110,10 @@ int BPF_PROG(handle_fmod_ret, return 0; /* don't override the exit code */ } +SEC("kprobe.multi/bpf_testmod_test_read") +int BPF_PROG(kprobe_multi) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c new file mode 100644 index 000000000000..2760bf60d05a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
*/ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1000); + __type(key, struct sample); + __type(value, int); +} hash_map SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* inner state */ +long seq = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_mem_map_key(void *ctx) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + struct sample *sample, sample_copy; + int *lookup_val; + + if (cur_pid != pid) + return 0; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); + if (!sample) + return 0; + + sample->pid = pid; + bpf_get_current_comm(sample->comm, sizeof(sample->comm)); + sample->seq = ++seq; + sample->value = 42; + + /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg + */ + lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample); + + /* workaround - memcpy is necessary so that verifier doesn't + * complain with: + * verifier internal error: more than one arg with ref_obj_id R3 + * when trying to do bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY); + * + * Since bpf_map_lookup_elem above uses 'sample' as key, test using + * sample field as value below + */ + __builtin_memcpy(&sample_copy, sample, sizeof(struct sample)); + bpf_map_update_elem(&hash_map, &sample_copy, &sample->seq, BPF_ANY); + + bpf_ringbuf_submit(sample, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1a4e93f6d9df..adece9f91f58 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -53,6 +53,20 @@ int out_mostly_var; char huge_arr[16 * 1024 * 1024]; +/* non-mmapable custom .data section */ + +struct 
my_value { int x, y, z; }; + +__hidden int zero_key SEC(".data.non_mmapable"); +static struct my_value zero_value SEC(".data.non_mmapable"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct my_value); + __uint(max_entries, 1); +} my_map SEC(".maps"); + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -75,6 +89,9 @@ int handler(const void *ctx) huge_arr[sizeof(huge_arr) - 1] = 123; + /* make sure zero_key and zero_value are not optimized out */ + bpf_map_update_elem(&my_map, &zero_key, &zero_value, BPF_ANY); + return 0; } diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh index 1bf81b49457a..b5520692f41b 100755 --- a/tools/testing/selftests/bpf/test_bpftool_metadata.sh +++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh @@ -4,6 +4,9 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +BPF_FILE_USED="metadata_used.bpf.o" +BPF_FILE_UNUSED="metadata_unused.bpf.o" + TESTNAME=bpftool_metadata BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) BPF_DIR=$BPF_FS/test_$TESTNAME @@ -55,7 +58,7 @@ mkdir $BPF_DIR trap cleanup EXIT -bpftool prog load metadata_unused.o $BPF_DIR/unused +bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null @@ -67,7 +70,7 @@ bpftool map | grep 'metadata.rodata' > /dev/null rm $BPF_DIR/unused -bpftool prog load metadata_used.o $BPF_DIR/used +bpftool prog load $BPF_FILE_USED $BPF_DIR/used METADATA_PLAIN="$(bpftool prog)" echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index a6410bebe603..9fe4c9336c6f 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -501,6 +501,14 @@ def main(): source_map_types = 
set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE + # share the same enum value and source_map_types picks + # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED/cgroup_storage_deprecated. + # Replace 'cgroup_storage_deprecated' with 'cgroup_storage' + # so it aligns with what `bpftool map help` shows. + source_map_types.remove('cgroup_storage_deprecated') + source_map_types.add('cgroup_storage') + help_map_types = map_info.get_map_help() help_map_options = map_info.get_options() map_info.close() diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index 5303ce0c977b..4b298863797a 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # # Load BPF flow dissector and verify it correctly dissects traffic + +BPF_FILE="bpf_flow.bpf.o" export TESTNAME=test_flow_dissector unmount=0 @@ -22,7 +24,7 @@ if [[ -z $(ip netns identify $$) ]]; then if bpftool="$(which bpftool)"; then echo "Testing global flow dissector..." - $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \ type flow_dissector if ! 
unshare --net $bpftool prog attach pinned \ @@ -95,7 +97,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s _dissect +./flow_dissector_load -p $BPF_FILE -s _dissect # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 6c69c42b1d60..1e565f47aca9 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -38,6 +38,7 @@ # ping: SRC->[encap at veth2:ingress]->GRE:decap->DST # ping replies go DST->SRC directly +BPF_FILE="test_lwt_ip_encap.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -373,14 +374,14 @@ test_egress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre dev veth1 ${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF} else echo " unknown encap ${ENCAP}" TEST_STATUS=1 @@ -431,14 +432,14 @@ test_ingress() # install replacement routes (LWT/eBPF), pings succeed if [ "${ENCAP}" == "IPv4" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre dev veth2 
${VRF} elif [ "${ENCAP}" == "IPv6" ] ; then ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \ - test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF} + ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF} else echo "FAIL: unknown encap ${ENCAP}" TEST_STATUS=1 diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 826f4423ce02..0efea2292d6a 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -23,6 +23,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +BPF_FILE="test_lwt_seg6local.bpf.o" readonly NS1="ns1-$(mktemp -u XXXXXX)" readonly NS2="ns2-$(mktemp -u XXXXXX)" readonly NS3="ns3-$(mktemp -u XXXXXX)" @@ -117,18 +118,18 @@ ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21 -ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2 ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65 -ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4 +ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4 -ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr 
dev veth6 ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87 ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 -ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8 +ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8 ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh index daa7d1b8d309..76f0bd17061f 100755 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -5,6 +5,7 @@ # with dst port = 9000 down to 5MBps. Then it measures actual # throughput of the flow. +BPF_FILE="test_tc_edt.bpf.o" if [[ $EUID -ne 0 ]]; then echo "This script must be run as root" echo "FAIL" @@ -54,7 +55,7 @@ ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ - bpf da obj test_tc_edt.o sec cls_test + bpf da obj ${BPF_FILE} sec cls_test # start the listener diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 088fcad138c9..334bdfeab940 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -3,6 +3,7 @@ # # In-place tunneling +BPF_FILE="test_tc_tunnel.bpf.o" # must match the port that the bpf program filters on readonly port=8000 @@ -196,7 +197,7 @@ verify_data # client can no longer connect ip netns exec "${ns1}" tc qdisc add dev veth1 clsact ip netns exec "${ns1}" tc filter add dev veth1 egress \ - bpf direct-action object-file ./test_tc_tunnel.o \ + 
bpf direct-action object-file ${BPF_FILE} \ section "encap_${tuntype}_${mac}" echo "test bpf encap without decap (expect failure)" server_listen @@ -296,7 +297,7 @@ fi ip netns exec "${ns2}" ip link del dev testtun0 ip netns exec "${ns2}" tc qdisc add dev veth2 clsact ip netns exec "${ns2}" tc filter add dev veth2 ingress \ - bpf direct-action object-file ./test_tc_tunnel.o section decap + bpf direct-action object-file ${BPF_FILE} section decap echo "test bpf encap with bpf decap" client_connect verify_data diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index e9ebc67d73f7..2eaedc1d9ed3 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -45,6 +45,7 @@ # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet # 6) Forward the packet to the overlay tnl dev +BPF_FILE="test_tunnel_kern.bpf.o" BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" PING_ARG="-c 3 -w 10 -q" ret=0 @@ -545,7 +546,7 @@ test_xfrm_tunnel() > /sys/kernel/debug/tracing/trace setup_xfrm_tunnel mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} tc qdisc add dev veth1 clsact tc filter add dev veth1 proto ip ingress bpf da object-pinned \ ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state @@ -572,7 +573,7 @@ attach_bpf() SET=$2 GET=$3 mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/ + bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/ tc qdisc add dev $DEV clsact tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index ea69370caae3..2740322c1878 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh 
@@ -1,5 +1,6 @@ #!/bin/sh +BPF_FILE="test_xdp_meta.bpf.o" # Kselftest framework requirement - SKIP code is 4. readonly KSFT_SKIP=4 readonly NS1="ns1-$(mktemp -u XXXXXX)" @@ -42,11 +43,11 @@ ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 ip netns exec ${NS1} tc qdisc add dev veth1 clsact ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS1} ip link set dev veth1 xdp obj test_xdp_meta.o sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj test_xdp_meta.o sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x +ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x ip netns exec ${NS1} ip link set dev veth1 up ip netns exec ${NS2} ip link set dev veth2 up diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh index 810c407e0286..fbcaa9f0120b 100755 --- a/tools/testing/selftests/bpf/test_xdp_vlan.sh +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -200,11 +200,11 @@ ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First p # ---------------------------------------------------------------------- # In ns1: ingress use XDP to remove VLAN tags export DEVNS1=veth1 -export FILE=test_xdp_vlan.o +export BPF_FILE=test_xdp_vlan.bpf.o # First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" export XDP_PROG=xdp_vlan_change -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # In ns1: egress use TC to add back VLAN tag 4011 # (del cmd) @@ -212,7 +212,7 @@ ip netns exec ${NS1} 
ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PRO # ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \ - prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push # Now the namespaces can reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 @@ -226,7 +226,7 @@ ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2 # export XDP_PROG=xdp_vlan_remove_outer2 ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off -ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG +ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG # Now the namespaces should still be able reach each-other, test with ping: ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 9c4be2cdb21a..09a16a77bae4 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -23,7 +23,7 @@ static int ksym_cmp(const void *p1, const void *p2) return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; } -int load_kallsyms(void) +int load_kallsyms_refresh(void) { FILE *f; char func[256], buf[256]; @@ -31,12 +31,7 @@ int load_kallsyms(void) void *addr; int i = 0; - /* - * This is called/used from multiplace places, - * load symbols just once. - */ - if (sym_cnt) - return 0; + sym_cnt = 0; f = fopen("/proc/kallsyms", "r"); if (!f) @@ -57,6 +52,17 @@ int load_kallsyms(void) return 0; } +int load_kallsyms(void) +{ + /* + * This is called/used from multiplace places, + * load symbols just once. 
+ */ + if (sym_cnt) + return 0; + return load_kallsyms_refresh(); +} + struct ksym *ksym_search(long key) { int start = 0, end = sym_cnt; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 238a9c98cde2..53efde0e2998 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -10,6 +10,8 @@ struct ksym { }; int load_kallsyms(void); +int load_kallsyms_refresh(void); + struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c index 79021c30e51e..8bf37e5207f1 100644 --- a/tools/testing/selftests/bpf/verifier/jit.c +++ b/tools/testing/selftests/bpf/verifier/jit.c @@ -21,6 +21,30 @@ .retval = 2, }, { + "jit: lsh, rsh, arsh by reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_MOV64_IMM(BPF_REG_1, 0xff), + BPF_ALU64_REG(BPF_LSH, BPF_REG_1, BPF_REG_0), + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_RSH, BPF_REG_1, BPF_REG_4), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_1), + BPF_ALU32_REG(BPF_RSH, BPF_REG_4, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xff, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ARSH, BPF_REG_4, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, +{ "jit: mov32 for ldimm64, 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 2), diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index a29aa05ebb3e..316a56d680f2 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -21,6 +21,12 @@ x86_64) QEMU_FLAGS=(-cpu host -smp 8) BZIMAGE="arch/x86/boot/bzImage" ;; +aarch64) + QEMU_BINARY=qemu-system-aarch64 + QEMU_CONSOLE="ttyAMA0,115200" + 
QEMU_FLAGS=(-M virt,gic-version=3 -cpu host -smp 8) + BZIMAGE="arch/arm64/boot/Image" + ;; *) echo "Unsupported architecture" exit 1 |