-rw-r--r--  arch/x86/net/bpf_jit_comp.c                                         | 147
-rw-r--r--  include/linux/bpf.h                                                 |   4
-rw-r--r--  include/linux/bpf_verifier.h                                        |   8
-rw-r--r--  include/linux/filter.h                                              |   1
-rw-r--r--  kernel/bpf/core.c                                                   |   5
-rw-r--r--  kernel/bpf/trampoline.c                                             |   4
-rw-r--r--  kernel/bpf/verifier.c                                               | 112
-rw-r--r--  tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c               | 104
-rw-r--r--  tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h               |   5
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c   | 106
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/verifier.c                   |   2
-rw-r--r--  tools/testing/selftests/bpf/progs/struct_ops_private_stack.c        |  62
-rw-r--r--  tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c   |  62
-rw-r--r--  tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c  |  50
-rw-r--r--  tools/testing/selftests/bpf/progs/verifier_private_stack.c          | 272
15 files changed, 930 insertions(+), 14 deletions(-)
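
The x86 JIT changes below bracket each per-CPU private stack with an 8-byte guard word on either end (PRIV_STACK_GUARD_SZ / PRIV_STACK_GUARD_VAL): priv_stack_init_guard() seeds the words at allocation time and priv_stack_check_guard() re-checks them when the program is freed. A minimal user-space C sketch of the same guard pattern, with a plain calloc'ed buffer standing in for the per-CPU allocation and with stack_alloc_with_guards()/stack_check_guards() invented for this sketch:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PRIV_STACK_GUARD_SZ	8
#define PRIV_STACK_GUARD_VAL	0xEB9F12345678eb9fULL

/* Allocate a stack area with one guard word below and one above the
 * usable region, mirroring priv_stack_init_guard() in the patch. */
static uint64_t *stack_alloc_with_guards(size_t depth, size_t *alloc_sz)
{
	size_t sz = depth + 2 * PRIV_STACK_GUARD_SZ;
	uint64_t *buf = calloc(1, sz);

	if (!buf)
		return NULL;
	buf[0] = PRIV_STACK_GUARD_VAL;                               /* overflow guard */
	buf[(sz - PRIV_STACK_GUARD_SZ) / 8] = PRIV_STACK_GUARD_VAL;  /* underflow guard */
	*alloc_sz = sz;
	return buf;
}

/* Mirror priv_stack_check_guard(): report a clobbered guard word. */
static int stack_check_guards(const uint64_t *buf, size_t alloc_sz)
{
	size_t underflow_idx = (alloc_sz - PRIV_STACK_GUARD_SZ) / 8;

	if (buf[0] != PRIV_STACK_GUARD_VAL ||
	    buf[underflow_idx] != PRIV_STACK_GUARD_VAL) {
		fprintf(stderr, "stack overflow/underflow detected\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	size_t alloc_sz;
	uint64_t *stack = stack_alloc_with_guards(64, &alloc_sz);

	if (!stack)
		return 1;
	stack[0] = 0xdeadbeef;               /* simulate an overflow past the low end */
	stack_check_guards(stack, alloc_sz); /* prints the warning */
	free(stack);
	return 0;
}

The kernel variant walks every possible CPU's copy via per_cpu_ptr() and stops at the first corrupted one; the single-buffer version above keeps the index arithmetic ((alloc_size - PRIV_STACK_GUARD_SZ) >> 3, written here as /8) equivalent.
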
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06b080b61aa5..8f896c32172c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -325,6 +325,22 @@ struct jit_context {
 /* Number of bytes that will be skipped on tailcall */
 #define X86_TAIL_CALL_OFFSET	(12 + ENDBR_INSN_SIZE)
 
+static void push_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x51);   /* push r9 */
+	*pprog = prog;
+}
+
+static void pop_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x59);   /* pop r9 */
+	*pprog = prog;
+}
+
 static void push_r12(u8 **pprog)
 {
 	u8 *prog = *pprog;
@@ -1404,6 +1420,24 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 	*pprog = prog;
 }
 
+static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
+{
+	u8 *prog = *pprog;
+
+	/* movabs r9, priv_frame_ptr */
+	emit_mov_imm64(&prog, X86_REG_R9, (__force long) priv_frame_ptr >> 32,
+		       (u32) (__force long) priv_frame_ptr);
+
+#ifdef CONFIG_SMP
+	/* add <r9>, gs:[<off>] */
+	EMIT2(0x65, 0x4c);
+	EMIT3(0x03, 0x0c, 0x25);
+	EMIT((u32)(unsigned long)&this_cpu_off, 4);
+#endif
+
+	*pprog = prog;
+}
+
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
 
 #define __LOAD_TCC_PTR(off)			\
@@ -1412,6 +1446,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 #define LOAD_TAIL_CALL_CNT_PTR(stack)				\
 	__LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
 
+/* Memory size/value to protect private stack overflow/underflow */
+#define PRIV_STACK_GUARD_SZ	8
+#define PRIV_STACK_GUARD_VAL	0xEB9F12345678eb9fULL
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
 		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
 {
@@ -1421,18 +1459,28 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	int insn_cnt = bpf_prog->len;
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+	void __percpu *priv_frame_ptr = NULL;
 	u64 arena_vm_start, user_vm_start;
+	void __percpu *priv_stack_ptr;
 	int i, excnt = 0;
 	int ilen, proglen = 0;
 	u8 *prog = temp;
+	u32 stack_depth;
 	int err;
 
+	stack_depth = bpf_prog->aux->stack_depth;
+	priv_stack_ptr = bpf_prog->aux->priv_stack_ptr;
+	if (priv_stack_ptr) {
+		priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ +
+				 round_up(stack_depth, 8);
+		stack_depth = 0;
+	}
+
 	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
 	user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
 
 	detect_reg_usage(insn, insn_cnt, callee_regs_used);
 
-	emit_prologue(&prog, bpf_prog->aux->stack_depth,
+	emit_prologue(&prog, stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
 		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
 	/* Exception callback will clobber callee regs for its own use, and
@@ -1454,6 +1502,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		emit_mov_imm64(&prog, X86_REG_R12,
 			       arena_vm_start >> 32, (u32) arena_vm_start);
 
+	if (priv_frame_ptr)
+		emit_priv_frame_ptr(&prog, priv_frame_ptr);
+
 	ilen = prog - temp;
 	if (rw_image)
 		memcpy(rw_image + proglen, temp, ilen);
@@ -1473,6 +1524,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		u8 *func;
 		int nops;
 
+		if (priv_frame_ptr) {
+			if (src_reg == BPF_REG_FP)
+				src_reg = X86_REG_R9;
+
+			if (dst_reg == BPF_REG_FP)
+				dst_reg = X86_REG_R9;
+		}
+
 		switch (insn->code) {
 			/* ALU */
 		case BPF_ALU | BPF_ADD | BPF_X:
@@ -2128,14 +2187,20 @@ populate_extable:
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
-				LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
+				LOAD_TAIL_CALL_CNT_PTR(stack_depth);
 				ip += 7;
 			}
 			if (!imm32)
 				return -EINVAL;
+			if (priv_frame_ptr) {
+				push_r9(&prog);
+				ip += 2;
+			}
 			ip += x86_call_depth_emit_accounting(&prog, func, ip);
 			if (emit_call(&prog, func, ip))
 				return -EINVAL;
+			if (priv_frame_ptr)
+				pop_r9(&prog);
 			break;
 		}
 
@@ -2145,13 +2210,13 @@ populate_extable:
 					  &bpf_prog->aux->poke_tab[imm32 - 1],
 					  &prog, image + addrs[i - 1],
 					  callee_regs_used,
-					  bpf_prog->aux->stack_depth,
+					  stack_depth,
 					  ctx);
 			else
 				emit_bpf_tail_call_indirect(bpf_prog,
 							    &prog,
 							    callee_regs_used,
-							    bpf_prog->aux->stack_depth,
+							    stack_depth,
 							    image + addrs[i - 1],
 							    ctx);
 			break;
@@ -3303,6 +3368,42 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
 }
 
+static const char *bpf_get_prog_name(struct bpf_prog *prog)
+{
+	if (prog->aux->ksym.prog)
+		return prog->aux->ksym.name;
+	return prog->aux->name;
+}
+
+static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
+		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
+	}
+}
+
+static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
+				   struct bpf_prog *prog)
+{
+	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
+	u64 *stack_ptr;
+
+	for_each_possible_cpu(cpu) {
+		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
+		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
+		    stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
+			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
+			       bpf_get_prog_name(prog));
+			break;
+		}
+	}
+}
+
 struct x64_jit_data {
 	struct bpf_binary_header *rw_header;
 	struct bpf_binary_header *header;
@@ -3320,7 +3421,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	struct bpf_binary_header *rw_header = NULL;
 	struct bpf_binary_header *header = NULL;
 	struct bpf_prog *tmp, *orig_prog = prog;
+	void __percpu *priv_stack_ptr = NULL;
 	struct x64_jit_data *jit_data;
+	int priv_stack_alloc_sz;
 	int proglen, oldproglen = 0;
 	struct jit_context ctx = {};
 	bool tmp_blinded = false;
@@ -3356,6 +3459,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		}
 		prog->aux->jit_data = jit_data;
 	}
+	priv_stack_ptr = prog->aux->priv_stack_ptr;
+	if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
+		/* Allocate actual private stack size with verifier-calculated
+		 * stack size plus two memory guards to protect overflow and
+		 * underflow.
+		 */
+		priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+				      2 * PRIV_STACK_GUARD_SZ;
+		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL);
+		if (!priv_stack_ptr) {
+			prog = orig_prog;
+			goto out_priv_stack;
+		}
+
+		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
+		prog->aux->priv_stack_ptr = priv_stack_ptr;
+	}
 	addrs = jit_data->addrs;
 	if (addrs) {
 		ctx = jit_data->ctx;
@@ -3491,6 +3611,11 @@ out_image:
 		bpf_prog_fill_jited_linfo(prog, addrs + 1);
 out_addrs:
 		kvfree(addrs);
+		if (!image && priv_stack_ptr) {
+			free_percpu(priv_stack_ptr);
+			prog->aux->priv_stack_ptr = NULL;
+		}
+out_priv_stack:
 		kfree(jit_data);
 		prog->aux->jit_data = NULL;
 	}
@@ -3529,6 +3654,8 @@ void bpf_jit_free(struct bpf_prog *prog)
 	if (prog->jited) {
 		struct x64_jit_data *jit_data = prog->aux->jit_data;
 		struct bpf_binary_header *hdr;
+		void __percpu *priv_stack_ptr;
+		int priv_stack_alloc_sz;
 
 		/*
 		 * If we fail the final pass of JIT (from jit_subprogs),
@@ -3544,6 +3671,13 @@ void bpf_jit_free(struct bpf_prog *prog)
 			prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
 		hdr = bpf_jit_binary_pack_hdr(prog);
 		bpf_jit_binary_pack_free(hdr, NULL);
+		priv_stack_ptr = prog->aux->priv_stack_ptr;
+		if (priv_stack_ptr) {
+			priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
+					      2 * PRIV_STACK_GUARD_SZ;
+			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
+			free_percpu(prog->aux->priv_stack_ptr);
+		}
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
 	}
 
@@ -3559,6 +3693,11 @@ bool bpf_jit_supports_exceptions(void)
 	return IS_ENABLED(CONFIG_UNWINDER_ORC);
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 #if defined(CONFIG_UNWINDER_ORC)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7da41ae2eac8..10945c8858ce 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1507,6 +1507,7 @@ struct bpf_prog_aux {
 	u32 max_rdwr_access;
 	struct btf *attach_btf;
 	const struct bpf_ctx_arg_aux *ctx_arg_info;
+	void __percpu *priv_stack_ptr;
 	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
 	struct bpf_prog *dst_prog;
 	struct bpf_trampoline *dst_trampoline;
@@ -1523,9 +1524,12 @@ struct bpf_prog_aux {
 	bool exception_cb;
 	bool exception_boundary;
 	bool is_extended; /* true if extended by freplace program */
+	bool jits_use_priv_stack;
+	bool priv_stack_requested;
 	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
 	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
 	struct bpf_arena *arena;
+	void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
 	/* function name for valid attach_btf_id */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3a74033d49c4..6b7c91629176 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -633,6 +633,12 @@ struct bpf_subprog_arg_info {
 	};
 };
 
+enum priv_stack_mode {
+	PRIV_STACK_UNKNOWN,
+	NO_PRIV_STACK,
+	PRIV_STACK_ADAPTIVE,
+};
+
 struct bpf_subprog_info {
 	/* 'start' has to be the first field otherwise find_subprog() won't work */
 	u32 start; /* insn idx of function entry point */
@@ -653,6 +659,7 @@ struct bpf_subprog_info {
 	/* true if bpf_fastcall stack region is used by functions that can't be inlined */
 	bool keep_fastcall_stack: 1;
 
+	enum priv_stack_mode priv_stack_mode;
 	u8 arg_cnt;
 	struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
 };
@@ -872,6 +879,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
 	case BPF_PROG_TYPE_TRACING:
 		return prog->expected_attach_type != BPF_TRACE_ITER;
 	case BPF_PROG_TYPE_STRUCT_OPS:
+		return prog->aux->jits_use_priv_stack;
 	case BPF_PROG_TYPE_LSM:
 		return false;
 	default:
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d7578a8eac1..3a21947f2fd4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1119,6 +1119,7 @@ bool bpf_jit_supports_exceptions(void);
 bool bpf_jit_supports_ptr_xchg(void);
 bool bpf_jit_supports_arena(void);
 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena);
+bool bpf_jit_supports_private_stack(void);
 u64 bpf_arch_uaddress_limit(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 233ea78f8f1b..14d9288441f2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -3045,6 +3045,11 @@ bool __weak bpf_jit_supports_exceptions(void)
 	return false;
 }
 
+bool __weak bpf_jit_supports_private_stack(void)
+{
+	return false;
+}
+
 void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 }
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 9f36c049f4c2..a8d188b31da5 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -899,6 +899,8 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
 	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 		bpf_prog_inc_misses_counter(prog);
+		if (prog->aux->recursion_detected)
+			prog->aux->recursion_detected(prog);
 		return 0;
 	}
 	return bpf_prog_start_time();
@@ -975,6 +977,8 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
 		bpf_prog_inc_misses_counter(prog);
+		if (prog->aux->recursion_detected)
+			prog->aux->recursion_detected(prog);
 		return 0;
 	}
 	return bpf_prog_start_time();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9f5de8d4fbd0..f4c39bb50511 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,8 @@ struct bpf_verifier_stack_elem {
 
 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE	512
 
+#define BPF_PRIV_STACK_MIN_SIZE		64
+
 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
@@ -6090,6 +6092,34 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 				   strict);
 }
 
+static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
+{
+	if (!bpf_jit_supports_private_stack())
+		return NO_PRIV_STACK;
+
+	/* bpf_prog_check_recur() checks all prog types that use bpf trampoline
+	 * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked
+	 * explicitly.
+	 */
+	switch (prog->type) {
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		return PRIV_STACK_ADAPTIVE;
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_LSM:
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog))
+			return PRIV_STACK_ADAPTIVE;
+		fallthrough;
+	default:
+		break;
+	}
+
+	return NO_PRIV_STACK;
+}
+
 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
 {
 	if (env->prog->jit_requested)
@@ -6107,17 +6137,20 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
  * Since recursion is prevented by check_cfg() this algorithm
  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
  */
-static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
+static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
+					 bool priv_stack_supported)
 {
 	struct bpf_subprog_info *subprog = env->subprog_info;
 	struct bpf_insn *insn = env->prog->insnsi;
-	int depth = 0, frame = 0, i, subprog_end;
+	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
 	bool tail_call_reachable = false;
 	int ret_insn[MAX_CALL_FRAMES];
 	int ret_prog[MAX_CALL_FRAMES];
 	int j;
 
 	i = subprog[idx].start;
+	if (!priv_stack_supported)
+		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
 process_func:
 	/* protect against potential stack overflow that might happen when
 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
@@ -6144,11 +6177,31 @@ process_func:
 			depth);
 		return -EACCES;
 	}
-	depth += round_up_stack_depth(env, subprog[idx].stack_depth);
-	if (depth > MAX_BPF_STACK) {
-		verbose(env, "combined stack size of %d calls is %d. Too large\n",
-			frame + 1, depth);
-		return -EACCES;
+
+	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
+	if (priv_stack_supported) {
+		/* Request private stack support only if the subprog stack
+		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
+		 * avoid jit penalty if the stack usage is small.
+		 */
+		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
+		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
+			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
+	}
+
+	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
+		if (subprog_depth > MAX_BPF_STACK) {
+			verbose(env, "stack size of subprog %d is %d. Too large\n",
+				idx, subprog_depth);
+			return -EACCES;
+		}
+	} else {
+		depth += subprog_depth;
+		if (depth > MAX_BPF_STACK) {
+			verbose(env, "combined stack size of %d calls is %d. Too large\n",
+				frame + 1, depth);
+			return -EACCES;
+		}
 	}
 continue_func:
 	subprog_end = subprog[idx + 1].start;
@@ -6205,6 +6258,8 @@ continue_func:
 		}
 		i = next_insn;
 		idx = sidx;
+		if (!priv_stack_supported)
+			subprog[idx].priv_stack_mode = NO_PRIV_STACK;
 
 		if (subprog[idx].has_tail_call)
 			tail_call_reachable = true;
@@ -6238,7 +6293,8 @@ continue_func:
 	 */
 	if (frame == 0)
 		return 0;
-	depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
+	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
+		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
 	frame--;
 	i = ret_insn[frame];
 	idx = ret_prog[frame];
@@ -6247,17 +6303,45 @@ continue_func:
 
 static int check_max_stack_depth(struct bpf_verifier_env *env)
 {
+	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
 	struct bpf_subprog_info *si = env->subprog_info;
+	bool priv_stack_supported;
 	int ret;
 
 	for (int i = 0; i < env->subprog_cnt; i++) {
+		if (si[i].has_tail_call) {
+			priv_stack_mode = NO_PRIV_STACK;
+			break;
+		}
+	}
+
+	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
+		priv_stack_mode = bpf_enable_priv_stack(env->prog);
+
+	/* All async_cb subprogs use normal kernel stack. If a particular
+	 * subprog appears in both main prog and async_cb subtree, that
+	 * subprog will use normal kernel stack to avoid potential nesting.
+	 * The reverse subprog traversal ensures when main prog subtree is
+	 * checked, the subprogs appearing in async_cb subtrees are already
+	 * marked as using normal kernel stack, so stack size checking can
+	 * be done properly.
+	 */
+	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
 		if (!i || si[i].is_async_cb) {
-			ret = check_max_stack_depth_subprog(env, i);
+			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
+			ret = check_max_stack_depth_subprog(env, i, priv_stack_supported);
 			if (ret < 0)
 				return ret;
 		}
-		continue;
 	}
+
+	for (int i = 0; i < env->subprog_cnt; i++) {
+		if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
+			env->prog->aux->jits_use_priv_stack = true;
+			break;
+		}
+	}
+
 	return 0;
 }
@@ -20198,6 +20282,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->name[0] = 'F';
 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
+		if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE)
+			func[i]->aux->jits_use_priv_stack = true;
+
 		func[i]->jit_requested = 1;
 		func[i]->blinding_requested = prog->blinding_requested;
 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
@@ -21966,6 +22053,11 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 		}
 	}
 
+	if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) {
+		verbose(env, "Private stack not supported by jit\n");
+		return -EACCES;
+	}
+
 	/* btf_ctx_access() used this to provide argument type info */
 	prog->aux->ctx_arg_info = st_ops_desc->arg_info[member_idx].info;
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 987d41af71d2..cc9dde507aba 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -245,6 +245,39 @@ __bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx)
 	call_rcu(&ctx->rcu, testmod_free_cb);
 }
 
+static struct bpf_testmod_ops3 *st_ops3;
+
+static int bpf_testmod_test_3(void)
+{
+	return 0;
+}
+
+static int bpf_testmod_test_4(void)
+{
+	return 0;
+}
+
+static struct bpf_testmod_ops3 __bpf_testmod_ops3 = {
+	.test_1 = bpf_testmod_test_3,
+	.test_2 = bpf_testmod_test_4,
+};
+
+static void bpf_testmod_test_struct_ops3(void)
+{
+	if (st_ops3)
+		st_ops3->test_1();
+}
+
+__bpf_kfunc void bpf_testmod_ops3_call_test_1(void)
+{
+	st_ops3->test_1();
+}
+
+__bpf_kfunc void bpf_testmod_ops3_call_test_2(void)
+{
+	st_ops3->test_2();
+}
+
 struct bpf_testmod_btf_type_tag_1 {
 	int a;
 };
@@ -382,6 +415,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 
 	(void)trace_bpf_testmod_test_raw_tp_null(NULL);
 
+	bpf_testmod_test_struct_ops3();
+
 	struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) +
 				sizeof(int)), GFP_KERNEL);
 	if (struct_arg3 != NULL) {
@@ -586,6 +621,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
 BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
+BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2)
 BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
 
 BTF_ID_LIST(bpf_testmod_dtor_ids)
@@ -1096,6 +1133,10 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
 	.is_valid_access = bpf_testmod_ops_is_valid_access,
 };
 
+static const struct bpf_verifier_ops bpf_testmod_verifier_ops3 = {
+	.is_valid_access = bpf_testmod_ops_is_valid_access,
+};
+
 static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
 {
 	struct bpf_testmod_ops *ops = kdata;
@@ -1175,6 +1216,68 @@ struct bpf_struct_ops bpf_testmod_ops2 = {
 	.owner = THIS_MODULE,
 };
 
+static int st_ops3_reg(void *kdata, struct bpf_link *link)
+{
+	int err = 0;
+
+	mutex_lock(&st_ops_mutex);
+	if (st_ops3) {
+		pr_err("st_ops has already been registered\n");
+		err = -EEXIST;
+		goto unlock;
+	}
+	st_ops3 = kdata;
+
+unlock:
+	mutex_unlock(&st_ops_mutex);
+	return err;
+}
+
+static void st_ops3_unreg(void *kdata, struct bpf_link *link)
+{
+	mutex_lock(&st_ops_mutex);
+	st_ops3 = NULL;
+	mutex_unlock(&st_ops_mutex);
+}
+
+static void test_1_recursion_detected(struct bpf_prog *prog)
+{
+	struct bpf_prog_stats *stats;
+
+	stats = this_cpu_ptr(prog->stats);
+	printk("bpf_testmod: oh no, recursing into test_1, recursion_misses %llu",
+	       u64_stats_read(&stats->misses));
+}
+
+static int st_ops3_check_member(const struct btf_type *t,
+				const struct btf_member *member,
+				const struct bpf_prog *prog)
+{
+	u32 moff = __btf_member_bit_offset(t, member) / 8;
+
+	switch (moff) {
+	case offsetof(struct bpf_testmod_ops3, test_1):
+		prog->aux->priv_stack_requested = true;
+		prog->aux->recursion_detected = test_1_recursion_detected;
+		fallthrough;
+	default:
+		break;
+	}
+	return 0;
+}
+
+struct bpf_struct_ops bpf_testmod_ops3 = {
+	.verifier_ops = &bpf_testmod_verifier_ops3,
+	.init = bpf_testmod_ops_init,
+	.init_member = bpf_testmod_ops_init_member,
+	.reg = st_ops3_reg,
+	.unreg = st_ops3_unreg,
+	.check_member = st_ops3_check_member,
+	.cfi_stubs = &__bpf_testmod_ops3,
+	.name = "bpf_testmod_ops3",
+	.owner = THIS_MODULE,
+};
+
 static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args)
 {
 	return 0;
@@ -1333,6 +1436,7 @@ static int bpf_testmod_init(void)
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set);
 	ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
 	ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
+	ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3);
 	ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops);
 	ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
 						 ARRAY_SIZE(bpf_testmod_dtors),
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index fb7dff47597a..356803d1c10e 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -94,6 +94,11 @@ struct bpf_testmod_ops2 {
 	int (*test_1)(void);
 };
 
+struct bpf_testmod_ops3 {
+	int (*test_1)(void);
+	int (*test_2)(void);
+};
+
 struct st_ops_args {
 	u64 a;
 };
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
new file mode 100644
index 000000000000..4006879ca3fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_private_stack.skel.h"
+#include "struct_ops_private_stack_fail.skel.h"
+#include "struct_ops_private_stack_recur.skel.h"
+
+static void test_private_stack(void)
+{
+	struct struct_ops_private_stack *skel;
+	struct bpf_link *link;
+	int err;
+
+	skel = struct_ops_private_stack__open();
+	if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack__open"))
+		return;
+
+	if (skel->data->skip) {
+		test__skip();
+		goto cleanup;
+	}
+
+	err = struct_ops_private_stack__load(skel);
+	if (!ASSERT_OK(err, "struct_ops_private_stack__load"))
+		goto cleanup;
+
+	link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+	if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+		goto cleanup;
+
+	ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+	ASSERT_EQ(skel->bss->val_i, 3, "val_i");
+	ASSERT_EQ(skel->bss->val_j, 8, "val_j");
+
+	bpf_link__destroy(link);
+
+cleanup:
+	struct_ops_private_stack__destroy(skel);
+}
+
+static void test_private_stack_fail(void)
+{
+	struct struct_ops_private_stack_fail *skel;
+	int err;
+
+	skel = struct_ops_private_stack_fail__open();
+	if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_fail__open"))
+		return;
+
+	if (skel->data->skip) {
+		test__skip();
+		goto cleanup;
+	}
+
+	err = struct_ops_private_stack_fail__load(skel);
+	if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load"))
+		goto cleanup;
+	return;
+
+cleanup:
+	struct_ops_private_stack_fail__destroy(skel);
+}
+
+static void test_private_stack_recur(void)
+{
+	struct struct_ops_private_stack_recur *skel;
+	struct bpf_link *link;
+	int err;
+
+	skel = struct_ops_private_stack_recur__open();
+	if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_recur__open"))
+		return;
+
+	if (skel->data->skip) {
+		test__skip();
+		goto cleanup;
+	}
+
+	err = struct_ops_private_stack_recur__load(skel);
+	if (!ASSERT_OK(err, "struct_ops_private_stack_recur__load"))
+		goto cleanup;
+
+	link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+	if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+		goto cleanup;
+
+	ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+	ASSERT_EQ(skel->bss->val_j, 3, "val_j");
+
+	bpf_link__destroy(link);
+
+cleanup:
+	struct_ops_private_stack_recur__destroy(skel);
+}
+
+void test_struct_ops_private_stack(void)
+{
+	if (test__start_subtest("private_stack"))
+		test_private_stack();
+	if (test__start_subtest("private_stack_fail"))
+		test_private_stack_fail();
+	if (test__start_subtest("private_stack_recur"))
+		test_private_stack_recur();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 75f7a2ce334b..d9f65adb456b 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -61,6 +61,7 @@
 #include "verifier_or_jmp32_k.skel.h"
 #include "verifier_precision.skel.h"
 #include "verifier_prevent_map_lookup.skel.h"
+#include "verifier_private_stack.skel.h"
 #include "verifier_raw_stack.skel.h"
 #include "verifier_raw_tp_writable.skel.h"
 #include "verifier_reg_equal.skel.h"
@@ -188,6 +189,7 @@ void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
 void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); }
 void test_verifier_precision(void) { RUN(verifier_precision); }
 void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
+void test_verifier_private_stack(void) { RUN(verifier_private_stack); }
 void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
 void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
 void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); }
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
new file mode 100644
index 000000000000..8ea57e5348ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+bool skip __attribute((__section__(".data"))) = false;
+#else
+bool skip = true;
+#endif
+
+void bpf_testmod_ops3_call_test_2(void) __ksym;
+
+int val_i, val_j;
+
+__noinline static int subprog2(int *a, int *b)
+{
+	return val_i + a[10] + b[20];
+}
+
+__noinline static int subprog1(int *a)
+{
+	/* stack size 200 bytes */
+	int b[50] = {};
+
+	b[20] = 2;
+	return subprog2(a, b);
+}
+
+
+SEC("struct_ops")
+int BPF_PROG(test_1)
+{
+	/* stack size 400 bytes */
+	int a[100] = {};
+
+	a[10] = 1;
+	val_i = subprog1(a);
+	bpf_testmod_ops3_call_test_2();
+	return 0;
+}
+
+SEC("struct_ops")
+int BPF_PROG(test_2)
+{
+	/* stack size 200 bytes */
+	int a[50] = {};
+
+	a[10] = 3;
+	val_j = subprog1(a);
+	return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_testmod_ops3 testmod_1 = {
+	.test_1 = (void *)test_1,
+	.test_2 = (void *)test_2,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
new file mode 100644
index 000000000000..1f55ec4cee37
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+bool skip __attribute((__section__(".data"))) = false;
+#else
+bool skip = true;
+#endif
+
+void bpf_testmod_ops3_call_test_2(void) __ksym;
+
+int val_i, val_j;
+
+__noinline static int subprog2(int *a, int *b)
+{
+	return val_i + a[10] + b[20];
+}
+
+__noinline static int subprog1(int *a)
+{
+	/* stack size 200 bytes */
+	int b[50] = {};
+
+	b[20] = 2;
+	return subprog2(a, b);
+}
+
+
+SEC("struct_ops")
+int BPF_PROG(test_1)
+{
+	/* stack size 100 bytes */
+	int a[25] = {};
+
+	a[10] = 1;
+	val_i = subprog1(a);
+	bpf_testmod_ops3_call_test_2();
+	return 0;
+}
+
+SEC("struct_ops")
+int BPF_PROG(test_2)
+{
+	/* stack size 400 bytes */
+	int a[100] = {};
+
+	a[10] = 3;
+	val_j = subprog1(a);
+	return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_testmod_ops3 testmod_1 = {
+	.test_1 = (void *)test_1,
+	.test_2 = (void *)test_2,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
new file mode 100644
index 000000000000..f2f300d50988
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+bool skip __attribute((__section__(".data"))) = false;
+#else
+bool skip = true;
+#endif
+
+void bpf_testmod_ops3_call_test_1(void) __ksym;
+
+int val_i, val_j;
+
+__noinline static int subprog2(int *a, int *b)
+{
+	return val_i + a[1] + b[20];
+}
+
+__noinline static int subprog1(int *a)
+{
+	/* stack size 400 bytes */
+	int b[100] = {};
+
+	b[20] = 2;
+	return subprog2(a, b);
+}
+
+
+SEC("struct_ops")
+int BPF_PROG(test_1)
+{
+	/* stack size 20 bytes */
+	int a[5] = {};
+
+	a[1] = 1;
+	val_j += subprog1(a);
+	bpf_testmod_ops3_call_test_1();
+	return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_testmod_ops3 testmod_1 = {
+	.test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
new file mode 100644
index 000000000000..b1fbdf119553
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+/* From include/linux/filter.h */
+#define MAX_BPF_STACK	512
+
+#if defined(__TARGET_ARCH_x86)
+
+struct elem {
+	struct bpf_timer t;
+	char pad[256];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} array SEC(".maps");
+
+SEC("kprobe")
+__description("Private stack, single prog")
+__success
+__arch_x86_64
+__jited("	movabsq	$0x{{.*}}, %r9")
+__jited("	addq	%gs:0x{{.*}}, %r9")
+__jited("	movl	$0x2a, %edi")
+__jited("	movq	%rdi, -0x100(%r9)")
+__naked void private_stack_single_prog(void)
+{
+	asm volatile ("			\
+	r1 = 42;			\
+	*(u64 *)(r10 - 256) = r1;	\
+	r0 = 0;				\
+	exit;				\
+"	::: __clobber_all);
+}
+
+SEC("raw_tp")
+__description("No private stack")
+__success
+__arch_x86_64
+__jited("	subq	$0x8, %rsp")
+__naked void no_private_stack_nested(void)
+{
+	asm volatile ("			\
+	r1 = 42;			\
+	*(u64 *)(r10 - 8) = r1;		\
+	r0 = 0;				\
+	exit;				\
+"	::: __clobber_all);
+}
+
+__used
+__naked static void cumulative_stack_depth_subprog(void)
+{
+	asm volatile ("				\
+	r1 = 41;				\
+	*(u64 *)(r10 - 32) = r1;		\
+	call %[bpf_get_smp_processor_id];	\
+	exit;					\
+"	:
+	: __imm(bpf_get_smp_processor_id)
+	: __clobber_all);
+}
+
+SEC("kprobe")
+__description("Private stack, subtree > MAX_BPF_STACK")
+__success
+__arch_x86_64
+/* private stack fp for the main prog */
+__jited("	movabsq	$0x{{.*}}, %r9")
+__jited("	addq	%gs:0x{{.*}}, %r9")
+__jited("	movl	$0x2a, %edi")
+__jited("	movq	%rdi, -0x200(%r9)")
+__jited("	pushq	%r9")
+__jited("	callq	0x{{.*}}")
+__jited("	popq	%r9")
+__jited("	xorl	%eax, %eax")
+__naked void private_stack_nested_1(void)
+{
+	asm volatile ("				\
+	r1 = 42;				\
+	*(u64 *)(r10 - %[max_bpf_stack]) = r1;	\
+	call cumulative_stack_depth_subprog;	\
+	r0 = 0;					\
+	exit;					\
+"	:
+	: __imm_const(max_bpf_stack, MAX_BPF_STACK)
+	: __clobber_all);
+}
+
+__naked __noinline __used
+static unsigned long loop_callback(void)
+{
+	asm volatile ("				\
+	call %[bpf_get_prandom_u32];		\
+	r1 = 42;				\
+	*(u64 *)(r10 - 512) = r1;		\
+	call cumulative_stack_depth_subprog;	\
+	r0 = 0;					\
+	exit;					\
+"	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_common);
+}
+
+SEC("raw_tp")
+__description("Private stack, callback")
+__success
+__arch_x86_64
+/* for func loop_callback */
+__jited("func #1")
+__jited("	endbr64")
+__jited("	nopl	(%rax,%rax)")
+__jited("	nopl	(%rax)")
+__jited("	pushq	%rbp")
+__jited("	movq	%rsp, %rbp")
+__jited("	endbr64")
+__jited("	movabsq	$0x{{.*}}, %r9")
+__jited("	addq	%gs:0x{{.*}}, %r9")
+__jited("	pushq	%r9")
+__jited("	callq")
+__jited("	popq	%r9")
+__jited("	movl	$0x2a, %edi")
+__jited("	movq	%rdi, -0x200(%r9)")
+__jited("	pushq	%r9")
+__jited("	callq")
+__jited("	popq	%r9")
+__naked void private_stack_callback(void)
+{
+	asm volatile ("			\
+	r1 = 1;				\
+	r2 = %[loop_callback];		\
+	r3 = 0;				\
+	r4 = 0;				\
+	call %[bpf_loop];		\
+	r0 = 0;				\
+	exit;				\
+"	:
+	: __imm_ptr(loop_callback),
+	  __imm(bpf_loop)
+	: __clobber_common);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, exception in main prog")
+__success __retval(0)
+__arch_x86_64
+__jited("	pushq	%r9")
+__jited("	callq")
+__jited("	popq	%r9")
+int private_stack_exception_main_prog(void)
+{
+	asm volatile ("			\
+	r1 = 42;			\
+	*(u64 *)(r10 - 512) = r1;	\
+"	::: __clobber_common);
+
+	bpf_throw(0);
+	return 0;
+}
+
+__used static int subprog_exception(void)
+{
+	bpf_throw(0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, exception in subprog")
+__success __retval(0)
+__arch_x86_64
+__jited("	movq	%rdi, -0x200(%r9)")
+__jited("	pushq	%r9")
+__jited("	callq")
+__jited("	popq	%r9")
+int private_stack_exception_sub_prog(void)
+{
+	asm volatile ("			\
+	r1 = 42;			\
+	*(u64 *)(r10 - 512) = r1;	\
+	call subprog_exception;		\
+"	::: __clobber_common);
+
+	return 0;
+}
+
+int glob;
+__noinline static void subprog2(int *val)
+{
+	glob += val[0] * 2;
+}
+
+__noinline static void subprog1(int *val)
+{
+	int tmp[64] = {};
+
+	tmp[0] = *val;
+	subprog2(tmp);
+}
+
+__noinline static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+	subprog1(key);
+	return 0;
+}
+
+__noinline static int timer_cb2(void *map, int *key, struct bpf_timer *timer)
+{
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, async callback, not nested")
+__success __retval(0)
+__arch_x86_64
+__jited("	movabsq	$0x{{.*}}, %r9")
+int private_stack_async_callback_1(void)
+{
+	struct bpf_timer *arr_timer;
+	int array_key = 0;
+
+	arr_timer = bpf_map_lookup_elem(&array, &array_key);
+	if (!arr_timer)
+		return 0;
+
+	bpf_timer_init(arr_timer, &array, 1);
+	bpf_timer_set_callback(arr_timer, timer_cb2);
+	bpf_timer_start(arr_timer, 0, 0);
+	subprog1(&array_key);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("Private stack, async callback, potential nesting")
+__success __retval(0)
+__arch_x86_64
+__jited("	subq	$0x100, %rsp")
+int private_stack_async_callback_2(void)
+{
+	struct bpf_timer *arr_timer;
+	int array_key = 0;
+
+	arr_timer = bpf_map_lookup_elem(&array, &array_key);
+	if (!arr_timer)
+		return 0;
+
+	bpf_timer_init(arr_timer, &array, 1);
+	bpf_timer_set_callback(arr_timer, timer_cb1);
+	bpf_timer_start(arr_timer, 0, 0);
+	subprog1(&array_key);
+	return 0;
+}
+
+#else
+
+SEC("kprobe")
+__description("private stack is not supported, use a dummy test")
+__success
+int dummy_test(void)
+{
+	return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
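
On the verifier side, the central accounting change in check_max_stack_depth_subprog() above is that a frame whose subprog was switched to the private stack (PRIV_STACK_ADAPTIVE) is checked against MAX_BPF_STACK on its own and no longer counts toward the combined depth of the bpf2bpf call chain, which is why the struct_ops selftest can nest a 400-byte frame over a 200-byte one under the 512-byte limit. A compact user-space sketch of that rule, where frame_info and check_chain() are inventions of this sketch rather than kernel structures:

#include <stdio.h>

#define MAX_BPF_STACK		512
#define BPF_PRIV_STACK_MIN_SIZE	64

struct frame_info {
	int stack_depth;	/* verifier-computed, already rounded up */
	int uses_priv_stack;	/* PRIV_STACK_ADAPTIVE in the real code */
};

static int check_chain(const struct frame_info *frames, int n)
{
	int depth = 0;

	for (int i = 0; i < n; i++) {
		if (frames[i].uses_priv_stack) {
			/* Private-stack frames are capped individually. */
			if (frames[i].stack_depth > MAX_BPF_STACK)
				return -1;
			continue;	/* excluded from the combined depth */
		}
		depth += frames[i].stack_depth;
		if (depth > MAX_BPF_STACK)
			return -1;	/* combined stack size too large */
	}
	return 0;
}

int main(void)
{
	/* main prog (400 bytes, private) -> subprog1 (200, private) -> subprog2 (24) */
	struct frame_info chain[] = { { 400, 1 }, { 200, 1 }, { 24, 0 } };

	printf("chain %s\n", check_chain(chain, 3) ? "rejected" : "accepted");
	return 0;
}

In the real verifier the switch to PRIV_STACK_ADAPTIVE additionally requires the frame to be at least BPF_PRIV_STACK_MIN_SIZE (64) bytes, so small frames stay on the normal kernel stack and skip the push/pop of r9 the JIT otherwise emits around calls.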