diff options
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/jit.c | 381 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/main.h | 52 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/offload.c | 11 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 141 | ||||
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/nfp_asm.h | 9 | ||||
-rw-r--r-- | drivers/net/netdevsim/bpf.c | 8 | ||||
-rw-r--r-- | include/linux/bpf.h | 1 | ||||
-rw-r--r-- | include/linux/bpf_verifier.h | 1 | ||||
-rw-r--r-- | kernel/bpf/offload.c | 18 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 13 |
10 files changed, 589 insertions, 46 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index eff57f7d056a..6ed1b5207ecd 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -267,6 +267,38 @@ emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer) } static void +__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + u8 defer, bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_BR_ALU_BASE | + FIELD_PREP(OP_BR_ALU_A_SRC, areg) | + FIELD_PREP(OP_BR_ALU_B_SRC, breg) | + FIELD_PREP(OP_BR_ALU_DEFBR, defer) | + FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) | + FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), &reg); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn, + reg.src_lmextn); +} + +static void __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, enum immed_width width, bool invert, enum immed_shift shift, bool wr_both, @@ -1137,7 +1169,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, bool clr_gpr, lmem_step step) { - s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; + s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off; bool first = true, last; bool needs_inc = false; swreg stack_off_reg; @@ -1146,7 +1178,8 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool lm3 = true; int ret; - if (meta->ptr_not_const) { + if (meta->ptr_not_const || + meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) { /* Use of the last encountered ptr_off is OK, they all have * the same alignment. 
Depend on low bits of value being * discarded when written to LMaddr register. @@ -1695,7 +1728,7 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) s64 lm_off; /* We only have to reload LM0 if the key is not at start of stack */ - lm_off = nfp_prog->stack_depth; + lm_off = nfp_prog->stack_frame_depth; lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; load_lm_ptr = meta->arg2.var_off || lm_off; @@ -1808,10 +1841,10 @@ static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) swreg stack_depth_reg; stack_depth_reg = ur_load_imm_any(nfp_prog, - nfp_prog->stack_depth, + nfp_prog->stack_frame_depth, stack_imm(nfp_prog)); - emit_alu(nfp_prog, reg_both(dst), - stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); + emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog), + ALU_OP_ADD, stack_depth_reg); wrp_immed(nfp_prog, reg_both(dst + 1), 0); } else { wrp_reg_mov(nfp_prog, dst, src); @@ -3081,7 +3114,93 @@ static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); } -static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int +bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u32 ret_tgt, stack_depth, offset_br; + swreg tmp_reg; + + stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN); + /* Space for saving the return address is accounted for by the callee, + * so stack_depth can be zero for the main function. + */ + if (stack_depth) { + tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, stack_reg(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg); + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), + NFP_CSR_ACT_LM_ADDR0); + } + + /* Two cases for jumping to the callee: + * + * - If callee uses and needs to save R6~R9 then: + * 1. Put the start offset of the callee into imm_b(). 
This will + * require a fixup step, as we do not necessarily know this + * address yet. + * 2. Put the return address from the callee to the caller into + * register ret_reg(). + * 3. (After defer slots are consumed) Jump to the subroutine that + * pushes the registers to the stack. + * The subroutine acts as a trampoline, and returns to the address in + * imm_b(), i.e. jumps to the callee. + * + * - If callee does not need to save R6~R9 then just load return + * address to the caller in ret_reg(), and jump to the callee + * directly. + * + * Using ret_reg() to pass the return address to the callee is set here + * as a convention. The callee can then push this address onto its + * stack frame in its prologue. The advantages of passing the return + * address through ret_reg(), instead of pushing it to the stack right + * here, are the following: + * - It looks cleaner. + * - If the called function is called multiple times, we get a lower + * program size. + * - We save two no-op instructions that would otherwise have to be + * added just before the emit_br() when the stack depth is non-zero. + * - If we ever find a register to hold the return address during the + * whole execution of the callee, we will not have to push the return + * address to the stack for leaf functions. 
+ */ + if (!meta->jmp_dst) { + pr_err("BUG: BPF-to-BPF call has no destination recorded\n"); + return -ELOOP; + } + if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) { + ret_tgt = nfp_prog_current_offset(nfp_prog) + 3; + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, + RELO_BR_GO_CALL_PUSH_REGS); + offset_br = nfp_prog_current_offset(nfp_prog); + wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL); + } else { + ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; + emit_br(nfp_prog, BR_UNC, meta->n + 1 + meta->insn.imm, 1); + offset_br = nfp_prog_current_offset(nfp_prog); + } + wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL); + + if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) + return -EINVAL; + + if (stack_depth) { + tmp_reg = ur_load_imm_any(nfp_prog, stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, stack_reg(nfp_prog), + stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg); + emit_csr_wr(nfp_prog, stack_reg(nfp_prog), + NFP_CSR_ACT_LM_ADDR0); + wrp_nops(nfp_prog, 3); + } + + meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog); + meta->num_insns_after_br -= offset_br; + + return 0; +} + +static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { switch (meta->insn.imm) { case BPF_FUNC_xdp_adjust_head: @@ -3102,6 +3221,19 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) } } +static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (is_mbpf_pseudo_call(meta)) + return bpf_to_bpf_call(nfp_prog, meta); + else + return helper_call(nfp_prog, meta); +} + +static bool nfp_is_main_function(struct nfp_insn_meta *meta) +{ + return meta->subprog_idx == 0; +} + static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); @@ -3109,6 +3241,39 @@ static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int +nfp_subprog_epilogue(struct 
nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) { + /* Pop R6~R9 from the stack via related subroutine. + * We loaded the return address to the caller into ret_reg(). + * This means that the subroutine does not come back here, we + * make it jump back to the subprogram caller directly! + */ + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1, + RELO_BR_GO_CALL_POP_REGS); + /* Pop return address from the stack. */ + wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); + } else { + /* Pop return address from the stack. */ + wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0)); + /* Jump back to caller if no callee-saved registers were used + * by the subprogram. + */ + emit_rtn(nfp_prog, ret_reg(nfp_prog), 0); + } + + return 0; +} + +static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (nfp_is_main_function(meta)) + return goto_out(nfp_prog, meta); + else + return nfp_subprog_epilogue(nfp_prog, meta); +} + static const instr_cb_t instr_cb[256] = { [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, @@ -3197,36 +3362,66 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, [BPF_JMP | BPF_CALL] = call, - [BPF_JMP | BPF_EXIT] = goto_out, + [BPF_JMP | BPF_EXIT] = jmp_exit, }; /* --- Assembler logic --- */ +static int +nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct nfp_insn_meta *jmp_dst, u32 br_idx) +{ + if (immed_get_value(nfp_prog->prog[br_idx + 1])) { + pr_err("BUG: failed to fix up callee register saving\n"); + return -EINVAL; + } + + immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off); + + return 0; +} + static int nfp_fixup_branches(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta, *jmp_dst; u32 idx, br_idx; + int err; list_for_each_entry(meta, &nfp_prog->insns, l) { if (meta->skip) continue; - if (meta->insn.code == (BPF_JMP | 
BPF_CALL)) - continue; if (BPF_CLASS(meta->insn.code) != BPF_JMP) continue; + if (meta->insn.code == (BPF_JMP | BPF_EXIT) && + !nfp_is_main_function(meta)) + continue; + if (is_mbpf_helper_call(meta)) + continue; if (list_is_last(&meta->l, &nfp_prog->insns)) br_idx = nfp_prog->last_bpf_off; else br_idx = list_next_entry(meta, l)->off - 1; + /* For BPF-to-BPF function call, a stack adjustment sequence is + * generated after the return instruction. Therefore, we must + * withdraw the length of this sequence to have br_idx pointing + * to where the "branch" NFP instruction is expected to be. + */ + if (is_mbpf_pseudo_call(meta)) + br_idx -= meta->num_insns_after_br; + if (!nfp_is_br(nfp_prog->prog[br_idx])) { pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", br_idx, meta->insn.code, nfp_prog->prog[br_idx]); return -ELOOP; } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + continue; + /* Leave special branches for later */ if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != - RELO_BR_REL) + RELO_BR_REL && !is_mbpf_pseudo_call(meta)) continue; if (!meta->jmp_dst) { @@ -3241,6 +3436,18 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } + if (is_mbpf_pseudo_call(meta) && + nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) { + err = nfp_fixup_immed_relo(nfp_prog, meta, + jmp_dst, br_idx); + if (err) + return err; + } + + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL) + continue; + for (idx = meta->off; idx <= br_idx; idx++) { if (!nfp_is_br(nfp_prog->prog[idx])) continue; @@ -3258,6 +3465,27 @@ static void nfp_intro(struct nfp_prog *nfp_prog) plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); } +static void +nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + /* Save return address into the stack. 
*/ + wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog)); +} + +static void +nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth; + + nfp_prog->stack_frame_depth = round_up(depth, 4); + nfp_subprog_prologue(nfp_prog, meta); +} + +bool nfp_is_subprog_start(struct nfp_insn_meta *meta) +{ + return meta->flags & FLAG_INSN_IS_SUBPROG_START; +} + static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) { /* TC direct-action mode: @@ -3348,6 +3576,67 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); } +static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog) +{ + unsigned int idx; + + for (idx = 1; idx < nfp_prog->subprog_cnt; idx++) + if (nfp_prog->subprog[idx].needs_reg_push) + return true; + + return false; +} + +static void nfp_push_callee_registers(struct nfp_prog *nfp_prog) +{ + u8 reg; + + /* Subroutine: Save all callee saved registers (R6 ~ R9). + * imm_b() holds the return address. + */ + nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog); + for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { + u8 adj = (reg - BPF_REG_0) * 2; + u8 idx = (reg - BPF_REG_6) * 2; + + /* The first slot in the stack frame is used to push the return + * address in bpf_to_bpf_call(), start just after. + */ + wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj)); + + if (reg == BPF_REG_8) + /* Prepare to jump back, last 3 insns use defer slots */ + emit_rtn(nfp_prog, imm_b(nfp_prog), 3); + + wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1)); + } +} + +static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog) +{ + u8 reg; + + /* Subroutine: Restore all callee saved registers (R6 ~ R9). + * ret_reg() holds the return address. 
+ */ + nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog); + for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) { + u8 adj = (reg - BPF_REG_0) * 2; + u8 idx = (reg - BPF_REG_6) * 2; + + /* The first slot in the stack frame holds the return address, + * start popping just after that. + */ + wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx)); + + if (reg == BPF_REG_8) + /* Prepare to jump back, last 3 insns use defer slots */ + emit_rtn(nfp_prog, ret_reg(nfp_prog), 3); + + wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1)); + } +} + static void nfp_outro(struct nfp_prog *nfp_prog) { switch (nfp_prog->type) { @@ -3360,13 +3649,23 @@ static void nfp_outro(struct nfp_prog *nfp_prog) default: WARN_ON(1); } + + if (!nfp_prog_needs_callee_reg_save(nfp_prog)) + return; + + nfp_push_callee_registers(nfp_prog); + nfp_pop_callee_registers(nfp_prog); } static int nfp_translate(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta; + unsigned int depth; int err; + depth = nfp_prog->subprog[0].stack_depth; + nfp_prog->stack_frame_depth = round_up(depth, 4); + nfp_intro(nfp_prog); if (nfp_prog->error) return nfp_prog->error; @@ -3376,6 +3675,12 @@ static int nfp_translate(struct nfp_prog *nfp_prog) meta->off = nfp_prog_current_offset(nfp_prog); + if (nfp_is_subprog_start(meta)) { + nfp_start_subprog(nfp_prog, meta); + if (nfp_prog->error) + return nfp_prog->error; + } + if (meta->skip) { nfp_prog->n_translated++; continue; @@ -4018,20 +4323,35 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) /* Another pass to record jump information. 
*/ list_for_each_entry(meta, &nfp_prog->insns, l) { + struct nfp_insn_meta *dst_meta; u64 code = meta->insn.code; + unsigned int dst_idx; + bool pseudo_call; + + if (BPF_CLASS(code) != BPF_JMP) + continue; + if (BPF_OP(code) == BPF_EXIT) + continue; + if (is_mbpf_helper_call(meta)) + continue; - if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && - BPF_OP(code) != BPF_CALL) { - struct nfp_insn_meta *dst_meta; - unsigned short dst_indx; + /* If opcode is BPF_CALL at this point, this can only be a + * BPF-to-BPF call (a.k.a pseudo call). + */ + pseudo_call = BPF_OP(code) == BPF_CALL; - dst_indx = meta->n + 1 + meta->insn.off; - dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, - cnt); + if (pseudo_call) + dst_idx = meta->n + 1 + meta->insn.imm; + else + dst_idx = meta->n + 1 + meta->insn.off; - meta->jmp_dst = dst_meta; - dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; - } + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx, cnt); + + if (pseudo_call) + dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START; + + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; + meta->jmp_dst = dst_meta; } } @@ -4054,6 +4374,7 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) for (i = 0; i < nfp_prog->prog_len; i++) { enum nfp_relo_type special; u32 val; + u16 off; special = FIELD_GET(OP_RELO_TYPE, prog[i]); switch (special) { @@ -4070,6 +4391,24 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) br_set_offset(&prog[i], nfp_prog->tgt_abort + bv->start_off); break; + case RELO_BR_GO_CALL_PUSH_REGS: + if (!nfp_prog->tgt_call_push_regs) { + pr_err("BUG: failed to detect subprogram registers needs\n"); + err = -EINVAL; + goto err_free_prog; + } + off = nfp_prog->tgt_call_push_regs + bv->start_off; + br_set_offset(&prog[i], off); + break; + case RELO_BR_GO_CALL_POP_REGS: + if (!nfp_prog->tgt_call_pop_regs) { + pr_err("BUG: failed to detect subprogram registers needs\n"); + err = -EINVAL; + goto err_free_prog; + } + off = 
nfp_prog->tgt_call_pop_regs + bv->start_off; + br_set_offset(&prog[i], off); + break; case RELO_BR_NEXT_PKT: br_set_offset(&prog[i], bv->tgt_done); break; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 792ebc4081a3..25e10cfa2678 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -61,6 +61,8 @@ enum nfp_relo_type { /* internal jumps to parts of the outro */ RELO_BR_GO_OUT, RELO_BR_GO_ABORT, + RELO_BR_GO_CALL_PUSH_REGS, + RELO_BR_GO_CALL_POP_REGS, /* external jumps to fixed addresses */ RELO_BR_NEXT_PKT, RELO_BR_HELPER, @@ -104,6 +106,7 @@ enum pkt_vec { #define imma_a(np) reg_a(STATIC_REG_IMMA) #define imma_b(np) reg_b(STATIC_REG_IMMA) #define imm_both(np) reg_both(STATIC_REG_IMM) +#define ret_reg(np) imm_a(np) #define NFP_BPF_ABI_FLAGS reg_imm(0) #define NFP_BPF_ABI_FLAG_MARK 1 @@ -262,7 +265,9 @@ struct nfp_bpf_reg_state { bool var_off; }; -#define FLAG_INSN_IS_JUMP_DST BIT(0) +#define FLAG_INSN_IS_JUMP_DST BIT(0) +#define FLAG_INSN_IS_SUBPROG_START BIT(1) +#define FLAG_INSN_PTR_CALLER_STACK_FRAME BIT(2) /** * struct nfp_insn_meta - BPF instruction wrapper @@ -279,6 +284,7 @@ struct nfp_bpf_reg_state { * @xadd_maybe_16bit: 16bit immediate is possible * @jmp_dst: destination info for jump instructions * @jump_neg_op: jump instruction has inverted immediate, use ADD instead of SUB + * @num_insns_after_br: number of insns following a branch jump, used for fixup * @func_id: function id for call instructions * @arg1: arg1 for call instructions * @arg2: arg2 for call instructions @@ -289,6 +295,7 @@ struct nfp_bpf_reg_state { * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @flags: eBPF instruction extra optimization flags + * @subprog_idx: index of subprogram to which the instruction belongs * @skip: skip this instruction (optimized out) * @double_cb: callback for second part of the 
instruction * @l: link on nfp_prog->insns list @@ -314,6 +321,7 @@ struct nfp_insn_meta { struct { struct nfp_insn_meta *jmp_dst; bool jump_neg_op; + u32 num_insns_after_br; /* only for BPF-to-BPF calls */ }; /* function calls */ struct { @@ -335,6 +343,7 @@ struct nfp_insn_meta { unsigned int off; unsigned short n; unsigned short flags; + unsigned short subprog_idx; bool skip; instr_cb_t double_cb; @@ -423,6 +432,34 @@ static inline bool is_mbpf_div(const struct nfp_insn_meta *meta) return is_mbpf_alu(meta) && mbpf_op(meta) == BPF_DIV; } +static inline bool is_mbpf_helper_call(const struct nfp_insn_meta *meta) +{ + struct bpf_insn insn = meta->insn; + + return insn.code == (BPF_JMP | BPF_CALL) && + insn.src_reg != BPF_PSEUDO_CALL; +} + +static inline bool is_mbpf_pseudo_call(const struct nfp_insn_meta *meta) +{ + struct bpf_insn insn = meta->insn; + + return insn.code == (BPF_JMP | BPF_CALL) && + insn.src_reg == BPF_PSEUDO_CALL; +} + +#define STACK_FRAME_ALIGN 64 + +/** + * struct nfp_bpf_subprog_info - nfp BPF sub-program (a.k.a. function) info + * @stack_depth: maximum stack depth used by this sub-program + * @needs_reg_push: whether sub-program uses callee-saved registers + */ +struct nfp_bpf_subprog_info { + u16 stack_depth; + u8 needs_reg_push : 1; +}; + /** * struct nfp_prog - nfp BPF program * @bpf: backpointer to the bpf app priv structure @@ -434,12 +471,16 @@ static inline bool is_mbpf_div(const struct nfp_insn_meta *meta) * @last_bpf_off: address of the last instruction translated from BPF * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. 
access outside of packet buffer) + * @tgt_call_push_regs: jump target for subroutine for saving R6~R9 to stack + * @tgt_call_pop_regs: jump target for subroutine used for restoring R6~R9 * @n_translated: number of successfully translated instructions (for errors) * @error: error code if something went wrong - * @stack_depth: max stack depth from the verifier + * @stack_frame_depth: max stack depth for current frame * @adjust_head_location: if program has single adjust head call - the insn no. * @map_records_cnt: the number of map pointers recorded for this prog + * @subprog_cnt: number of sub-programs, including main function * @map_records: the map record pointers from bpf->maps_neutral + * @subprog: pointer to an array of objects holding info about sub-programs * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) */ struct nfp_prog { @@ -456,15 +497,19 @@ struct nfp_prog { unsigned int last_bpf_off; unsigned int tgt_out; unsigned int tgt_abort; + unsigned int tgt_call_push_regs; + unsigned int tgt_call_pop_regs; unsigned int n_translated; int error; - unsigned int stack_depth; + unsigned int stack_frame_depth; unsigned int adjust_head_location; unsigned int map_records_cnt; + unsigned int subprog_cnt; struct nfp_bpf_neutral_map **map_records; + struct nfp_bpf_subprog_info *subprog; struct list_head insns; }; @@ -481,6 +526,7 @@ struct nfp_bpf_vnic { unsigned int tgt_done; }; +bool nfp_is_subprog_start(struct nfp_insn_meta *meta); void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); int nfp_bpf_jit(struct nfp_prog *prog); bool nfp_bpf_supported_opcode(u8 code); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 1ccd6371a15b..49c7bead8113 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -208,6 +208,8 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta, *tmp; + 
kfree(nfp_prog->subprog); + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { list_del(&meta->l); kfree(meta); @@ -250,18 +252,9 @@ err_free: static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - unsigned int stack_size; unsigned int max_instr; int err; - stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; - if (prog->aux->stack_depth > stack_size) { - nn_info(nn, "stack too large: program %dB > FW stack %dB\n", - prog->aux->stack_depth, stack_size); - return -EOPNOTSUPP; - } - nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4); - max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index a6e9248669e1..cddb70786a58 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -34,10 +34,12 @@ #include <linux/bpf.h> #include <linux/bpf_verifier.h> #include <linux/kernel.h> +#include <linux/netdevice.h> #include <linux/pkt_cls.h> #include "../nfp_app.h" #include "../nfp_main.h" +#include "../nfp_net.h" #include "fw.h" #include "main.h" @@ -155,8 +157,9 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env, } static int -nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, - struct nfp_insn_meta *meta) +nfp_bpf_check_helper_call(struct nfp_prog *nfp_prog, + struct bpf_verifier_env *env, + struct nfp_insn_meta *meta) { const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1; const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2; @@ -333,6 +336,9 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, { s32 old_off, new_off; + if (reg->frameno != env->cur_state->curframe) + meta->flags |= FLAG_INSN_PTR_CALLER_STACK_FRAME; + if (!tnum_is_const(reg->var_off)) { pr_vlog(env, "variable ptr stack 
access\n"); return -EINVAL; @@ -620,8 +626,8 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) return -EINVAL; } - if (meta->insn.code == (BPF_JMP | BPF_CALL)) - return nfp_bpf_check_call(nfp_prog, env, meta); + if (is_mbpf_helper_call(meta)) + return nfp_bpf_check_helper_call(nfp_prog, env, meta); if (meta->insn.code == (BPF_JMP | BPF_EXIT)) return nfp_bpf_check_exit(nfp_prog, env); @@ -640,6 +646,131 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) return 0; } +static int +nfp_assign_subprog_idx_and_regs(struct bpf_verifier_env *env, + struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int index = 0; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (nfp_is_subprog_start(meta)) + index++; + meta->subprog_idx = index; + + if (meta->insn.dst_reg >= BPF_REG_6 && + meta->insn.dst_reg <= BPF_REG_9) + nfp_prog->subprog[index].needs_reg_push = 1; + } + + if (index + 1 != nfp_prog->subprog_cnt) { + pr_vlog(env, "BUG: number of processed BPF functions is not consistent (processed %d, expected %d)\n", + index + 1, nfp_prog->subprog_cnt); + return -EFAULT; + } + + return 0; +} + +static unsigned int +nfp_bpf_get_stack_usage(struct nfp_prog *nfp_prog, unsigned int cnt) +{ + struct nfp_insn_meta *meta = nfp_prog_first_meta(nfp_prog); + unsigned int max_depth = 0, depth = 0, frame = 0; + struct nfp_insn_meta *ret_insn[MAX_CALL_FRAMES]; + unsigned short frame_depths[MAX_CALL_FRAMES]; + unsigned short ret_prog[MAX_CALL_FRAMES]; + unsigned short idx = meta->subprog_idx; + + /* Inspired from check_max_stack_depth() from kernel verifier. + * Starting from main subprogram, walk all instructions and recursively + * walk all callees that given subprogram can call. Since recursion is + * prevented by the kernel verifier, this algorithm only needs a local + * stack of MAX_CALL_FRAMES to remember callsites. 
+ */ +process_subprog: + frame_depths[frame] = nfp_prog->subprog[idx].stack_depth; + frame_depths[frame] = round_up(frame_depths[frame], STACK_FRAME_ALIGN); + depth += frame_depths[frame]; + max_depth = max(max_depth, depth); + +continue_subprog: + for (; meta != nfp_prog_last_meta(nfp_prog) && meta->subprog_idx == idx; + meta = nfp_meta_next(meta)) { + if (!is_mbpf_pseudo_call(meta)) + continue; + + /* We found a call to a subprogram. Remember instruction to + * return to and subprog id. + */ + ret_insn[frame] = nfp_meta_next(meta); + ret_prog[frame] = idx; + + /* Find the callee and start processing it. */ + meta = nfp_bpf_goto_meta(nfp_prog, meta, + meta->n + 1 + meta->insn.imm, cnt); + idx = meta->subprog_idx; + frame++; + goto process_subprog; + } + /* End of for() loop means the last instruction of the subprog was + * reached. If we popped all stack frames, return; otherwise, go on + * processing remaining instructions from the caller. + */ + if (frame == 0) + return max_depth; + + depth -= frame_depths[frame]; + frame--; + meta = ret_insn[frame]; + idx = ret_prog[frame]; + goto continue_subprog; +} + +static int nfp_bpf_finalize(struct bpf_verifier_env *env) +{ + unsigned int stack_size, stack_needed; + struct bpf_subprog_info *info; + struct nfp_prog *nfp_prog; + struct nfp_net *nn; + int i; + + nfp_prog = env->prog->aux->offload->dev_priv; + nfp_prog->subprog_cnt = env->subprog_cnt; + nfp_prog->subprog = kcalloc(nfp_prog->subprog_cnt, + sizeof(nfp_prog->subprog[0]), GFP_KERNEL); + if (!nfp_prog->subprog) + return -ENOMEM; + + nfp_assign_subprog_idx_and_regs(env, nfp_prog); + + info = env->subprog_info; + for (i = 0; i < nfp_prog->subprog_cnt; i++) { + nfp_prog->subprog[i].stack_depth = info[i].stack_depth; + + if (i == 0) + continue; + + /* Account for size of return address. */ + nfp_prog->subprog[i].stack_depth += REG_WIDTH; + /* Account for size of saved registers, if necessary. 
*/ + if (nfp_prog->subprog[i].needs_reg_push) + nfp_prog->subprog[i].stack_depth += BPF_REG_SIZE * 4; + } + + nn = netdev_priv(env->prog->aux->offload->netdev); + stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + stack_needed = nfp_bpf_get_stack_usage(nfp_prog, env->prog->len); + if (stack_needed > stack_size) { + pr_vlog(env, "stack too large: program %dB > FW stack %dB\n", + stack_needed, stack_size); + return -EOPNOTSUPP; + } + + return 0; +} + const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = { - .insn_hook = nfp_verify_insn, + .insn_hook = nfp_verify_insn, + .finalize = nfp_bpf_finalize, }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index fad0e62a910c..5b257c603e91 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -82,6 +82,15 @@ #define OP_BR_BIT_ADDR_LO OP_BR_ADDR_LO #define OP_BR_BIT_ADDR_HI OP_BR_ADDR_HI +#define OP_BR_ALU_BASE 0x0e800000000ULL +#define OP_BR_ALU_BASE_MASK 0x0ff80000000ULL +#define OP_BR_ALU_A_SRC 0x000000003ffULL +#define OP_BR_ALU_B_SRC 0x000000ffc00ULL +#define OP_BR_ALU_DEFBR 0x00000300000ULL +#define OP_BR_ALU_IMM_HI 0x0007fc00000ULL +#define OP_BR_ALU_SRC_LMEXTN 0x40000000000ULL +#define OP_BR_ALU_DST_LMEXTN 0x80000000000ULL + static inline bool nfp_is_br(u64 insn) { return (insn & OP_BR_BASE_MASK) == OP_BR_BASE || diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 81444208b216..cb3518474f0e 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -86,8 +86,14 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) return 0; } +static int nsim_bpf_finalize(struct bpf_verifier_env *env) +{ + return 0; +} + static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = { - .insn_hook = nsim_bpf_verify_insn, + .insn_hook = nsim_bpf_verify_insn, + .finalize = nsim_bpf_finalize, }; static bool nsim_xdp_offload_active(struct 
netdevsim *ns) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 027697b6a22f..9b558713447f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -263,6 +263,7 @@ struct bpf_verifier_ops { struct bpf_prog_offload_ops { int (*insn_hook)(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); + int (*finalize)(struct bpf_verifier_env *env); }; struct bpf_prog_offload { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7b6fd2ab3263..9e8056ec20fa 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -245,5 +245,6 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); +int bpf_prog_offload_finalize(struct bpf_verifier_env *env); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 177a52436394..8e93c47f0779 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -172,6 +172,24 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env, return ret; } +int bpf_prog_offload_finalize(struct bpf_verifier_env *env) +{ + struct bpf_prog_offload *offload; + int ret = -ENODEV; + + down_read(&bpf_devs_lock); + offload = env->prog->aux->offload; + if (offload) { + if (offload->dev_ops->finalize) + ret = offload->dev_ops->finalize(env); + else + ret = 0; + } + up_read(&bpf_devs_lock); + + return ret; +} + static void __bpf_prog_offload_destroy(struct bpf_prog *prog) { struct bpf_prog_offload *offload = prog->aux->offload; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 73c81bef6ae8..73cc136915fe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1009,10 +1009,6 @@ static int check_subprogs(struct bpf_verifier_env *env) verbose(env, "function calls to other bpf functions are allowed for root only\n"); return -EPERM; } - if 
(bpf_prog_is_dev_bound(env->prog->aux)) { - verbose(env, "function calls in offloaded programs are not supported yet\n"); - return -EINVAL; - } ret = add_subprog(env, i + insn[i].imm + 1); if (ret < 0) return ret; @@ -5968,10 +5964,10 @@ static int fixup_call_args(struct bpf_verifier_env *env) struct bpf_insn *insn = prog->insnsi; int i, depth; #endif - int err; + int err = 0; - err = 0; - if (env->prog->jit_requested) { + if (env->prog->jit_requested && + !bpf_prog_is_dev_bound(env->prog->aux)) { err = jit_subprogs(env); if (err == 0) return 0; @@ -6309,6 +6305,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env->cur_state = NULL; } + if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) + ret = bpf_prog_offload_finalize(env); + skip_full_check: while (!pop_stack(env, NULL, NULL)); free_states(env); |