summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/fault-injection/fault-injection.txt68
-rw-r--r--arch/Kconfig2
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/error-injection.h13
-rw-r--r--arch/x86/include/asm/kprobes.h4
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c14
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/error-inject.c19
-rw-r--r--drivers/net/ethernet/netronome/nfp/Makefile1
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/cmsg.c446
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/fw.h103
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c168
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c60
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h96
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c106
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c55
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.h9
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.c58
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.h4
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h12
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c7
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--include/asm-generic/error-injection.h35
-rw-r--r--include/asm-generic/vmlinux.lds.h14
-rw-r--r--include/linux/bpf.h76
-rw-r--r--include/linux/error-injection.h27
-rw-r--r--include/linux/kprobes.h1
-rw-r--r--include/linux/module.h7
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--include/uapi/linux/bpf.h3
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/bpf/cpumap.c31
-rw-r--r--kernel/bpf/devmap.c8
-rw-r--r--kernel/bpf/disasm.h4
-rw-r--r--kernel/bpf/hashtab.c103
-rw-r--r--kernel/bpf/lpm_trie.c7
-rw-r--r--kernel/bpf/offload.c222
-rw-r--r--kernel/bpf/sockmap.c8
-rw-r--r--kernel/bpf/stackmap.c6
-rw-r--r--kernel/bpf/syscall.c71
-rw-r--r--kernel/bpf/verifier.c7
-rw-r--r--kernel/fail_function.c349
-rw-r--r--kernel/kprobes.c163
-rw-r--r--kernel/module.c8
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/bpf_trace.c11
-rw-r--r--kernel/trace/trace_kprobe.c33
-rw-r--r--kernel/trace/trace_probe.h12
-rw-r--r--lib/Kconfig.debug14
-rw-r--r--lib/Makefile1
-rw-r--r--lib/error-inject.c242
-rw-r--r--net/core/filter.c7
-rw-r--r--samples/bpf/xdp_monitor_kern.c2
-rw-r--r--tools/bpf/bpftool/Makefile2
-rw-r--r--tools/bpf/bpftool/prog.c1
-rw-r--r--tools/build/feature/Makefile2
-rw-r--r--tools/include/uapi/linux/bpf.h1
-rw-r--r--tools/lib/bpf/Makefile20
-rw-r--r--tools/lib/bpf/libbpf.c2
60 files changed, 2365 insertions, 402 deletions
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 918972babcd8..f4a32463ca48 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -30,6 +30,12 @@ o fail_mmc_request
injects MMC data errors on devices permitted by setting
debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
+o fail_function
+
+ injects error return on specific functions, which are marked by
+ ALLOW_ERROR_INJECTION() macro, by setting debugfs entries
+ under /sys/kernel/debug/fail_function. No boot option supported.
+
Configure fault-injection capabilities behavior
-----------------------------------------------
@@ -123,6 +129,29 @@ configuration of fault-injection capabilities.
default is 'N', setting it to 'Y' will disable failure injections
when dealing with private (address space) futexes.
+- /sys/kernel/debug/fail_function/inject:
+
+ Format: { 'function-name' | '!function-name' | '' }
+ specifies the target function of error injection by name.
+ If the function name leads '!' prefix, given function is
+ removed from injection list. If nothing specified ('')
+ injection list is cleared.
+
+- /sys/kernel/debug/fail_function/injectable:
+
+ (read only) shows error injectable functions and what type of
+ error values can be specified. The error type will be one of
+ below;
+ - NULL: retval must be 0.
+ - ERRNO: retval must be -1 to -MAX_ERRNO (-4096).
+ - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
+
+- /sys/kernel/debug/fail_function/<functiuon-name>/retval:
+
+ specifies the "error" return value to inject to the given
+ function for given function. This will be created when
+ user specifies new injection entry.
+
o Boot option
In order to inject faults while debugfs is not available (early boot time),
@@ -268,6 +297,45 @@ trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
echo "Injecting errors into the module $module... (interrupt to stop)"
sleep 1000000
+------------------------------------------------------------------------------
+
+o Inject open_ctree error while btrfs mount
+
+#!/bin/bash
+
+rm -f testfile.img
+dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
+DEVICE=$(losetup --show -f testfile.img)
+mkfs.btrfs -f $DEVICE
+mkdir -p tmpmnt
+
+FAILTYPE=fail_function
+FAILFUNC=open_ctree
+echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
+echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 100 > /sys/kernel/debug/$FAILTYPE/probability
+echo 0 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
+
+mount -t btrfs $DEVICE tmpmnt
+if [ $? -ne 0 ]
+then
+ echo "SUCCESS!"
+else
+ echo "FAILED!"
+ umount tmpmnt
+fi
+
+echo > /sys/kernel/debug/$FAILTYPE/inject
+
+rmdir tmpmnt
+losetup -d $DEVICE
+rm testfile.img
+
+
Tool to run command with failslab or fail_page_alloc
----------------------------------------------------
In order to make it easier to accomplish the tasks mentioned above, we can use
diff --git a/arch/Kconfig b/arch/Kconfig
index d3f4aaf9cb7a..97376accfb14 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -196,7 +196,7 @@ config HAVE_OPTPROBES
config HAVE_KPROBES_ON_FTRACE
bool
-config HAVE_KPROBE_OVERRIDE
+config HAVE_FUNCTION_ERROR_INJECTION
bool
config HAVE_NMI
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 45dc6233f2b9..366b19cb79b7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -154,7 +154,7 @@ config X86
select HAVE_KERNEL_XZ
select HAVE_KPROBES
select HAVE_KPROBES_ON_FTRACE
- select HAVE_KPROBE_OVERRIDE
+ select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_KRETPROBES
select HAVE_KVM
select HAVE_LIVEPATCH if X86_64
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
new file mode 100644
index 000000000000..47b7a1296245
--- /dev/null
+++ b/arch/x86/include/asm/error-injection.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ERROR_INJECTION_H
+#define _ASM_ERROR_INJECTION_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm-generic/error-injection.h>
+
+asmlinkage void just_return_func(void);
+void override_function_with_return(struct pt_regs *regs);
+
+#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 36abb23a7a35..367d99cff426 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -67,9 +67,7 @@ extern const int kretprobe_blacklist_size;
void arch_remove_kprobe(struct kprobe *p);
asmlinkage void kretprobe_trampoline(void);
-#ifdef CONFIG_KPROBES_ON_FTRACE
-extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
-#endif
+extern void arch_kprobe_override_function(struct pt_regs *regs);
/* Architecture specific copy of original instruction*/
struct arch_specific_insn {
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 1ea748d682fd..8dc0161cec8f 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -97,17 +97,3 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
p->ainsn.boostable = false;
return 0;
}
-
-asmlinkage void override_func(void);
-asm(
- ".type override_func, @function\n"
- "override_func:\n"
- " ret\n"
- ".size override_func, .-override_func\n"
-);
-
-void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
-{
- regs->ip = (unsigned long)&override_func;
-}
-NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 7b181b61170e..171377b83be1 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
new file mode 100644
index 000000000000..7b881d03d0dd
--- /dev/null
+++ b/arch/x86/lib/error-inject.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+asmlinkage void just_return_func(void);
+
+asm(
+ ".type just_return_func, @function\n"
+ "just_return_func:\n"
+ " ret\n"
+ ".size just_return_func, .-just_return_func\n"
+);
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ regs->ip = (unsigned long)&just_return_func;
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 6e5ef984398b..064f00e23a19 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -44,6 +44,7 @@ endif
ifeq ($(CONFIG_BPF_SYSCALL),y)
nfp-objs += \
+ bpf/cmsg.o \
bpf/main.o \
bpf/offload.o \
bpf/verifier.o \
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
new file mode 100644
index 000000000000..71e6586acc36
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bpf.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/jiffies.h>
+#include <linux/skbuff.h>
+#include <linux/wait.h>
+
+#include "../nfp_app.h"
+#include "../nfp_net.h"
+#include "fw.h"
+#include "main.h"
+
+#define cmsg_warn(bpf, msg...) nn_dp_warn(&(bpf)->app->ctrl->dp, msg)
+
+#define NFP_BPF_TAG_ALLOC_SPAN (U16_MAX / 4)
+
+static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
+{
+ u16 used_tags;
+
+ used_tags = bpf->tag_alloc_next - bpf->tag_alloc_last;
+
+ return used_tags > NFP_BPF_TAG_ALLOC_SPAN;
+}
+
+static int nfp_bpf_alloc_tag(struct nfp_app_bpf *bpf)
+{
+ /* All FW communication for BPF is request-reply. To make sure we
+ * don't reuse the message ID too early after timeout - limit the
+ * number of requests in flight.
+ */
+ if (nfp_bpf_all_tags_busy(bpf)) {
+ cmsg_warn(bpf, "all FW request contexts busy!\n");
+ return -EAGAIN;
+ }
+
+ WARN_ON(__test_and_set_bit(bpf->tag_alloc_next, bpf->tag_allocator));
+ return bpf->tag_alloc_next++;
+}
+
+static void nfp_bpf_free_tag(struct nfp_app_bpf *bpf, u16 tag)
+{
+ WARN_ON(!__test_and_clear_bit(tag, bpf->tag_allocator));
+
+ while (!test_bit(bpf->tag_alloc_last, bpf->tag_allocator) &&
+ bpf->tag_alloc_last != bpf->tag_alloc_next)
+ bpf->tag_alloc_last++;
+}
+
+static struct sk_buff *
+nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size)
+{
+ struct sk_buff *skb;
+
+ skb = nfp_app_ctrl_msg_alloc(bpf->app, size, GFP_KERNEL);
+ skb_put(skb, size);
+
+ return skb;
+}
+
+static struct sk_buff *
+nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
+{
+ unsigned int size;
+
+ size = sizeof(struct cmsg_req_map_op);
+ size += sizeof(struct cmsg_key_value_pair) * n;
+
+ return nfp_bpf_cmsg_alloc(bpf, size);
+}
+
+static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
+{
+ struct cmsg_hdr *hdr;
+
+ hdr = (struct cmsg_hdr *)skb->data;
+
+ return be16_to_cpu(hdr->tag);
+}
+
+static struct sk_buff *__nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
+{
+ unsigned int msg_tag;
+ struct sk_buff *skb;
+
+ skb_queue_walk(&bpf->cmsg_replies, skb) {
+ msg_tag = nfp_bpf_cmsg_get_tag(skb);
+ if (msg_tag == tag) {
+ nfp_bpf_free_tag(bpf, tag);
+ __skb_unlink(skb, &bpf->cmsg_replies);
+ return skb;
+ }
+ }
+
+ return NULL;
+}
+
+static struct sk_buff *nfp_bpf_reply(struct nfp_app_bpf *bpf, u16 tag)
+{
+ struct sk_buff *skb;
+
+ nfp_ctrl_lock(bpf->app->ctrl);
+ skb = __nfp_bpf_reply(bpf, tag);
+ nfp_ctrl_unlock(bpf->app->ctrl);
+
+ return skb;
+}
+
+static struct sk_buff *nfp_bpf_reply_drop_tag(struct nfp_app_bpf *bpf, u16 tag)
+{
+ struct sk_buff *skb;
+
+ nfp_ctrl_lock(bpf->app->ctrl);
+ skb = __nfp_bpf_reply(bpf, tag);
+ if (!skb)
+ nfp_bpf_free_tag(bpf, tag);
+ nfp_ctrl_unlock(bpf->app->ctrl);
+
+ return skb;
+}
+
+static struct sk_buff *
+nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type,
+ int tag)
+{
+ struct sk_buff *skb;
+ int err;
+
+ err = wait_event_interruptible_timeout(bpf->cmsg_wq,
+ skb = nfp_bpf_reply(bpf, tag),
+ msecs_to_jiffies(5000));
+ /* We didn't get a response - try last time and atomically drop
+ * the tag even if no response is matched.
+ */
+ if (!skb)
+ skb = nfp_bpf_reply_drop_tag(bpf, tag);
+ if (err < 0) {
+ cmsg_warn(bpf, "%s waiting for response to 0x%02x: %d\n",
+ err == ERESTARTSYS ? "interrupted" : "error",
+ type, err);
+ return ERR_PTR(err);
+ }
+ if (!skb) {
+ cmsg_warn(bpf, "timeout waiting for response to 0x%02x\n",
+ type);
+ return ERR_PTR(-ETIMEDOUT);
+ }
+
+ return skb;
+}
+
+static struct sk_buff *
+nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
+ enum nfp_bpf_cmsg_type type, unsigned int reply_size)
+{
+ struct cmsg_hdr *hdr;
+ int tag;
+
+ nfp_ctrl_lock(bpf->app->ctrl);
+ tag = nfp_bpf_alloc_tag(bpf);
+ if (tag < 0) {
+ nfp_ctrl_unlock(bpf->app->ctrl);
+ dev_kfree_skb_any(skb);
+ return ERR_PTR(tag);
+ }
+
+ hdr = (void *)skb->data;
+ hdr->ver = CMSG_MAP_ABI_VERSION;
+ hdr->type = type;
+ hdr->tag = cpu_to_be16(tag);
+
+ __nfp_app_ctrl_tx(bpf->app, skb);
+
+ nfp_ctrl_unlock(bpf->app->ctrl);
+
+ skb = nfp_bpf_cmsg_wait_reply(bpf, type, tag);
+ if (IS_ERR(skb))
+ return skb;
+
+ hdr = (struct cmsg_hdr *)skb->data;
+ /* 0 reply_size means caller will do the validation */
+ if (reply_size && skb->len != reply_size) {
+ cmsg_warn(bpf, "cmsg drop - wrong size %d != %d!\n",
+ skb->len, reply_size);
+ goto err_free;
+ }
+ if (hdr->type != __CMSG_REPLY(type)) {
+ cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
+ hdr->type, __CMSG_REPLY(type));
+ goto err_free;
+ }
+
+ return skb;
+err_free:
+ dev_kfree_skb_any(skb);
+ return ERR_PTR(-EIO);
+}
+
+static int
+nfp_bpf_ctrl_rc_to_errno(struct nfp_app_bpf *bpf,
+ struct cmsg_reply_map_simple *reply)
+{
+ static const int res_table[] = {
+ [CMSG_RC_SUCCESS] = 0,
+ [CMSG_RC_ERR_MAP_FD] = -EBADFD,
+ [CMSG_RC_ERR_MAP_NOENT] = -ENOENT,
+ [CMSG_RC_ERR_MAP_ERR] = -EINVAL,
+ [CMSG_RC_ERR_MAP_PARSE] = -EIO,
+ [CMSG_RC_ERR_MAP_EXIST] = -EEXIST,
+ [CMSG_RC_ERR_MAP_NOMEM] = -ENOMEM,
+ [CMSG_RC_ERR_MAP_E2BIG] = -E2BIG,
+ };
+ u32 rc;
+
+ rc = be32_to_cpu(reply->rc);
+ if (rc >= ARRAY_SIZE(res_table)) {
+ cmsg_warn(bpf, "FW responded with invalid status: %u\n", rc);
+ return -EIO;
+ }
+
+ return res_table[rc];
+}
+
+long long int
+nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map)
+{
+ struct cmsg_reply_map_alloc_tbl *reply;
+ struct cmsg_req_map_alloc_tbl *req;
+ struct sk_buff *skb;
+ u32 tid;
+ int err;
+
+ skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req));
+ if (!skb)
+ return -ENOMEM;
+
+ req = (void *)skb->data;
+ req->key_size = cpu_to_be32(map->key_size);
+ req->value_size = cpu_to_be32(map->value_size);
+ req->max_entries = cpu_to_be32(map->max_entries);
+ req->map_type = cpu_to_be32(map->map_type);
+ req->map_flags = 0;
+
+ skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_ALLOC,
+ sizeof(*reply));
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ reply = (void *)skb->data;
+ err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
+ if (err)
+ goto err_free;
+
+ tid = be32_to_cpu(reply->tid);
+ dev_consume_skb_any(skb);
+
+ return tid;
+err_free:
+ dev_kfree_skb_any(skb);
+ return err;
+}
+
+void nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map)
+{
+ struct cmsg_reply_map_free_tbl *reply;
+ struct cmsg_req_map_free_tbl *req;
+ struct sk_buff *skb;
+ int err;
+
+ skb = nfp_bpf_cmsg_alloc(bpf, sizeof(*req));
+ if (!skb) {
+ cmsg_warn(bpf, "leaking map - failed to allocate msg\n");
+ return;
+ }
+
+ req = (void *)skb->data;
+ req->tid = cpu_to_be32(nfp_map->tid);
+
+ skb = nfp_bpf_cmsg_communicate(bpf, skb, CMSG_TYPE_MAP_FREE,
+ sizeof(*reply));
+ if (IS_ERR(skb)) {
+ cmsg_warn(bpf, "leaking map - I/O error\n");
+ return;
+ }
+
+ reply = (void *)skb->data;
+ err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
+ if (err)
+ cmsg_warn(bpf, "leaking map - FW responded with: %d\n", err);
+
+ dev_consume_skb_any(skb);
+}
+
+static int
+nfp_bpf_ctrl_entry_op(struct bpf_offloaded_map *offmap,
+ enum nfp_bpf_cmsg_type op,
+ u8 *key, u8 *value, u64 flags, u8 *out_key, u8 *out_value)
+{
+ struct nfp_bpf_map *nfp_map = offmap->dev_priv;
+ struct nfp_app_bpf *bpf = nfp_map->bpf;
+ struct bpf_map *map = &offmap->map;
+ struct cmsg_reply_map_op *reply;
+ struct cmsg_req_map_op *req;
+ struct sk_buff *skb;
+ int err;
+
+ /* FW messages have no space for more than 32 bits of flags */
+ if (flags >> 32)
+ return -EOPNOTSUPP;
+
+ skb = nfp_bpf_cmsg_map_req_alloc(bpf, 1);
+ if (!skb)
+ return -ENOMEM;
+
+ req = (void *)skb->data;
+ req->tid = cpu_to_be32(nfp_map->tid);
+ req->count = cpu_to_be32(1);
+ req->flags = cpu_to_be32(flags);
+
+ /* Copy inputs */
+ if (key)
+ memcpy(&req->elem[0].key, key, map->key_size);
+ if (value)
+ memcpy(&req->elem[0].value, value, map->value_size);
+
+ skb = nfp_bpf_cmsg_communicate(bpf, skb, op,
+ sizeof(*reply) + sizeof(*reply->elem));
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ reply = (void *)skb->data;
+ err = nfp_bpf_ctrl_rc_to_errno(bpf, &reply->reply_hdr);
+ if (err)
+ goto err_free;
+
+ /* Copy outputs */
+ if (out_key)
+ memcpy(out_key, &reply->elem[0].key, map->key_size);
+ if (out_value)
+ memcpy(out_value, &reply->elem[0].value, map->value_size);
+
+ dev_consume_skb_any(skb);
+
+ return 0;
+err_free:
+ dev_kfree_skb_any(skb);
+ return err;
+}
+
+int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap,
+ void *key, void *value, u64 flags)
+{
+ return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_UPDATE,
+ key, value, flags, NULL, NULL);
+}
+
+int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key)
+{
+ return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_DELETE,
+ key, NULL, 0, NULL, NULL);
+}
+
+int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
+ void *key, void *value)
+{
+ return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_LOOKUP,
+ key, NULL, 0, NULL, value);
+}
+
+int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap,
+ void *next_key)
+{
+ return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETFIRST,
+ NULL, NULL, 0, next_key, NULL);
+}
+
+int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
+ void *key, void *next_key)
+{
+ return nfp_bpf_ctrl_entry_op(offmap, CMSG_TYPE_MAP_GETNEXT,
+ key, NULL, 0, next_key, NULL);
+}
+
+void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
+{
+ struct nfp_app_bpf *bpf = app->priv;
+ unsigned int tag;
+
+ if (unlikely(skb->len < sizeof(struct cmsg_reply_map_simple))) {
+ cmsg_warn(bpf, "cmsg drop - too short %d!\n", skb->len);
+ goto err_free;
+ }
+
+ nfp_ctrl_lock(bpf->app->ctrl);
+
+ tag = nfp_bpf_cmsg_get_tag(skb);
+ if (unlikely(!test_bit(tag, bpf->tag_allocator))) {
+ cmsg_warn(bpf, "cmsg drop - no one is waiting for tag %u!\n",
+ tag);
+ goto err_unlock;
+ }
+
+ __skb_queue_tail(&bpf->cmsg_replies, skb);
+ wake_up_interruptible_all(&bpf->cmsg_wq);
+
+ nfp_ctrl_unlock(bpf->app->ctrl);
+
+ return;
+err_unlock:
+ nfp_ctrl_unlock(bpf->app->ctrl);
+err_free:
+ dev_kfree_skb_any(skb);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 7206aa1522db..cfcc7bcb2c67 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -38,7 +38,14 @@
#include <linux/types.h>
enum bpf_cap_tlv_type {
+ NFP_BPF_CAP_TYPE_FUNC = 1,
NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
+ NFP_BPF_CAP_TYPE_MAPS = 3,
+};
+
+struct nfp_bpf_cap_tlv_func {
+ __le32 func_id;
+ __le32 func_addr;
};
struct nfp_bpf_cap_tlv_adjust_head {
@@ -51,4 +58,100 @@ struct nfp_bpf_cap_tlv_adjust_head {
#define NFP_BPF_ADJUST_HEAD_NO_META BIT(0)
+struct nfp_bpf_cap_tlv_maps {
+ __le32 types;
+ __le32 max_maps;
+ __le32 max_elems;
+ __le32 max_key_sz;
+ __le32 max_val_sz;
+ __le32 max_elem_sz;
+};
+
+/*
+ * Types defined for map related control messages
+ */
+#define CMSG_MAP_ABI_VERSION 1
+
+enum nfp_bpf_cmsg_type {
+ CMSG_TYPE_MAP_ALLOC = 1,
+ CMSG_TYPE_MAP_FREE = 2,
+ CMSG_TYPE_MAP_LOOKUP = 3,
+ CMSG_TYPE_MAP_UPDATE = 4,
+ CMSG_TYPE_MAP_DELETE = 5,
+ CMSG_TYPE_MAP_GETNEXT = 6,
+ CMSG_TYPE_MAP_GETFIRST = 7,
+ __CMSG_TYPE_MAP_MAX,
+};
+
+#define CMSG_TYPE_MAP_REPLY_BIT 7
+#define __CMSG_REPLY(req) (BIT(CMSG_TYPE_MAP_REPLY_BIT) | (req))
+
+#define CMSG_MAP_KEY_LW 16
+#define CMSG_MAP_VALUE_LW 16
+
+enum nfp_bpf_cmsg_status {
+ CMSG_RC_SUCCESS = 0,
+ CMSG_RC_ERR_MAP_FD = 1,
+ CMSG_RC_ERR_MAP_NOENT = 2,
+ CMSG_RC_ERR_MAP_ERR = 3,
+ CMSG_RC_ERR_MAP_PARSE = 4,
+ CMSG_RC_ERR_MAP_EXIST = 5,
+ CMSG_RC_ERR_MAP_NOMEM = 6,
+ CMSG_RC_ERR_MAP_E2BIG = 7,
+};
+
+struct cmsg_hdr {
+ u8 type;
+ u8 ver;
+ __be16 tag;
+};
+
+struct cmsg_reply_map_simple {
+ struct cmsg_hdr hdr;
+ __be32 rc;
+};
+
+struct cmsg_req_map_alloc_tbl {
+ struct cmsg_hdr hdr;
+ __be32 key_size; /* in bytes */
+ __be32 value_size; /* in bytes */
+ __be32 max_entries;
+ __be32 map_type;
+ __be32 map_flags; /* reserved */
+};
+
+struct cmsg_reply_map_alloc_tbl {
+ struct cmsg_reply_map_simple reply_hdr;
+ __be32 tid;
+};
+
+struct cmsg_req_map_free_tbl {
+ struct cmsg_hdr hdr;
+ __be32 tid;
+};
+
+struct cmsg_reply_map_free_tbl {
+ struct cmsg_reply_map_simple reply_hdr;
+ __be32 count;
+};
+
+struct cmsg_key_value_pair {
+ __be32 key[CMSG_MAP_KEY_LW];
+ __be32 value[CMSG_MAP_VALUE_LW];
+};
+
+struct cmsg_req_map_op {
+ struct cmsg_hdr hdr;
+ __be32 tid;
+ __be32 count;
+ __be32 flags;
+ struct cmsg_key_value_pair elem[0];
+};
+
+struct cmsg_reply_map_op {
+ struct cmsg_reply_map_simple reply_hdr;
+ __be32 count;
+ __be32 resv;
+ struct cmsg_key_value_pair elem[0];
+};
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 47c5224f8d6f..56451edf01c2 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -483,6 +483,21 @@ static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
}
}
+static void
+wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
+ enum nfp_relo_type relo)
+{
+ if (imm > 0xffff) {
+ pr_err("relocation of a large immediate!\n");
+ nfp_prog->error = -EFAULT;
+ return;
+ }
+ emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
+
+ nfp_prog->prog[nfp_prog->prog_len - 1] |=
+ FIELD_PREP(OP_RELO_TYPE, relo);
+}
+
/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
* If the @imm is small enough encode it directly in operand and return
* otherwise load @imm to a spare register and return its encoding.
@@ -538,27 +553,51 @@ wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}
+static void
+addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+ swreg *rega, swreg *regb)
+{
+ if (offset == reg_imm(0)) {
+ *rega = reg_a(src_gpr);
+ *regb = reg_b(src_gpr + 1);
+ return;
+ }
+
+ emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
+ emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
+ reg_imm(0));
+ *rega = imm_a(nfp_prog);
+ *regb = imm_b(nfp_prog);
+}
+
/* NFP has Command Push Pull bus which supports bluk memory operations. */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
bool descending_seq = meta->ldst_gather_len < 0;
s16 len = abs(meta->ldst_gather_len);
swreg src_base, off;
+ bool src_40bit_addr;
unsigned int i;
u8 xfer_num;
off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+ src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
src_base = reg_a(meta->insn.src_reg * 2);
xfer_num = round_up(len, 4) / 4;
+ if (src_40bit_addr)
+ addr40_offset(nfp_prog, meta->insn.src_reg, off, &src_base,
+ &off);
+
/* Setup PREV_ALU fields to override memory read length. */
if (len > 32)
wrp_immed(nfp_prog, reg_none(),
CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
/* Memory read from source addr into transfer-in registers. */
- emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
- off, xfer_num - 1, true, len > 32);
+ emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
+ src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
+ src_base, off, xfer_num - 1, true, len > 32);
/* Move from transfer-in to transfer-out. */
for (i = 0; i < xfer_num; i++)
@@ -696,20 +735,20 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
}
static int
-data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
- u8 dst_gpr, int size)
+data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
+ swreg lreg, swreg rreg, int size, enum cmd_mode mode)
{
unsigned int i;
u8 mask, sz;
- /* We load the value from the address indicated in @offset and then
+ /* We load the value from the address indicated in rreg + lreg and then
* mask out the data we don't need. Note: this is little endian!
*/
sz = max(size, 4);
mask = size < 4 ? GENMASK(size - 1, 0) : 0;
- emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0,
- reg_a(src_gpr), offset, sz / 4 - 1, true);
+ emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
+ lreg, rreg, sz / 4 - 1, true);
i = 0;
if (mask)
@@ -726,6 +765,26 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
}
static int
+data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+ u8 dst_gpr, u8 size)
+{
+ return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
+ size, CMD_MODE_32b);
+}
+
+static int
+data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+ u8 dst_gpr, u8 size)
+{
+ swreg rega, regb;
+
+ addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
+
+ return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
+ size, CMD_MODE_40b_BA);
+}
+
+static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
{
swreg tmp_reg;
@@ -1279,6 +1338,56 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
+static int
+map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ struct bpf_offloaded_map *offmap;
+ struct nfp_bpf_map *nfp_map;
+ bool load_lm_ptr;
+ u32 ret_tgt;
+ s64 lm_off;
+ swreg tid;
+
+ offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
+ nfp_map = offmap->dev_priv;
+
+ /* We only have to reload LM0 if the key is not at start of stack */
+ lm_off = nfp_prog->stack_depth;
+ lm_off += meta->arg2.var_off.value + meta->arg2.off;
+ load_lm_ptr = meta->arg2_var_off || lm_off;
+
+ /* Set LM0 to start of key */
+ if (load_lm_ptr)
+ emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
+
+ /* Load map ID into a register, it should actually fit as an immediate
+ * but in case it doesn't deal with it here, not in the delay slots.
+ */
+ tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
+
+ emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
+ 2, RELO_BR_HELPER);
+ ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
+
+ /* Load map ID into A0 */
+ wrp_mov(nfp_prog, reg_a(0), tid);
+
+ /* Load the return address into B0 */
+ wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
+
+ if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
+ return -EINVAL;
+
+ /* Reset the LM0 pointer */
+ if (!load_lm_ptr)
+ return 0;
+
+ emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
+ wrp_nops(nfp_prog, 3);
+
+ return 0;
+}
+
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@@ -1713,8 +1822,20 @@ mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
- return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg,
- meta->insn.dst_reg * 2, size);
+ return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
+ tmp_reg, meta->insn.dst_reg * 2, size);
+}
+
+static int
+mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
+{
+ swreg tmp_reg;
+
+ tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+ return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
+ tmp_reg, meta->insn.dst_reg * 2, size);
}
static int
@@ -1738,6 +1859,9 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return mem_ldx_stack(nfp_prog, meta, size,
meta->ptr.off + meta->ptr.var_off.value);
+ if (meta->ptr.type == PTR_TO_MAP_VALUE)
+ return mem_ldx_emem(nfp_prog, meta, size);
+
return -EOPNOTSUPP;
}
@@ -2058,6 +2182,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
switch (meta->insn.imm) {
case BPF_FUNC_xdp_adjust_head:
return adjust_head(nfp_prog, meta);
+ case BPF_FUNC_map_lookup_elem:
+ return map_lookup_stack(nfp_prog, meta);
default:
WARN_ONCE(1, "verifier allowed unsupported function\n");
return -EOPNOTSUPP;
@@ -2781,6 +2907,11 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
}
}
+bool nfp_bpf_supported_opcode(u8 code)
+{
+ return !!instr_cb[code];
+}
+
void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
{
unsigned int i;
@@ -2794,6 +2925,7 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
for (i = 0; i < nfp_prog->prog_len; i++) {
enum nfp_relo_type special;
+ u32 val;
special = FIELD_GET(OP_RELO_TYPE, prog[i]);
switch (special) {
@@ -2813,6 +2945,24 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
case RELO_BR_NEXT_PKT:
br_set_offset(&prog[i], bv->tgt_done);
break;
+ case RELO_BR_HELPER:
+ val = br_get_offset(prog[i]);
+ val -= BR_OFF_RELO;
+ switch (val) {
+ case BPF_FUNC_map_lookup_elem:
+ val = nfp_prog->bpf->helpers.map_lookup;
+ break;
+ default:
+ pr_err("relocation of unknown helper %d\n",
+ val);
+ err = -EINVAL;
+ goto err_free_prog;
+ }
+ br_set_offset(&prog[i], val);
+ break;
+ case RELO_IMMED_REL:
+ immed_add_value(&prog[i], bv->start_off);
+ break;
}
prog[i] &= ~OP_RELO_TYPE;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index e8cfe300c8c4..8823c8360047 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -251,6 +251,45 @@ nfp_bpf_parse_cap_adjust_head(struct nfp_app_bpf *bpf, void __iomem *value,
return 0;
}
+static int
+nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
+{
+ struct nfp_bpf_cap_tlv_func __iomem *cap = value;
+
+ if (length < sizeof(*cap)) {
+ nfp_err(bpf->app->cpp, "truncated function TLV: %d\n", length);
+ return -EINVAL;
+ }
+
+ switch (readl(&cap->func_id)) {
+ case BPF_FUNC_map_lookup_elem:
+ bpf->helpers.map_lookup = readl(&cap->func_addr);
+ break;
+ }
+
+ return 0;
+}
+
+static int
+nfp_bpf_parse_cap_maps(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
+{
+ struct nfp_bpf_cap_tlv_maps __iomem *cap = value;
+
+ if (length < sizeof(*cap)) {
+ nfp_err(bpf->app->cpp, "truncated maps TLV: %d\n", length);
+ return -EINVAL;
+ }
+
+ bpf->maps.types = readl(&cap->types);
+ bpf->maps.max_maps = readl(&cap->max_maps);
+ bpf->maps.max_elems = readl(&cap->max_elems);
+ bpf->maps.max_key_sz = readl(&cap->max_key_sz);
+ bpf->maps.max_val_sz = readl(&cap->max_val_sz);
+ bpf->maps.max_elem_sz = readl(&cap->max_elem_sz);
+
+ return 0;
+}
+
static int nfp_bpf_parse_capabilities(struct nfp_app *app)
{
struct nfp_cpp *cpp = app->pf->cpp;
@@ -276,11 +315,19 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
goto err_release_free;
switch (type) {
+ case NFP_BPF_CAP_TYPE_FUNC:
+ if (nfp_bpf_parse_cap_func(app->priv, value, length))
+ goto err_release_free;
+ break;
case NFP_BPF_CAP_TYPE_ADJUST_HEAD:
if (nfp_bpf_parse_cap_adjust_head(app->priv, value,
length))
goto err_release_free;
break;
+ case NFP_BPF_CAP_TYPE_MAPS:
+ if (nfp_bpf_parse_cap_maps(app->priv, value, length))
+ goto err_release_free;
+ break;
default:
nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
break;
@@ -313,6 +360,10 @@ static int nfp_bpf_init(struct nfp_app *app)
bpf->app = app;
app->priv = bpf;
+ skb_queue_head_init(&bpf->cmsg_replies);
+ init_waitqueue_head(&bpf->cmsg_wq);
+ INIT_LIST_HEAD(&bpf->map_list);
+
err = nfp_bpf_parse_capabilities(app);
if (err)
goto err_free_bpf;
@@ -326,7 +377,12 @@ err_free_bpf:
static void nfp_bpf_clean(struct nfp_app *app)
{
- kfree(app->priv);
+ struct nfp_app_bpf *bpf = app->priv;
+
+ WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
+ WARN_ON(!list_empty(&bpf->map_list));
+ WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
+ kfree(bpf);
}
const struct nfp_app_type app_bpf = {
@@ -343,6 +399,8 @@ const struct nfp_app_type app_bpf = {
.vnic_alloc = nfp_bpf_vnic_alloc,
.vnic_free = nfp_bpf_vnic_free,
+ .ctrl_msg_rx = nfp_bpf_ctrl_msg_rx,
+
.setup_tc = nfp_bpf_setup_tc,
.tc_busy = nfp_bpf_tc_busy,
.bpf = nfp_ndo_bpf,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 66381afee2a9..c476bca15ba4 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -37,10 +37,14 @@
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
+#include <linux/kernel.h>
#include <linux/list.h>
+#include <linux/skbuff.h>
#include <linux/types.h>
+#include <linux/wait.h>
#include "../nfp_asm.h"
+#include "fw.h"
/* For relocation logic use up-most byte of branch instruction as scratch
* area. Remember to clear this before sending instructions to HW!
@@ -56,6 +60,9 @@ enum nfp_relo_type {
RELO_BR_GO_ABORT,
/* external jumps to fixed addresses */
RELO_BR_NEXT_PKT,
+ RELO_BR_HELPER,
+ /* immediate relocation against load address */
+ RELO_IMMED_REL,
};
/* To make absolute relocated branches (branches other than RELO_BR_REL)
@@ -93,16 +100,49 @@ enum pkt_vec {
* struct nfp_app_bpf - bpf app priv structure
* @app: backpointer to the app
*
+ * @tag_allocator: bitmap of control message tags in use
+ * @tag_alloc_next: next tag bit to allocate
+ * @tag_alloc_last: next tag bit to be freed
+ *
+ * @cmsg_replies: received cmsg replies waiting to be consumed
+ * @cmsg_wq: work queue for waiting for cmsg replies
+ *
+ * @map_list: list of offloaded maps
+ * @maps_in_use: number of currently offloaded maps
+ * @map_elems_in_use: number of elements allocated to offloaded maps
+ *
* @adjust_head: adjust head capability
* @flags: extra flags for adjust head
* @off_min: minimal packet offset within buffer required
* @off_max: maximum packet offset within buffer required
* @guaranteed_sub: amount of negative adjustment guaranteed possible
* @guaranteed_add: amount of positive adjustment guaranteed possible
+ *
+ * @maps: map capability
+ * @types: supported map types
+ * @max_maps: max number of maps supported
+ * @max_elems: max number of entries in each map
+ * @max_key_sz: max size of map key
+ * @max_val_sz: max size of map value
+ * @max_elem_sz: max size of map entry (key + value)
+ *
+ * @helpers: helper addressess for various calls
+ * @map_lookup: map lookup helper address
*/
struct nfp_app_bpf {
struct nfp_app *app;
+ DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
+ u16 tag_alloc_next;
+ u16 tag_alloc_last;
+
+ struct sk_buff_head cmsg_replies;
+ struct wait_queue_head cmsg_wq;
+
+ struct list_head map_list;
+ unsigned int maps_in_use;
+ unsigned int map_elems_in_use;
+
struct nfp_bpf_cap_adjust_head {
u32 flags;
int off_min;
@@ -110,6 +150,33 @@ struct nfp_app_bpf {
int guaranteed_sub;
int guaranteed_add;
} adjust_head;
+
+ struct {
+ u32 types;
+ u32 max_maps;
+ u32 max_elems;
+ u32 max_key_sz;
+ u32 max_val_sz;
+ u32 max_elem_sz;
+ } maps;
+
+ struct {
+ u32 map_lookup;
+ } helpers;
+};
+
+/**
+ * struct nfp_bpf_map - private per-map data attached to BPF maps for offload
+ * @offmap: pointer to the offloaded BPF map
+ * @bpf: back pointer to bpf app private structure
+ * @tid: table id identifying map on datapath
+ * @l: link on the nfp_app_bpf->map_list list
+ */
+struct nfp_bpf_map {
+ struct bpf_offloaded_map *offmap;
+ struct nfp_app_bpf *bpf;
+ u32 tid;
+ struct list_head l;
};
struct nfp_prog;
@@ -131,9 +198,12 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
* @ptr: pointer type for memory operations
* @ldst_gather_len: memcpy length gathered from load/store sequence
* @paired_st: the paired store insn at the head of the sequence
- * @arg2: arg2 for call instructions
* @ptr_not_const: pointer is not always constant
* @jmp_dst: destination info for jump instructions
+ * @func_id: function id for call instructions
+ * @arg1: arg1 for call instructions
+ * @arg2: arg2 for call instructions
+ * @arg2_var_off: arg2 changes stack offset on different paths
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
* @flags: eBPF instruction extra optimization flags
@@ -151,7 +221,12 @@ struct nfp_insn_meta {
bool ptr_not_const;
};
struct nfp_insn_meta *jmp_dst;
- struct bpf_reg_state arg2;
+ struct {
+ u32 func_id;
+ struct bpf_reg_state arg1;
+ struct bpf_reg_state arg2;
+ bool arg2_var_off;
+ };
};
unsigned int off;
unsigned short n;
@@ -249,6 +324,7 @@ struct nfp_bpf_vnic {
void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt);
int nfp_bpf_jit(struct nfp_prog *prog);
+bool nfp_bpf_supported_opcode(u8 code);
extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
@@ -266,4 +342,20 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int insn_idx, unsigned int n_insns);
void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv);
+
+long long int
+nfp_bpf_ctrl_alloc_map(struct nfp_app_bpf *bpf, struct bpf_map *map);
+void
+nfp_bpf_ctrl_free_map(struct nfp_app_bpf *bpf, struct nfp_bpf_map *nfp_map);
+int nfp_bpf_ctrl_getfirst_entry(struct bpf_offloaded_map *offmap,
+ void *next_key);
+int nfp_bpf_ctrl_update_entry(struct bpf_offloaded_map *offmap,
+ void *key, void *value, u64 flags);
+int nfp_bpf_ctrl_del_entry(struct bpf_offloaded_map *offmap, void *key);
+int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
+ void *key, void *value);
+int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
+ void *key, void *next_key);
+
+void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 320b2250d29a..e2859b2e9c6a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -36,6 +36,9 @@
* Netronome network device driver: TC offload functions for PF and VF
*/
+#define pr_fmt(fmt) "NFP net bpf: " fmt
+
+#include <linux/bpf.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
@@ -153,6 +156,103 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
return 0;
}
+static int
+nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap,
+ void *key, void *next_key)
+{
+ if (!key)
+ return nfp_bpf_ctrl_getfirst_entry(offmap, next_key);
+ return nfp_bpf_ctrl_getnext_entry(offmap, key, next_key);
+}
+
+static int
+nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key)
+{
+ return nfp_bpf_ctrl_del_entry(offmap, key);
+}
+
+static const struct bpf_map_dev_ops nfp_bpf_map_ops = {
+ .map_get_next_key = nfp_bpf_map_get_next_key,
+ .map_lookup_elem = nfp_bpf_ctrl_lookup_entry,
+ .map_update_elem = nfp_bpf_ctrl_update_entry,
+ .map_delete_elem = nfp_bpf_map_delete_elem,
+};
+
+static int
+nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
+{
+ struct nfp_bpf_map *nfp_map;
+ long long int res;
+
+ if (!bpf->maps.types)
+ return -EOPNOTSUPP;
+
+ if (offmap->map.map_flags ||
+ offmap->map.numa_node != NUMA_NO_NODE) {
+ pr_info("map flags are not supported\n");
+ return -EINVAL;
+ }
+
+ if (!(bpf->maps.types & 1 << offmap->map.map_type)) {
+ pr_info("map type not supported\n");
+ return -EOPNOTSUPP;
+ }
+ if (bpf->maps.max_maps == bpf->maps_in_use) {
+ pr_info("too many maps for a device\n");
+ return -ENOMEM;
+ }
+ if (bpf->maps.max_elems - bpf->map_elems_in_use <
+ offmap->map.max_entries) {
+ pr_info("map with too many elements: %u, left: %u\n",
+ offmap->map.max_entries,
+ bpf->maps.max_elems - bpf->map_elems_in_use);
+ return -ENOMEM;
+ }
+ if (offmap->map.key_size > bpf->maps.max_key_sz ||
+ offmap->map.value_size > bpf->maps.max_val_sz ||
+ round_up(offmap->map.key_size, 8) +
+ round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) {
+ pr_info("elements don't fit in device constraints\n");
+ return -ENOMEM;
+ }
+
+ nfp_map = kzalloc(sizeof(*nfp_map), GFP_USER);
+ if (!nfp_map)
+ return -ENOMEM;
+
+ offmap->dev_priv = nfp_map;
+ nfp_map->offmap = offmap;
+ nfp_map->bpf = bpf;
+
+ res = nfp_bpf_ctrl_alloc_map(bpf, &offmap->map);
+ if (res < 0) {
+ kfree(nfp_map);
+ return res;
+ }
+
+ nfp_map->tid = res;
+ offmap->dev_ops = &nfp_bpf_map_ops;
+ bpf->maps_in_use++;
+ bpf->map_elems_in_use += offmap->map.max_entries;
+ list_add_tail(&nfp_map->l, &bpf->map_list);
+
+ return 0;
+}
+
+static int
+nfp_bpf_map_free(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
+{
+ struct nfp_bpf_map *nfp_map = offmap->dev_priv;
+
+ nfp_bpf_ctrl_free_map(bpf, nfp_map);
+ list_del_init(&nfp_map->l);
+ bpf->map_elems_in_use -= offmap->map.max_entries;
+ bpf->maps_in_use--;
+ kfree(nfp_map);
+
+ return 0;
+}
+
int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
{
switch (bpf->command) {
@@ -162,6 +262,10 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
return nfp_bpf_translate(nn, bpf->offload.prog);
case BPF_OFFLOAD_DESTROY:
return nfp_bpf_destroy(nn, bpf->offload.prog);
+ case BPF_OFFLOAD_MAP_ALLOC:
+ return nfp_bpf_map_alloc(app->priv, bpf->offmap);
+ case BPF_OFFLOAD_MAP_FREE:
+ return nfp_bpf_map_free(app->priv, bpf->offmap);
default:
return -EINVAL;
}
@@ -237,7 +341,7 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
int err;
if (prog) {
- struct bpf_dev_offload *offload = prog->aux->offload;
+ struct bpf_prog_offload *offload = prog->aux->offload;
if (!offload)
return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 7890d95d4018..479f602887e9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -110,9 +110,11 @@ static int
nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
struct nfp_insn_meta *meta)
{
+ const struct bpf_reg_state *reg1 = cur_regs(env) + BPF_REG_1;
const struct bpf_reg_state *reg2 = cur_regs(env) + BPF_REG_2;
struct nfp_app_bpf *bpf = nfp_prog->bpf;
u32 func_id = meta->insn.imm;
+ s64 off, old_off;
switch (func_id) {
case BPF_FUNC_xdp_adjust_head:
@@ -127,11 +129,50 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
nfp_record_adjust_head(bpf, nfp_prog, meta, reg2);
break;
+
+ case BPF_FUNC_map_lookup_elem:
+ if (!bpf->helpers.map_lookup) {
+ pr_vlog(env, "map_lookup: not supported by FW\n");
+ return -EOPNOTSUPP;
+ }
+ if (reg2->type != PTR_TO_STACK) {
+ pr_vlog(env,
+ "map_lookup: unsupported key ptr type %d\n",
+ reg2->type);
+ return -EOPNOTSUPP;
+ }
+ if (!tnum_is_const(reg2->var_off)) {
+ pr_vlog(env, "map_lookup: variable key pointer\n");
+ return -EOPNOTSUPP;
+ }
+
+ off = reg2->var_off.value + reg2->off;
+ if (-off % 4) {
+ pr_vlog(env,
+ "map_lookup: unaligned stack pointer %lld\n",
+ -off);
+ return -EOPNOTSUPP;
+ }
+
+ /* Rest of the checks is only if we re-parse the same insn */
+ if (!meta->func_id)
+ break;
+
+ old_off = meta->arg2.var_off.value + meta->arg2.off;
+ meta->arg2_var_off |= off != old_off;
+
+ if (meta->arg1.map_ptr != reg1->map_ptr) {
+ pr_vlog(env, "map_lookup: called for different map\n");
+ return -EOPNOTSUPP;
+ }
+ break;
default:
pr_vlog(env, "unsupported function id: %d\n", func_id);
return -EOPNOTSUPP;
}
+ meta->func_id = func_id;
+ meta->arg1 = *reg1;
meta->arg2 = *reg2;
return 0;
@@ -210,6 +251,7 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
if (reg->type != PTR_TO_CTX &&
reg->type != PTR_TO_STACK &&
+ reg->type != PTR_TO_MAP_VALUE &&
reg->type != PTR_TO_PACKET) {
pr_vlog(env, "unsupported ptr type: %d\n", reg->type);
return -EINVAL;
@@ -221,6 +263,13 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return err;
}
+ if (reg->type == PTR_TO_MAP_VALUE) {
+ if (is_mbpf_store(meta)) {
+ pr_vlog(env, "map writes not supported\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) {
pr_vlog(env, "ptr type changed for instruction %d -> %d\n",
meta->ptr.type, reg->type);
@@ -241,6 +290,12 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len);
nfp_prog->verifier_meta = meta;
+ if (!nfp_bpf_supported_opcode(meta->insn.code)) {
+ pr_vlog(env, "instruction %#02x not supported\n",
+ meta->insn.code);
+ return -EINVAL;
+ }
+
if (meta->insn.src_reg >= MAX_BPF_REG ||
meta->insn.dst_reg >= MAX_BPF_REG) {
pr_vlog(env, "program uses extended registers - jit hardening?\n");
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 32ff46a00f70..6a6eb02b516e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -165,6 +165,7 @@ struct nfp_app {
void *priv;
};
+bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb);
static inline int nfp_app_init(struct nfp_app *app)
@@ -326,6 +327,14 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
return app->type->xdp_offload(app, nn, prog);
}
+static inline bool __nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
+{
+ trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0,
+ skb->data, skb->len);
+
+ return __nfp_ctrl_tx(app->ctrl, skb);
+}
+
static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
{
trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index 9ee3a3f60cc7..3f6952b66a49 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -50,6 +50,11 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
[CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
};
+static bool unreg_is_imm(u16 reg)
+{
+ return (reg & UR_REG_IMM) == UR_REG_IMM;
+}
+
u16 br_get_offset(u64 instr)
{
u16 addr_lo, addr_hi;
@@ -80,6 +85,59 @@ void br_add_offset(u64 *instr, u16 offset)
br_set_offset(instr, addr + offset);
}
+static bool immed_can_modify(u64 instr)
+{
+ if (FIELD_GET(OP_IMMED_INV, instr) ||
+ FIELD_GET(OP_IMMED_SHIFT, instr) ||
+ FIELD_GET(OP_IMMED_WIDTH, instr) != IMMED_WIDTH_ALL) {
+ pr_err("Can't decode/encode immed!\n");
+ return false;
+ }
+ return true;
+}
+
+u16 immed_get_value(u64 instr)
+{
+ u16 reg;
+
+ if (!immed_can_modify(instr))
+ return 0;
+
+ reg = FIELD_GET(OP_IMMED_A_SRC, instr);
+ if (!unreg_is_imm(reg))
+ reg = FIELD_GET(OP_IMMED_B_SRC, instr);
+
+ return (reg & 0xff) | FIELD_GET(OP_IMMED_IMM, instr);
+}
+
+void immed_set_value(u64 *instr, u16 immed)
+{
+ if (!immed_can_modify(*instr))
+ return;
+
+ if (unreg_is_imm(FIELD_GET(OP_IMMED_A_SRC, *instr))) {
+ *instr &= ~FIELD_PREP(OP_IMMED_A_SRC, 0xff);
+ *instr |= FIELD_PREP(OP_IMMED_A_SRC, immed & 0xff);
+ } else {
+ *instr &= ~FIELD_PREP(OP_IMMED_B_SRC, 0xff);
+ *instr |= FIELD_PREP(OP_IMMED_B_SRC, immed & 0xff);
+ }
+
+ *instr &= ~OP_IMMED_IMM;
+ *instr |= FIELD_PREP(OP_IMMED_IMM, immed >> 8);
+}
+
+void immed_add_value(u64 *instr, u16 offset)
+{
+ u16 val;
+
+ if (!immed_can_modify(*instr))
+ return;
+
+ val = immed_get_value(*instr);
+ immed_set_value(instr, val + offset);
+}
+
static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst)
{
bool lm_id, lm_dec = false;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 20e51cb60e69..5f9291db98e0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -138,6 +138,10 @@ enum immed_shift {
IMMED_SHIFT_2B = 2,
};
+u16 immed_get_value(u64 instr);
+void immed_set_value(u64 *instr, u16 immed);
+void immed_add_value(u64 *instr, u16 offset);
+
#define OP_SHF_BASE 0x08000000000ULL
#define OP_SHF_A_SRC 0x000000000ffULL
#define OP_SHF_SC 0x00000000300ULL
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 0e564cfabe7e..6f6e3d6fd935 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -839,6 +839,18 @@ static inline const char *nfp_net_name(struct nfp_net *nn)
return nn->dp.netdev ? nn->dp.netdev->name : "ctrl";
}
+static inline void nfp_ctrl_lock(struct nfp_net *nn)
+ __acquires(&nn->r_vecs[0].lock)
+{
+ spin_lock_bh(&nn->r_vecs[0].lock);
+}
+
+static inline void nfp_ctrl_unlock(struct nfp_net *nn)
+ __releases(&nn->r_vecs[0].lock)
+{
+ spin_unlock_bh(&nn->r_vecs[0].lock);
+}
+
/* Globals */
extern const char nfp_driver_version[];
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 07e0587dc14e..2b5cad3069a7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1920,6 +1920,13 @@ err_free:
return false;
}
+bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
+{
+ struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
+
+ return nfp_ctrl_tx_one(nn, r_vec, skb, false);
+}
+
bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb)
{
struct nfp_net_r_vector *r_vec = &nn->r_vecs[0];
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5da18ebc9222..83e2349e1362 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,7 +30,7 @@
#include <linux/ratelimit.h>
#include <linux/uuid.h>
#include <linux/semaphore.h>
-#include <linux/bpf.h>
+#include <linux/error-injection.h>
#include <asm/unaligned.h>
#include "ctree.h"
#include "disk-io.h"
@@ -3124,7 +3124,7 @@ recovery_tree_root:
goto fail_block_groups;
goto retry_root_backup;
}
-BPF_ALLOW_ERROR_INJECTION(open_ctree);
+ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index fb1382893bfc..586bb06472bb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -22,7 +22,7 @@
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
-#include <linux/bpf.h>
+#include <linux/error-injection.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
@@ -333,7 +333,7 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
return 0;
}
-BPF_ALLOW_ERROR_INJECTION(io_ctl_init);
+ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);
static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h
new file mode 100644
index 000000000000..296c65442f00
--- /dev/null
+++ b/include/asm-generic/error-injection.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_ERROR_INJECTION_H
+#define _ASM_GENERIC_ERROR_INJECTION_H
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+enum {
+ EI_ETYPE_NONE, /* Dummy value for undefined case */
+ EI_ETYPE_NULL, /* Return NULL if failure */
+ EI_ETYPE_ERRNO, /* Return -ERRNO if failure */
+ EI_ETYPE_ERRNO_NULL, /* Return -ERRNO or NULL if failure */
+};
+
+struct error_injection_entry {
+ unsigned long addr;
+ int etype;
+};
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+/*
+ * Whitelist ganerating macro. Specify functions which can be
+ * error-injectable using this macro.
+ */
+#define ALLOW_ERROR_INJECTION(fname, _etype) \
+static struct error_injection_entry __used \
+ __attribute__((__section__("_error_injection_whitelist"))) \
+ _eil_addr_##fname = { \
+ .addr = (unsigned long)fname, \
+ .etype = EI_ETYPE_##_etype, \
+ };
+#else
+#define ALLOW_ERROR_INJECTION(fname, _etype)
+#endif
+#endif
+
+#endif /* _ASM_GENERIC_ERROR_INJECTION_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index a2e8582d094a..ebe544e048cd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -136,13 +136,13 @@
#define KPROBE_BLACKLIST()
#endif
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-#define ERROR_INJECT_LIST() . = ALIGN(8); \
- VMLINUX_SYMBOL(__start_kprobe_error_inject_list) = .; \
- KEEP(*(_kprobe_error_inject_list)) \
- VMLINUX_SYMBOL(__stop_kprobe_error_inject_list) = .;
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+#define ERROR_INJECT_WHITELIST() STRUCT_ALIGN(); \
+ VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;\
+ KEEP(*(_error_injection_whitelist)) \
+ VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .;
#else
-#define ERROR_INJECT_LIST()
+#define ERROR_INJECT_WHITELIST()
#endif
#ifdef CONFIG_EVENT_TRACING
@@ -573,7 +573,7 @@
FTRACE_EVENTS() \
TRACE_SYSCALLS() \
KPROBE_BLACKLIST() \
- ERROR_INJECT_LIST() \
+ ERROR_INJECT_WHITELIST() \
MEM_DISCARD(init.rodata) \
CLK_OF_TABLES() \
RESERVEDMEM_OF_TABLES() \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 44f26f6df8fc..5c2c104dc2c5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -25,6 +25,7 @@ struct bpf_map;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
+ int (*map_alloc_check)(union bpf_attr *attr);
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
void (*map_release)(struct bpf_map *map, struct file *map_file);
void (*map_free)(struct bpf_map *map);
@@ -73,6 +74,33 @@ struct bpf_map {
char name[BPF_OBJ_NAME_LEN];
};
+struct bpf_offloaded_map;
+
+struct bpf_map_dev_ops {
+ int (*map_get_next_key)(struct bpf_offloaded_map *map,
+ void *key, void *next_key);
+ int (*map_lookup_elem)(struct bpf_offloaded_map *map,
+ void *key, void *value);
+ int (*map_update_elem)(struct bpf_offloaded_map *map,
+ void *key, void *value, u64 flags);
+ int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key);
+};
+
+struct bpf_offloaded_map {
+ struct bpf_map map;
+ struct net_device *netdev;
+ const struct bpf_map_dev_ops *dev_ops;
+ void *dev_priv;
+ struct list_head offloads;
+};
+
+static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
+{
+ return container_of(map, struct bpf_offloaded_map, map);
+}
+
+extern const struct bpf_map_ops bpf_map_offload_ops;
+
/* function argument constraints */
enum bpf_arg_type {
ARG_DONTCARE = 0, /* unused argument in helper function */
@@ -199,7 +227,7 @@ struct bpf_prog_offload_ops {
int insn_idx, int prev_insn_idx);
};
-struct bpf_dev_offload {
+struct bpf_prog_offload {
struct bpf_prog *prog;
struct net_device *netdev;
void *dev_priv;
@@ -229,7 +257,7 @@ struct bpf_prog_aux {
#ifdef CONFIG_SECURITY
void *security;
#endif
- struct bpf_dev_offload *offload;
+ struct bpf_prog_offload *offload;
union {
struct work_struct work;
struct rcu_head rcu;
@@ -368,6 +396,7 @@ int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
+void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
@@ -377,6 +406,7 @@ void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
void *bpf_map_area_alloc(size_t size, int numa_node);
void bpf_map_area_free(void *base);
+void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
extern int sysctl_unprivileged_bpf_disabled;
@@ -554,6 +584,15 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog);
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
struct bpf_prog *prog);
+int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value);
+int bpf_map_offload_update_elem(struct bpf_map *map,
+ void *key, void *value, u64 flags);
+int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
+int bpf_map_offload_get_next_key(struct bpf_map *map,
+ void *key, void *next_key);
+
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
+
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
@@ -561,6 +600,14 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
{
return aux->offload_requested;
}
+
+static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+{
+ return unlikely(map->ops == &bpf_map_offload_ops);
+}
+
+struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
+void bpf_map_offload_map_free(struct bpf_map *map);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
@@ -572,6 +619,20 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
{
return false;
}
+
+static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
+{
+ return false;
+}
+
+static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void bpf_map_offload_map_free(struct bpf_map *map)
+{
+}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
@@ -613,15 +674,4 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-#define BPF_ALLOW_ERROR_INJECTION(fname) \
-static unsigned long __used \
- __attribute__((__section__("_kprobe_error_inject_list"))) \
- _eil_addr_##fname = (unsigned long)fname;
-#else
-#define BPF_ALLOW_ERROR_INJECTION(fname)
-#endif
-#endif
-
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h
new file mode 100644
index 000000000000..280c61ecbf20
--- /dev/null
+++ b/include/linux/error-injection.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ERROR_INJECTION_H
+#define _LINUX_ERROR_INJECTION_H
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+
+#include <asm/error-injection.h>
+
+extern bool within_error_injection_list(unsigned long addr);
+extern int get_injectable_error_type(unsigned long addr);
+
+#else /* !CONFIG_FUNCTION_ERROR_INJECTION */
+
+#include <asm-generic/error-injection.h>
+static inline bool within_error_injection_list(unsigned long addr)
+{
+ return false;
+}
+
+static inline int get_injectable_error_type(unsigned long addr)
+{
+ return EI_ETYPE_NONE;
+}
+
+#endif
+
+#endif /* _LINUX_ERROR_INJECTION_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 963fd364f3d6..9440a2fc8893 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -271,7 +271,6 @@ extern bool arch_kprobe_on_func_entry(unsigned long offset);
extern bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset);
extern bool within_kprobe_blacklist(unsigned long addr);
-extern bool within_kprobe_error_injection_list(unsigned long addr);
struct kprobe_insn_cache {
struct mutex mutex;
diff --git a/include/linux/module.h b/include/linux/module.h
index 548fa09fa806..9642d3116718 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -19,6 +19,7 @@
#include <linux/jump_label.h>
#include <linux/export.h>
#include <linux/rbtree_latch.h>
+#include <linux/error-injection.h>
#include <linux/percpu.h>
#include <asm/module.h>
@@ -476,9 +477,9 @@ struct module {
unsigned int num_ctors;
#endif
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
- unsigned int num_kprobe_ei_funcs;
- unsigned long *kprobe_ei_funcs;
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+ struct error_injection_entry *ei_funcs;
+ unsigned int num_ei_funcs;
#endif
} ____cacheline_aligned __randomize_layout;
#ifndef MODULE_ARCH_INIT
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6d95477b962c..ed0799a12bf2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -805,6 +805,8 @@ enum bpf_netdev_command {
BPF_OFFLOAD_VERIFIER_PREP,
BPF_OFFLOAD_TRANSLATE,
BPF_OFFLOAD_DESTROY,
+ BPF_OFFLOAD_MAP_ALLOC,
+ BPF_OFFLOAD_MAP_FREE,
};
struct bpf_prog_offload_ops;
@@ -835,6 +837,10 @@ struct netdev_bpf {
struct {
struct bpf_prog *prog;
} offload;
+ /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
+ struct {
+ struct bpf_offloaded_map *offmap;
+ };
};
};
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 405317f9c064..7c2259e8bc54 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -245,6 +245,7 @@ union bpf_attr {
* BPF_F_NUMA_NODE is set).
*/
char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -899,7 +900,7 @@ struct xdp_md {
__u32 data;
__u32 data_end;
__u32 data_meta;
- /* Below access go though struct xdp_rxq_info */
+ /* Below access go through struct xdp_rxq_info */
__u32 ingress_ifindex; /* rxq->dev->ifindex */
__u32 rx_queue_index; /* rxq->queue_index */
};
diff --git a/kernel/Makefile b/kernel/Makefile
index 172d151d429c..f85ae5dfa474 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_KCOV) += kcov.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_FAIL_FUNCTION) += fail_function.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ce5b669003b2..fbfdada6caee 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -94,13 +94,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
if (!cmap)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- cmap->map.map_type = attr->map_type;
- cmap->map.key_size = attr->key_size;
- cmap->map.value_size = attr->value_size;
- cmap->map.max_entries = attr->max_entries;
- cmap->map.map_flags = attr->map_flags;
- cmap->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&cmap->map, attr);
/* Pre-limit array size based on NR_CPUS, not final CPU check */
if (cmap->map.max_entries > NR_CPUS) {
@@ -143,7 +137,7 @@ free_cmap:
return ERR_PTR(err);
}
-void __cpu_map_queue_destructor(void *ptr)
+static void __cpu_map_queue_destructor(void *ptr)
{
/* The tear-down procedure should have made sure that queue is
* empty. See __cpu_map_entry_replace() and work-queue
@@ -222,8 +216,8 @@ static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
return xdp_pkt;
}
-struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
- struct xdp_pkt *xdp_pkt)
+static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
+ struct xdp_pkt *xdp_pkt)
{
unsigned int frame_size;
void *pkt_data_start;
@@ -337,7 +331,8 @@ static int cpu_map_kthread_run(void *data)
return 0;
}
-struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id)
+static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu,
+ int map_id)
{
gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
struct bpf_cpu_map_entry *rcpu;
@@ -395,7 +390,7 @@ free_rcu:
return NULL;
}
-void __cpu_map_entry_free(struct rcu_head *rcu)
+static void __cpu_map_entry_free(struct rcu_head *rcu)
{
struct bpf_cpu_map_entry *rcpu;
int cpu;
@@ -438,8 +433,8 @@ void __cpu_map_entry_free(struct rcu_head *rcu)
* cpu_map_kthread_stop, which waits for an RCU graze period before
* stopping kthread, emptying the queue.
*/
-void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
- u32 key_cpu, struct bpf_cpu_map_entry *rcpu)
+static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
+ u32 key_cpu, struct bpf_cpu_map_entry *rcpu)
{
struct bpf_cpu_map_entry *old_rcpu;
@@ -451,7 +446,7 @@ void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
}
}
-int cpu_map_delete_elem(struct bpf_map *map, void *key)
+static int cpu_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
u32 key_cpu = *(u32 *)key;
@@ -464,8 +459,8 @@ int cpu_map_delete_elem(struct bpf_map *map, void *key)
return 0;
}
-int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
- u64 map_flags)
+static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
struct bpf_cpu_map_entry *rcpu;
@@ -502,7 +497,7 @@ int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
return 0;
}
-void cpu_map_free(struct bpf_map *map)
+static void cpu_map_free(struct bpf_map *map)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
int cpu;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ebdef54bf7df..565f9ece9115 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -93,13 +93,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!dtab)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- dtab->map.map_type = attr->map_type;
- dtab->map.key_size = attr->key_size;
- dtab->map.value_size = attr->value_size;
- dtab->map.max_entries = attr->max_entries;
- dtab->map.map_flags = attr->map_flags;
- dtab->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&dtab->map, attr);
/* make sure page count doesn't overflow */
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
index e0857d016f89..266fe8ee542b 100644
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -29,8 +29,8 @@ extern const char *const bpf_class_string[8];
const char *func_id_name(int id);
-typedef void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
- const char *, ...);
+typedef __printf(2, 3) void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
+ const char *, ...);
typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
const struct bpf_insn *insn);
typedef const char *(*bpf_insn_print_imm_t)(void *private_data,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3905d4bc5b80..b76828f23b49 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -227,7 +227,7 @@ static int alloc_extra_elems(struct bpf_htab *htab)
}
/* Called from syscall */
-static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+static int htab_map_alloc_check(union bpf_attr *attr)
{
bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
@@ -241,9 +241,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
int numa_node = bpf_map_attr_numa_node(attr);
- struct bpf_htab *htab;
- int err, i;
- u64 cost;
BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
offsetof(struct htab_elem, hash_node.pprev));
@@ -254,40 +251,68 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
/* LRU implementation is much complicated than other
* maps. Hence, limit to CAP_SYS_ADMIN for now.
*/
- return ERR_PTR(-EPERM);
+ return -EPERM;
if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
/* reserved bits should not be used */
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (!lru && percpu_lru)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (lru && !prealloc)
- return ERR_PTR(-ENOTSUPP);
+ return -ENOTSUPP;
if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
+
+ /* check sanity of attributes.
+ * value_size == 0 may be allowed in the future to use map as a set
+ */
+ if (attr->max_entries == 0 || attr->key_size == 0 ||
+ attr->value_size == 0)
+ return -EINVAL;
+
+ if (attr->key_size > MAX_BPF_STACK)
+ /* eBPF programs initialize keys on stack, so they cannot be
+ * larger than max stack size
+ */
+ return -E2BIG;
+
+ if (attr->value_size >= KMALLOC_MAX_SIZE -
+ MAX_BPF_STACK - sizeof(struct htab_elem))
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements via bpf syscall. This check also makes
+ * sure that the elem_size doesn't overflow and it's
+ * kmalloc-able later in htab_map_update_elem()
+ */
+ return -E2BIG;
+
+ return 0;
+}
+
+static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+{
+ bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
+ attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ /* percpu_lru means each cpu has its own LRU list.
+ * it is different from BPF_MAP_TYPE_PERCPU_HASH where
+ * the map's value itself is percpu. percpu_lru has
+ * nothing to do with the map's value.
+ */
+ bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
+ bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+ struct bpf_htab *htab;
+ int err, i;
+ u64 cost;
htab = kzalloc(sizeof(*htab), GFP_USER);
if (!htab)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- htab->map.map_type = attr->map_type;
- htab->map.key_size = attr->key_size;
- htab->map.value_size = attr->value_size;
- htab->map.max_entries = attr->max_entries;
- htab->map.map_flags = attr->map_flags;
- htab->map.numa_node = numa_node;
-
- /* check sanity of attributes.
- * value_size == 0 may be allowed in the future to use map as a set
- */
- err = -EINVAL;
- if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
- htab->map.value_size == 0)
- goto free_htab;
+ bpf_map_init_from_attr(&htab->map, attr);
if (percpu_lru) {
/* ensure each CPU's lru list has >=1 elements.
@@ -304,22 +329,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
/* hash table size must be power of 2 */
htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
- err = -E2BIG;
- if (htab->map.key_size > MAX_BPF_STACK)
- /* eBPF programs initialize keys on stack, so they cannot be
- * larger than max stack size
- */
- goto free_htab;
-
- if (htab->map.value_size >= KMALLOC_MAX_SIZE -
- MAX_BPF_STACK - sizeof(struct htab_elem))
- /* if value_size is bigger, the user space won't be able to
- * access the elements via bpf syscall. This check also makes
- * sure that the elem_size doesn't overflow and it's
- * kmalloc-able later in htab_map_update_elem()
- */
- goto free_htab;
-
htab->elem_size = sizeof(struct htab_elem) +
round_up(htab->map.key_size, 8);
if (percpu)
@@ -327,6 +336,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
else
htab->elem_size += round_up(htab->map.value_size, 8);
+ err = -E2BIG;
/* prevent zero size kmalloc and check for u32 overflow */
if (htab->n_buckets == 0 ||
htab->n_buckets > U32_MAX / sizeof(struct bucket))
@@ -1143,6 +1153,7 @@ static void htab_map_free(struct bpf_map *map)
}
const struct bpf_map_ops htab_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1153,6 +1164,7 @@ const struct bpf_map_ops htab_map_ops = {
};
const struct bpf_map_ops htab_lru_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1236,6 +1248,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
}
const struct bpf_map_ops htab_percpu_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1245,6 +1258,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
};
const struct bpf_map_ops htab_lru_percpu_map_ops = {
+ .map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
@@ -1253,11 +1267,11 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_delete_elem = htab_lru_map_delete_elem,
};
-static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+static int fd_htab_map_alloc_check(union bpf_attr *attr)
{
if (attr->value_size != sizeof(u32))
- return ERR_PTR(-EINVAL);
- return htab_map_alloc(attr);
+ return -EINVAL;
+ return htab_map_alloc_check(attr);
}
static void fd_htab_map_free(struct bpf_map *map)
@@ -1328,7 +1342,7 @@ static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
if (IS_ERR(inner_map_meta))
return inner_map_meta;
- map = fd_htab_map_alloc(attr);
+ map = htab_map_alloc(attr);
if (IS_ERR(map)) {
bpf_map_meta_free(inner_map_meta);
return map;
@@ -1372,6 +1386,7 @@ static void htab_of_map_free(struct bpf_map *map)
}
const struct bpf_map_ops htab_of_maps_map_ops = {
+ .map_alloc_check = fd_htab_map_alloc_check,
.map_alloc = htab_of_map_alloc,
.map_free = htab_of_map_free,
.map_get_next_key = htab_map_get_next_key,
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 885e45479680..584e02227671 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -522,12 +522,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
/* copy mandatory map attributes */
- trie->map.map_type = attr->map_type;
- trie->map.key_size = attr->key_size;
- trie->map.value_size = attr->value_size;
- trie->map.max_entries = attr->max_entries;
- trie->map.map_flags = attr->map_flags;
- trie->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&trie->map, attr);
trie->data_size = attr->key_size -
offsetof(struct bpf_lpm_trie_key, data);
trie->max_prefixlen = trie->data_size * 8;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 040d4e0edf3f..a88cebf368bf 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -24,15 +24,27 @@
#include <linux/rtnetlink.h>
#include <linux/rwsem.h>
-/* Protects bpf_prog_offload_devs and offload members of all progs.
+/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
+ * of all progs.
* RTNL lock cannot be taken when holding this lock.
*/
static DECLARE_RWSEM(bpf_devs_lock);
static LIST_HEAD(bpf_prog_offload_devs);
+static LIST_HEAD(bpf_map_offload_devs);
+
+static int bpf_dev_offload_check(struct net_device *netdev)
+{
+ if (!netdev)
+ return -EINVAL;
+ if (!netdev->netdev_ops->ndo_bpf)
+ return -EOPNOTSUPP;
+ return 0;
+}
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
{
- struct bpf_dev_offload *offload;
+ struct bpf_prog_offload *offload;
+ int err;
if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
attr->prog_type != BPF_PROG_TYPE_XDP)
@@ -49,12 +61,15 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
offload->netdev = dev_get_by_index(current->nsproxy->net_ns,
attr->prog_ifindex);
- if (!offload->netdev)
- goto err_free;
+ err = bpf_dev_offload_check(offload->netdev);
+ if (err)
+ goto err_maybe_put;
down_write(&bpf_devs_lock);
- if (offload->netdev->reg_state != NETREG_REGISTERED)
+ if (offload->netdev->reg_state != NETREG_REGISTERED) {
+ err = -EINVAL;
goto err_unlock;
+ }
prog->aux->offload = offload;
list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
dev_put(offload->netdev);
@@ -63,16 +78,17 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
return 0;
err_unlock:
up_write(&bpf_devs_lock);
- dev_put(offload->netdev);
-err_free:
+err_maybe_put:
+ if (offload->netdev)
+ dev_put(offload->netdev);
kfree(offload);
- return -EINVAL;
+ return err;
}
static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
struct netdev_bpf *data)
{
- struct bpf_dev_offload *offload = prog->aux->offload;
+ struct bpf_prog_offload *offload = prog->aux->offload;
struct net_device *netdev;
ASSERT_RTNL();
@@ -80,8 +96,6 @@ static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
if (!offload)
return -ENODEV;
netdev = offload->netdev;
- if (!netdev->netdev_ops->ndo_bpf)
- return -EOPNOTSUPP;
data->command = cmd;
@@ -110,7 +124,7 @@ exit_unlock:
int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx)
{
- struct bpf_dev_offload *offload;
+ struct bpf_prog_offload *offload;
int ret = -ENODEV;
down_read(&bpf_devs_lock);
@@ -124,7 +138,7 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
{
- struct bpf_dev_offload *offload = prog->aux->offload;
+ struct bpf_prog_offload *offload = prog->aux->offload;
struct netdev_bpf data = {};
data.offload.prog = prog;
@@ -238,11 +252,184 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
const struct bpf_prog_ops bpf_offload_prog_ops = {
};
+static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
+ enum bpf_netdev_command cmd)
+{
+ struct netdev_bpf data = {};
+ struct net_device *netdev;
+
+ ASSERT_RTNL();
+
+ data.command = cmd;
+ data.offmap = offmap;
+ /* Caller must make sure netdev is valid */
+ netdev = offmap->netdev;
+
+ return netdev->netdev_ops->ndo_bpf(netdev, &data);
+}
+
+struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_offloaded_map *offmap;
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+ if (attr->map_type != BPF_MAP_TYPE_HASH)
+ return ERR_PTR(-EINVAL);
+
+ offmap = kzalloc(sizeof(*offmap), GFP_USER);
+ if (!offmap)
+ return ERR_PTR(-ENOMEM);
+
+ bpf_map_init_from_attr(&offmap->map, attr);
+
+ rtnl_lock();
+ down_write(&bpf_devs_lock);
+ offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
+ err = bpf_dev_offload_check(offmap->netdev);
+ if (err)
+ goto err_unlock;
+
+ err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
+ if (err)
+ goto err_unlock;
+
+ list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
+ up_write(&bpf_devs_lock);
+ rtnl_unlock();
+
+ return &offmap->map;
+
+err_unlock:
+ up_write(&bpf_devs_lock);
+ rtnl_unlock();
+ kfree(offmap);
+ return ERR_PTR(err);
+}
+
+static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
+{
+ WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
+ /* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
+ bpf_map_free_id(&offmap->map, true);
+ list_del_init(&offmap->offloads);
+ offmap->netdev = NULL;
+}
+
+void bpf_map_offload_map_free(struct bpf_map *map)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+
+ rtnl_lock();
+ down_write(&bpf_devs_lock);
+ if (offmap->netdev)
+ __bpf_map_offload_destroy(offmap);
+ up_write(&bpf_devs_lock);
+ rtnl_unlock();
+
+ kfree(offmap);
+}
+
+int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_lookup_elem(offmap, key, value);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+int bpf_map_offload_update_elem(struct bpf_map *map,
+ void *key, void *value, u64 flags)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ if (unlikely(flags > BPF_EXIST))
+ return -EINVAL;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_update_elem(offmap, key, value,
+ flags);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+int bpf_map_offload_delete_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_delete_elem(offmap, key);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ struct bpf_offloaded_map *offmap = map_to_offmap(map);
+ int ret = -ENODEV;
+
+ down_read(&bpf_devs_lock);
+ if (offmap->netdev)
+ ret = offmap->dev_ops->map_get_next_key(offmap, key, next_key);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+{
+ struct bpf_offloaded_map *offmap;
+ struct bpf_prog_offload *offload;
+ bool ret;
+
+ if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
+ return false;
+
+ down_read(&bpf_devs_lock);
+ offload = prog->aux->offload;
+ offmap = map_to_offmap(map);
+
+ ret = offload && offload->netdev == offmap->netdev;
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
+static void bpf_offload_orphan_all_progs(struct net_device *netdev)
+{
+ struct bpf_prog_offload *offload, *tmp;
+
+ list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
+ if (offload->netdev == netdev)
+ __bpf_prog_offload_destroy(offload->prog);
+}
+
+static void bpf_offload_orphan_all_maps(struct net_device *netdev)
+{
+ struct bpf_offloaded_map *offmap, *tmp;
+
+ list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
+ if (offmap->netdev == netdev)
+ __bpf_map_offload_destroy(offmap);
+}
+
static int bpf_offload_notification(struct notifier_block *notifier,
ulong event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
- struct bpf_dev_offload *offload, *tmp;
ASSERT_RTNL();
@@ -253,11 +440,8 @@ static int bpf_offload_notification(struct notifier_block *notifier,
break;
down_write(&bpf_devs_lock);
- list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
- offloads) {
- if (offload->netdev == netdev)
- __bpf_prog_offload_destroy(offload->prog);
- }
+ bpf_offload_orphan_all_progs(netdev);
+ bpf_offload_orphan_all_maps(netdev);
up_write(&bpf_devs_lock);
break;
default:
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 079968680bc3..0314d1783d77 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -513,13 +513,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
if (!stab)
return ERR_PTR(-ENOMEM);
- /* mandatory map attributes */
- stab->map.map_type = attr->map_type;
- stab->map.key_size = attr->key_size;
- stab->map.value_size = attr->value_size;
- stab->map.max_entries = attr->max_entries;
- stab->map.map_flags = attr->map_flags;
- stab->map.numa_node = bpf_map_attr_numa_node(attr);
+ bpf_map_init_from_attr(&stab->map, attr);
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 6c63c2222ea8..b0ecf43f5894 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -88,14 +88,10 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (cost >= U32_MAX - PAGE_SIZE)
goto free_smap;
- smap->map.map_type = attr->map_type;
- smap->map.key_size = attr->key_size;
+ bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
- smap->map.max_entries = attr->max_entries;
- smap->map.map_flags = attr->map_flags;
smap->n_buckets = n_buckets;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
- smap->map.numa_node = bpf_map_attr_numa_node(attr);
err = bpf_map_precharge_memlock(smap->map.pages);
if (err)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2bac0dc8baba..c691b9e972e3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -94,18 +94,34 @@ static int check_uarg_tail_zero(void __user *uaddr,
return 0;
}
+const struct bpf_map_ops bpf_map_offload_ops = {
+ .map_alloc = bpf_map_offload_map_alloc,
+ .map_free = bpf_map_offload_map_free,
+};
+
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
+ const struct bpf_map_ops *ops;
struct bpf_map *map;
+ int err;
- if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
- !bpf_map_types[attr->map_type])
+ if (attr->map_type >= ARRAY_SIZE(bpf_map_types))
+ return ERR_PTR(-EINVAL);
+ ops = bpf_map_types[attr->map_type];
+ if (!ops)
return ERR_PTR(-EINVAL);
- map = bpf_map_types[attr->map_type]->map_alloc(attr);
+ if (ops->map_alloc_check) {
+ err = ops->map_alloc_check(attr);
+ if (err)
+ return ERR_PTR(err);
+ }
+ if (attr->map_ifindex)
+ ops = &bpf_map_offload_ops;
+ map = ops->map_alloc(attr);
if (IS_ERR(map))
return map;
- map->ops = bpf_map_types[attr->map_type];
+ map->ops = ops;
map->map_type = attr->map_type;
return map;
}
@@ -134,6 +150,16 @@ void bpf_map_area_free(void *area)
kvfree(area);
}
+void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
+{
+ map->map_type = attr->map_type;
+ map->key_size = attr->key_size;
+ map->value_size = attr->value_size;
+ map->max_entries = attr->max_entries;
+ map->map_flags = attr->map_flags;
+ map->numa_node = bpf_map_attr_numa_node(attr);
+}
+
int bpf_map_precharge_memlock(u32 pages)
{
struct user_struct *user = get_current_user();
@@ -189,16 +215,25 @@ static int bpf_map_alloc_id(struct bpf_map *map)
return id > 0 ? 0 : id;
}
-static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
+void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
unsigned long flags;
+ /* Offloaded maps are removed from the IDR store when their device
+ * disappears - even if someone holds an fd to them they are unusable,
+ * the memory is gone, all ops will fail; they are simply waiting for
+ * refcnt to drop to be freed.
+ */
+ if (!map->id)
+ return;
+
if (do_idr_lock)
spin_lock_irqsave(&map_idr_lock, flags);
else
__acquire(&map_idr_lock);
idr_remove(&map_idr, map->id);
+ map->id = 0;
if (do_idr_lock)
spin_unlock_irqrestore(&map_idr_lock, flags);
@@ -378,7 +413,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
-#define BPF_MAP_CREATE_LAST_FIELD map_name
+#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -566,8 +601,10 @@ static int map_lookup_elem(union bpf_attr *attr)
if (!value)
goto free_key;
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_lookup_elem(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value);
@@ -654,7 +691,10 @@ static int map_update_elem(union bpf_attr *attr)
goto free_value;
/* Need to create a kthread, thus must support schedule */
- if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_update_elem(map, key, value, attr->flags);
+ goto out;
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
err = map->ops->map_update_elem(map, key, value, attr->flags);
goto out;
}
@@ -731,6 +771,11 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_delete_elem(map, key);
+ goto out;
+ }
+
preempt_disable();
__this_cpu_inc(bpf_prog_active);
rcu_read_lock();
@@ -738,7 +783,7 @@ static int map_delete_elem(union bpf_attr *attr)
rcu_read_unlock();
__this_cpu_dec(bpf_prog_active);
preempt_enable();
-
+out:
if (!err)
trace_bpf_map_delete_elem(map, ufd, key);
kfree(key);
@@ -788,9 +833,15 @@ static int map_get_next_key(union bpf_attr *attr)
if (!next_key)
goto free_key;
+ if (bpf_map_is_dev_bound(map)) {
+ err = bpf_map_offload_get_next_key(map, key, next_key);
+ goto out;
+ }
+
rcu_read_lock();
err = map->ops->map_get_next_key(map, key, next_key);
rcu_read_unlock();
+out:
if (err)
goto free_next_key;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48b61caa94cb..ceabb394d2dc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4816,6 +4816,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
return -EINVAL;
}
}
+
+ if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
+ !bpf_offload_dev_match(prog, map)) {
+ verbose(env, "offload device mismatch between prog and map\n");
+ return -EINVAL;
+ }
+
return 0;
}
diff --git a/kernel/fail_function.c b/kernel/fail_function.c
new file mode 100644
index 000000000000..21b0122cb39c
--- /dev/null
+++ b/kernel/fail_function.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fail_function.c: Function-based error injection
+ */
+#include <linux/error-injection.h>
+#include <linux/debugfs.h>
+#include <linux/fault-inject.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs);
+
+struct fei_attr {
+ struct list_head list;
+ struct kprobe kp;
+ unsigned long retval;
+};
+static DEFINE_MUTEX(fei_lock);
+static LIST_HEAD(fei_attr_list);
+static DECLARE_FAULT_ATTR(fei_fault_attr);
+static struct dentry *fei_debugfs_dir;
+
+static unsigned long adjust_error_retval(unsigned long addr, unsigned long retv)
+{
+ switch (get_injectable_error_type(addr)) {
+ case EI_ETYPE_NULL:
+ if (retv != 0)
+ return 0;
+ break;
+ case EI_ETYPE_ERRNO:
+ if (retv < (unsigned long)-MAX_ERRNO)
+ return (unsigned long)-EINVAL;
+ break;
+ case EI_ETYPE_ERRNO_NULL:
+ if (retv != 0 && retv < (unsigned long)-MAX_ERRNO)
+ return (unsigned long)-EINVAL;
+ break;
+ }
+
+ return retv;
+}
+
+static struct fei_attr *fei_attr_new(const char *sym, unsigned long addr)
+{
+ struct fei_attr *attr;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (attr) {
+ attr->kp.symbol_name = kstrdup(sym, GFP_KERNEL);
+ if (!attr->kp.symbol_name) {
+ kfree(attr);
+ return NULL;
+ }
+ attr->kp.pre_handler = fei_kprobe_handler;
+ attr->retval = adjust_error_retval(addr, 0);
+ INIT_LIST_HEAD(&attr->list);
+ }
+ return attr;
+}
+
+static void fei_attr_free(struct fei_attr *attr)
+{
+ if (attr) {
+ kfree(attr->kp.symbol_name);
+ kfree(attr);
+ }
+}
+
+static struct fei_attr *fei_attr_lookup(const char *sym)
+{
+ struct fei_attr *attr;
+
+ list_for_each_entry(attr, &fei_attr_list, list) {
+ if (!strcmp(attr->kp.symbol_name, sym))
+ return attr;
+ }
+
+ return NULL;
+}
+
+static bool fei_attr_is_valid(struct fei_attr *_attr)
+{
+ struct fei_attr *attr;
+
+ list_for_each_entry(attr, &fei_attr_list, list) {
+ if (attr == _attr)
+ return true;
+ }
+
+ return false;
+}
+
+static int fei_retval_set(void *data, u64 val)
+{
+ struct fei_attr *attr = data;
+ unsigned long retv = (unsigned long)val;
+ int err = 0;
+
+ mutex_lock(&fei_lock);
+ /*
+ * Since this operation can be done after retval file is removed,
+ * It is safer to check the attr is still valid before accessing
+ * its member.
+ */
+ if (!fei_attr_is_valid(attr)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (attr->kp.addr) {
+ if (adjust_error_retval((unsigned long)attr->kp.addr,
+ val) != retv)
+ err = -EINVAL;
+ }
+ if (!err)
+ attr->retval = val;
+out:
+ mutex_unlock(&fei_lock);
+
+ return err;
+}
+
+static int fei_retval_get(void *data, u64 *val)
+{
+ struct fei_attr *attr = data;
+ int err = 0;
+
+ mutex_lock(&fei_lock);
+ /* Here we also validate @attr to ensure it still exists. */
+ if (!fei_attr_is_valid(attr))
+ err = -ENOENT;
+ else
+ *val = attr->retval;
+ mutex_unlock(&fei_lock);
+
+ return err;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fei_retval_ops, fei_retval_get, fei_retval_set,
+ "%llx\n");
+
+static int fei_debugfs_add_attr(struct fei_attr *attr)
+{
+ struct dentry *dir;
+
+ dir = debugfs_create_dir(attr->kp.symbol_name, fei_debugfs_dir);
+ if (!dir)
+ return -ENOMEM;
+
+ if (!debugfs_create_file("retval", 0600, dir, attr, &fei_retval_ops)) {
+ debugfs_remove_recursive(dir);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void fei_debugfs_remove_attr(struct fei_attr *attr)
+{
+ struct dentry *dir;
+
+ dir = debugfs_lookup(attr->kp.symbol_name, fei_debugfs_dir);
+ if (dir)
+ debugfs_remove_recursive(dir);
+}
+
+static int fei_kprobe_handler(struct kprobe *kp, struct pt_regs *regs)
+{
+ struct fei_attr *attr = container_of(kp, struct fei_attr, kp);
+
+ if (should_fail(&fei_fault_attr, 1)) {
+ regs_set_return_value(regs, attr->retval);
+ override_function_with_return(regs);
+ /* Kprobe specific fixup */
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+
+ return 0;
+}
+NOKPROBE_SYMBOL(fei_kprobe_handler)
+
+static void *fei_seq_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&fei_lock);
+ return seq_list_start(&fei_attr_list, *pos);
+}
+
+static void fei_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&fei_lock);
+}
+
+static void *fei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return seq_list_next(v, &fei_attr_list, pos);
+}
+
+static int fei_seq_show(struct seq_file *m, void *v)
+{
+ struct fei_attr *attr = list_entry(v, struct fei_attr, list);
+
+ seq_printf(m, "%pf\n", attr->kp.addr);
+ return 0;
+}
+
+static const struct seq_operations fei_seq_ops = {
+ .start = fei_seq_start,
+ .next = fei_seq_next,
+ .stop = fei_seq_stop,
+ .show = fei_seq_show,
+};
+
+static int fei_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &fei_seq_ops);
+}
+
+static void fei_attr_remove(struct fei_attr *attr)
+{
+ fei_debugfs_remove_attr(attr);
+ unregister_kprobe(&attr->kp);
+ list_del(&attr->list);
+ fei_attr_free(attr);
+}
+
+static void fei_attr_remove_all(void)
+{
+ struct fei_attr *attr, *n;
+
+ list_for_each_entry_safe(attr, n, &fei_attr_list, list) {
+ fei_attr_remove(attr);
+ }
+}
+
+static ssize_t fei_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct fei_attr *attr;
+ unsigned long addr;
+ char *buf, *sym;
+ int ret;
+
+ /* cut off if it is too long */
+ if (count > KSYM_NAME_LEN)
+ count = KSYM_NAME_LEN;
+ buf = kmalloc(sizeof(char) * (count + 1), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, buffer, count)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ buf[count] = '\0';
+ sym = strstrip(buf);
+
+ mutex_lock(&fei_lock);
+
+ /* Writing just spaces will remove all injection points */
+ if (sym[0] == '\0') {
+ fei_attr_remove_all();
+ ret = count;
+ goto out;
+ }
+ /* Writing !function will remove one injection point */
+ if (sym[0] == '!') {
+ attr = fei_attr_lookup(sym + 1);
+ if (!attr) {
+ ret = -ENOENT;
+ goto out;
+ }
+ fei_attr_remove(attr);
+ ret = count;
+ goto out;
+ }
+
+ addr = kallsyms_lookup_name(sym);
+ if (!addr) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!within_error_injection_list(addr)) {
+ ret = -ERANGE;
+ goto out;
+ }
+ if (fei_attr_lookup(sym)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ attr = fei_attr_new(sym, addr);
+ if (!attr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = register_kprobe(&attr->kp);
+ if (!ret)
+ ret = fei_debugfs_add_attr(attr);
+ if (ret < 0)
+ fei_attr_remove(attr);
+ else {
+ list_add_tail(&attr->list, &fei_attr_list);
+ ret = count;
+ }
+out:
+ kfree(buf);
+ mutex_unlock(&fei_lock);
+ return ret;
+}
+
+static const struct file_operations fei_ops = {
+ .open = fei_open,
+ .read = seq_read,
+ .write = fei_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init fei_debugfs_init(void)
+{
+ struct dentry *dir;
+
+ dir = fault_create_debugfs_attr("fail_function", NULL,
+ &fei_fault_attr);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ /* injectable attribute is just a symlink of error_inject/list */
+ if (!debugfs_create_symlink("injectable", dir,
+ "../error_injection/list"))
+ goto error;
+
+ if (!debugfs_create_file("inject", 0600, dir, NULL, &fei_ops))
+ goto error;
+
+ fei_debugfs_dir = dir;
+
+ return 0;
+error:
+ debugfs_remove_recursive(dir);
+ return -ENOMEM;
+}
+
+late_initcall(fei_debugfs_init);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b4aab48ad258..da2ccf142358 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -83,16 +83,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
return &(kretprobe_table_locks[hash].lock);
}
-/* List of symbols that can be overriden for error injection. */
-static LIST_HEAD(kprobe_error_injection_list);
-static DEFINE_MUTEX(kprobe_ei_mutex);
-struct kprobe_ei_entry {
- struct list_head list;
- unsigned long start_addr;
- unsigned long end_addr;
- void *priv;
-};
-
/* Blacklist -- list of struct kprobe_blacklist_entry */
static LIST_HEAD(kprobe_blacklist);
@@ -1404,17 +1394,6 @@ bool within_kprobe_blacklist(unsigned long addr)
return false;
}
-bool within_kprobe_error_injection_list(unsigned long addr)
-{
- struct kprobe_ei_entry *ent;
-
- list_for_each_entry(ent, &kprobe_error_injection_list, list) {
- if (addr >= ent->start_addr && addr < ent->end_addr)
- return true;
- }
- return false;
-}
-
/*
* If we have a symbol_name argument, look it up and add the offset field
* to it. This way, we can specify a relative address to a symbol.
@@ -2189,86 +2168,6 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
return 0;
}
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
-/* Markers of the _kprobe_error_inject_list section */
-extern unsigned long __start_kprobe_error_inject_list[];
-extern unsigned long __stop_kprobe_error_inject_list[];
-
-/*
- * Lookup and populate the kprobe_error_injection_list.
- *
- * For safety reasons we only allow certain functions to be overriden with
- * bpf_error_injection, so we need to populate the list of the symbols that have
- * been marked as safe for overriding.
- */
-static void populate_kprobe_error_injection_list(unsigned long *start,
- unsigned long *end,
- void *priv)
-{
- unsigned long *iter;
- struct kprobe_ei_entry *ent;
- unsigned long entry, offset = 0, size = 0;
-
- mutex_lock(&kprobe_ei_mutex);
- for (iter = start; iter < end; iter++) {
- entry = arch_deref_entry_point((void *)*iter);
-
- if (!kernel_text_address(entry) ||
- !kallsyms_lookup_size_offset(entry, &size, &offset)) {
- pr_err("Failed to find error inject entry at %p\n",
- (void *)entry);
- continue;
- }
-
- ent = kmalloc(sizeof(*ent), GFP_KERNEL);
- if (!ent)
- break;
- ent->start_addr = entry;
- ent->end_addr = entry + size;
- ent->priv = priv;
- INIT_LIST_HEAD(&ent->list);
- list_add_tail(&ent->list, &kprobe_error_injection_list);
- }
- mutex_unlock(&kprobe_ei_mutex);
-}
-
-static void __init populate_kernel_kprobe_ei_list(void)
-{
- populate_kprobe_error_injection_list(__start_kprobe_error_inject_list,
- __stop_kprobe_error_inject_list,
- NULL);
-}
-
-static void module_load_kprobe_ei_list(struct module *mod)
-{
- if (!mod->num_kprobe_ei_funcs)
- return;
- populate_kprobe_error_injection_list(mod->kprobe_ei_funcs,
- mod->kprobe_ei_funcs +
- mod->num_kprobe_ei_funcs, mod);
-}
-
-static void module_unload_kprobe_ei_list(struct module *mod)
-{
- struct kprobe_ei_entry *ent, *n;
- if (!mod->num_kprobe_ei_funcs)
- return;
-
- mutex_lock(&kprobe_ei_mutex);
- list_for_each_entry_safe(ent, n, &kprobe_error_injection_list, list) {
- if (ent->priv == mod) {
- list_del_init(&ent->list);
- kfree(ent);
- }
- }
- mutex_unlock(&kprobe_ei_mutex);
-}
-#else
-static inline void __init populate_kernel_kprobe_ei_list(void) {}
-static inline void module_load_kprobe_ei_list(struct module *m) {}
-static inline void module_unload_kprobe_ei_list(struct module *m) {}
-#endif
-
/* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
@@ -2279,11 +2178,6 @@ static int kprobes_module_callback(struct notifier_block *nb,
unsigned int i;
int checkcore = (val == MODULE_STATE_GOING);
- if (val == MODULE_STATE_COMING)
- module_load_kprobe_ei_list(mod);
- else if (val == MODULE_STATE_GOING)
- module_unload_kprobe_ei_list(mod);
-
if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
return NOTIFY_DONE;
@@ -2346,8 +2240,6 @@ static int __init init_kprobes(void)
pr_err("Please take care of using kprobes.\n");
}
- populate_kernel_kprobe_ei_list();
-
if (kretprobe_blacklist_size) {
/* lookup the function address from its name */
for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -2515,56 +2407,6 @@ static const struct file_operations debugfs_kprobe_blacklist_ops = {
.release = seq_release,
};
-/*
- * kprobes/error_injection_list -- shows which functions can be overriden for
- * error injection.
- * */
-static void *kprobe_ei_seq_start(struct seq_file *m, loff_t *pos)
-{
- mutex_lock(&kprobe_ei_mutex);
- return seq_list_start(&kprobe_error_injection_list, *pos);
-}
-
-static void kprobe_ei_seq_stop(struct seq_file *m, void *v)
-{
- mutex_unlock(&kprobe_ei_mutex);
-}
-
-static void *kprobe_ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- return seq_list_next(v, &kprobe_error_injection_list, pos);
-}
-
-static int kprobe_ei_seq_show(struct seq_file *m, void *v)
-{
- char buffer[KSYM_SYMBOL_LEN];
- struct kprobe_ei_entry *ent =
- list_entry(v, struct kprobe_ei_entry, list);
-
- sprint_symbol(buffer, ent->start_addr);
- seq_printf(m, "%s\n", buffer);
- return 0;
-}
-
-static const struct seq_operations kprobe_ei_seq_ops = {
- .start = kprobe_ei_seq_start,
- .next = kprobe_ei_seq_next,
- .stop = kprobe_ei_seq_stop,
- .show = kprobe_ei_seq_show,
-};
-
-static int kprobe_ei_open(struct inode *inode, struct file *filp)
-{
- return seq_open(filp, &kprobe_ei_seq_ops);
-}
-
-static const struct file_operations debugfs_kprobe_ei_ops = {
- .open = kprobe_ei_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static void arm_all_kprobes(void)
{
struct hlist_head *head;
@@ -2706,11 +2548,6 @@ static int __init debugfs_kprobe_init(void)
if (!file)
goto error;
- file = debugfs_create_file("error_injection_list", 0444, dir, NULL,
- &debugfs_kprobe_ei_ops);
- if (!file)
- goto error;
-
return 0;
error:
diff --git a/kernel/module.c b/kernel/module.c
index bd695bfdc5c4..601494d4b7ea 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3118,10 +3118,10 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->ftrace_callsites),
&mod->num_ftrace_callsites);
#endif
-#ifdef CONFIG_BPF_KPROBE_OVERRIDE
- mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
- sizeof(*mod->kprobe_ei_funcs),
- &mod->num_kprobe_ei_funcs);
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+ mod->ei_funcs = section_objs(info, "_error_injection_whitelist",
+ sizeof(*mod->ei_funcs),
+ &mod->num_ei_funcs);
#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index ae3a2d519e50..7114c885a78a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -533,9 +533,7 @@ config FUNCTION_PROFILER
config BPF_KPROBE_OVERRIDE
bool "Enable BPF programs to override a kprobed function"
depends on BPF_EVENTS
- depends on KPROBES_ON_FTRACE
- depends on HAVE_KPROBE_OVERRIDE
- depends on DYNAMIC_FTRACE_WITH_REGS
+ depends on FUNCTION_ERROR_INJECTION
default n
help
Allows BPF to override the execution of a probed function and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f6d2327ecb59..f274468cbc45 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,7 +14,7 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
-#include <asm/kprobes.h>
+#include <linux/error-injection.h>
#include "trace_probe.h"
#include "trace.h"
@@ -83,9 +83,8 @@ EXPORT_SYMBOL_GPL(trace_call_bpf);
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
- __this_cpu_write(bpf_kprobe_override, 1);
regs_set_return_value(regs, rc);
- arch_ftrace_kprobe_override_function(regs);
+ override_function_with_return(regs);
return 0;
}
@@ -800,11 +799,11 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
int ret = -EEXIST;
/*
- * Kprobe override only works for ftrace based kprobes, and only if they
- * are on the opt-in list.
+ * Kprobe override only works if they are on the function entry,
+ * and only if they are on the opt-in list.
*/
if (prog->kprobe_override &&
- (!trace_kprobe_ftrace(event->tp_event) ||
+ (!trace_kprobe_on_func_entry(event->tp_event) ||
!trace_kprobe_error_injectable(event->tp_event)))
return -EINVAL;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 91f4b57dab82..1fad24acd444 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>
+#include <linux/error-injection.h>
#include "trace_probe.h"
@@ -42,8 +43,6 @@ struct trace_kprobe {
(offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
-DEFINE_PER_CPU(int, bpf_kprobe_override);
-
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
{
return tk->rp.handler != NULL;
@@ -88,13 +87,16 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
return nhit;
}
-int trace_kprobe_ftrace(struct trace_event_call *call)
+bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
- return kprobe_ftrace(&tk->rp.kp);
+
+ return kprobe_on_func_entry(tk->rp.kp.addr,
+ tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
+ tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
}
-int trace_kprobe_error_injectable(struct trace_event_call *call)
+bool trace_kprobe_error_injectable(struct trace_event_call *call)
{
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
unsigned long addr;
@@ -106,7 +108,7 @@ int trace_kprobe_error_injectable(struct trace_event_call *call)
} else {
addr = (unsigned long)tk->rp.kp.addr;
}
- return within_kprobe_error_injection_list(addr);
+ return within_error_injection_list(addr);
}
static int register_kprobe_event(struct trace_kprobe *tk);
@@ -1202,6 +1204,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
int rctx;
if (bpf_prog_array_valid(call)) {
+ unsigned long orig_ip = instruction_pointer(regs);
int ret;
ret = trace_call_bpf(call, regs);
@@ -1209,12 +1212,13 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
/*
* We need to check and see if we modified the pc of the
* pt_regs, and if so clear the kprobe and return 1 so that we
- * don't do the instruction skipping. Also reset our state so
- * we are clean the next pass through.
+ * don't do the single stepping.
+ * The ftrace kprobe handler leaves it up to us to re-enable
+ * preemption here before returning if we've modified the ip.
*/
- if (__this_cpu_read(bpf_kprobe_override)) {
- __this_cpu_write(bpf_kprobe_override, 0);
+ if (orig_ip != instruction_pointer(regs)) {
reset_current_kprobe();
+ preempt_enable_no_resched();
return 1;
}
if (!ret)
@@ -1322,15 +1326,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
if (tk->tp.flags & TP_FLAG_TRACE)
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tk->tp.flags & TP_FLAG_PROFILE) {
+ if (tk->tp.flags & TP_FLAG_PROFILE)
ret = kprobe_perf_func(tk, regs);
- /*
- * The ftrace kprobe handler leaves it up to us to re-enable
- * preemption here before returning if we've modified the ip.
- */
- if (ret)
- preempt_enable_no_resched();
- }
#endif
return ret;
}
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5e54d748c84c..e101c5bb9eda 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,8 +252,8 @@ struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc);
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
-int trace_kprobe_ftrace(struct trace_event_call *call);
-int trace_kprobe_error_injectable(struct trace_event_call *call);
+bool trace_kprobe_on_func_entry(struct trace_event_call *call);
+bool trace_kprobe_error_injectable(struct trace_event_call *call);
#else
/* uprobes do not support symbol fetch methods */
#define fetch_symbol_u8 NULL
@@ -280,14 +280,14 @@ alloc_symbol_cache(const char *sym, long offset)
return NULL;
}
-static inline int trace_kprobe_ftrace(struct trace_event_call *call)
+static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
- return 0;
+ return false;
}
-static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
+static inline bool trace_kprobe_error_injectable(struct trace_event_call *call)
{
- return 0;
+ return false;
}
#endif /* CONFIG_KPROBE_EVENTS */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9d5b78aad4c5..890d4766cef3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1500,6 +1500,10 @@ config FAULT_INJECTION
Provide fault-injection framework.
For more details, see Documentation/fault-injection/.
+config FUNCTION_ERROR_INJECTION
+ def_bool y
+ depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
+
config FAILSLAB
bool "Fault-injection capability for kmalloc"
depends on FAULT_INJECTION
@@ -1547,6 +1551,16 @@ config FAIL_FUTEX
help
Provide fault-injection capability for futexes.
+config FAIL_FUNCTION
+ bool "Fault-injection capability for functions"
+ depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
+ help
+ Provide function-based fault-injection capability.
+ This will allow you to override a specific function with a return
+ with given return value. As a result, function caller will see
+ an error value and have to handle it. This is useful to test the
+ error handling in various subsystems.
+
config FAULT_INJECTION_DEBUG_FS
bool "Debugfs entries for fault-injection capabilities"
depends on FAULT_INJECTION && SYSFS && DEBUG_FS
diff --git a/lib/Makefile b/lib/Makefile
index a6c8529dd9b2..75ec13778cd8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o
obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \
of-reconfig-notifier-error-inject.o
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_GENERIC_BUG) += bug.o
diff --git a/lib/error-inject.c b/lib/error-inject.c
new file mode 100644
index 000000000000..c0d4600f4896
--- /dev/null
+++ b/lib/error-inject.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+// error-inject.c: Function-level error injection table
+#include <linux/error-injection.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+/* Whitelist of symbols that can be overridden for error injection. */
+static LIST_HEAD(error_injection_list);
+static DEFINE_MUTEX(ei_mutex);
+struct ei_entry {
+ struct list_head list;
+ unsigned long start_addr;
+ unsigned long end_addr;
+ int etype;
+ void *priv;
+};
+
+bool within_error_injection_list(unsigned long addr)
+{
+ struct ei_entry *ent;
+ bool ret = false;
+
+ mutex_lock(&ei_mutex);
+ list_for_each_entry(ent, &error_injection_list, list) {
+ if (addr >= ent->start_addr && addr < ent->end_addr) {
+ ret = true;
+ break;
+ }
+ }
+ mutex_unlock(&ei_mutex);
+ return ret;
+}
+
+int get_injectable_error_type(unsigned long addr)
+{
+ struct ei_entry *ent;
+
+ list_for_each_entry(ent, &error_injection_list, list) {
+ if (addr >= ent->start_addr && addr < ent->end_addr)
+ return ent->etype;
+ }
+ return EI_ETYPE_NONE;
+}
+
+/*
+ * Lookup and populate the error_injection_list.
+ *
+ * For safety reasons we only allow certain functions to be overridden with
+ * bpf_error_injection, so we need to populate the list of the symbols that have
+ * been marked as safe for overriding.
+ */
+static void populate_error_injection_list(struct error_injection_entry *start,
+ struct error_injection_entry *end,
+ void *priv)
+{
+ struct error_injection_entry *iter;
+ struct ei_entry *ent;
+ unsigned long entry, offset = 0, size = 0;
+
+ mutex_lock(&ei_mutex);
+ for (iter = start; iter < end; iter++) {
+ entry = arch_deref_entry_point((void *)iter->addr);
+
+ if (!kernel_text_address(entry) ||
+ !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+ pr_err("Failed to find error inject entry at %p\n",
+ (void *)entry);
+ continue;
+ }
+
+ ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ break;
+ ent->start_addr = entry;
+ ent->end_addr = entry + size;
+ ent->etype = iter->etype;
+ ent->priv = priv;
+ INIT_LIST_HEAD(&ent->list);
+ list_add_tail(&ent->list, &error_injection_list);
+ }
+ mutex_unlock(&ei_mutex);
+}
+
+/* Markers of the _error_inject_whitelist section */
+extern struct error_injection_entry __start_error_injection_whitelist[];
+extern struct error_injection_entry __stop_error_injection_whitelist[];
+
+static void __init populate_kernel_ei_list(void)
+{
+ populate_error_injection_list(__start_error_injection_whitelist,
+ __stop_error_injection_whitelist,
+ NULL);
+}
+
+#ifdef CONFIG_MODULES
+static void module_load_ei_list(struct module *mod)
+{
+ if (!mod->num_ei_funcs)
+ return;
+
+ populate_error_injection_list(mod->ei_funcs,
+ mod->ei_funcs + mod->num_ei_funcs, mod);
+}
+
+static void module_unload_ei_list(struct module *mod)
+{
+ struct ei_entry *ent, *n;
+
+ if (!mod->num_ei_funcs)
+ return;
+
+ mutex_lock(&ei_mutex);
+ list_for_each_entry_safe(ent, n, &error_injection_list, list) {
+ if (ent->priv == mod) {
+ list_del_init(&ent->list);
+ kfree(ent);
+ }
+ }
+ mutex_unlock(&ei_mutex);
+}
+
+/* Module notifier call back, checking error injection table on the module */
+static int ei_module_callback(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+
+ if (val == MODULE_STATE_COMING)
+ module_load_ei_list(mod);
+ else if (val == MODULE_STATE_GOING)
+ module_unload_ei_list(mod);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ei_module_nb = {
+ .notifier_call = ei_module_callback,
+ .priority = 0
+};
+
+static __init int module_ei_init(void)
+{
+ return register_module_notifier(&ei_module_nb);
+}
+#else /* !CONFIG_MODULES */
+#define module_ei_init() (0)
+#endif
+
+/*
+ * error_injection/whitelist -- shows which functions can be overridden for
+ * error injection.
+ */
+static void *ei_seq_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&ei_mutex);
+ return seq_list_start(&error_injection_list, *pos);
+}
+
+static void ei_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&ei_mutex);
+}
+
+static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return seq_list_next(v, &error_injection_list, pos);
+}
+
+static const char *error_type_string(int etype)
+{
+ switch (etype) {
+ case EI_ETYPE_NULL:
+ return "NULL";
+ case EI_ETYPE_ERRNO:
+ return "ERRNO";
+ case EI_ETYPE_ERRNO_NULL:
+ return "ERRNO_NULL";
+ default:
+ return "(unknown)";
+ }
+}
+
+static int ei_seq_show(struct seq_file *m, void *v)
+{
+ struct ei_entry *ent = list_entry(v, struct ei_entry, list);
+
+ seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr,
+ error_type_string(ent->etype));
+ return 0;
+}
+
+static const struct seq_operations ei_seq_ops = {
+ .start = ei_seq_start,
+ .next = ei_seq_next,
+ .stop = ei_seq_stop,
+ .show = ei_seq_show,
+};
+
+static int ei_open(struct inode *inode, struct file *filp)
+{
+ return seq_open(filp, &ei_seq_ops);
+}
+
+static const struct file_operations debugfs_ei_ops = {
+ .open = ei_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init ei_debugfs_init(void)
+{
+ struct dentry *dir, *file;
+
+ dir = debugfs_create_dir("error_injection", NULL);
+ if (!dir)
+ return -ENOMEM;
+
+ file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops);
+ if (!file) {
+ debugfs_remove(dir);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int __init init_error_injection(void)
+{
+ populate_kernel_ei_list();
+
+ if (!module_ei_init())
+ ei_debugfs_init();
+
+ return 0;
+}
+late_initcall(init_error_injection);
diff --git a/net/core/filter.c b/net/core/filter.c
index d4b190e63b79..db2ee8c7e1bd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4310,16 +4310,15 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->dst_reg,
offsetof(struct xdp_rxq_info, dev));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
- bpf_target_off(struct net_device,
- ifindex, 4, target_size));
+ offsetof(struct net_device, ifindex));
break;
case offsetof(struct xdp_md, rx_queue_index):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, rxq));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
- bpf_target_off(struct xdp_rxq_info,
- queue_index, 4, target_size));
+ offsetof(struct xdp_rxq_info,
+ queue_index));
break;
}
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 2fe2f761a0d0..c969141bfa8b 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -104,7 +104,7 @@ struct xdp_exception_ctx {
SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
- u64 *cnt;;
+ u64 *cnt;
u32 key;
key = ctx->act;
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 2237bc43f71c..26901ec87361 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -39,7 +39,7 @@ CC = gcc
CFLAGS += -O2
CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
-CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index c6a28be4665c..099e21cf1b5c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -66,6 +66,7 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
};
static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 17f2c73fff8b..bc715f6ac320 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -190,7 +190,7 @@ $(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
$(OUTPUT)test-disassembler-four-args.bin:
- $(BUILD) -lbfd -lopcodes
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
$(OUTPUT)test-liberty.bin:
$(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4e8c60acfa32..69f96af4a569 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -245,6 +245,7 @@ union bpf_attr {
* BPF_F_NUMA_NODE is set).
*/
char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 8ed43ae9db9b..83714ca1f22b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -93,7 +93,6 @@ export prefix libdir src obj
# Shell quotes
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
LIB_FILE = libbpf.a libbpf.so
@@ -150,7 +149,7 @@ CMD_TARGETS = $(LIB_FILE)
TARGETS = $(CMD_TARGETS)
-all: fixdep $(VERSION_FILES) all_cmd
+all: fixdep all_cmd
all_cmd: $(CMD_TARGETS)
@@ -169,21 +168,11 @@ $(OUTPUT)libbpf.so: $(BPF_IN)
$(OUTPUT)libbpf.a: $(BPF_IN)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
-define update_dir
- (echo $1 > $@.tmp; \
- if [ -r $@ ] && cmp -s $@ $@.tmp; then \
- rm -f $@.tmp; \
- else \
- echo ' UPDATE $@'; \
- mv -f $@.tmp $@; \
- fi);
-endef
-
define do_install
if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
fi; \
- $(INSTALL) $1 '$(DESTDIR_SQ)$2'
+ $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
endef
install_lib: all_cmd
@@ -192,7 +181,8 @@ install_lib: all_cmd
install_headers:
$(call QUIET_INSTALL, headers) \
- $(call do_install,bpf.h,$(prefix)/include/bpf,644)
+ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
+ $(call do_install,libbpf.h,$(prefix)/include/bpf,644);
install: install_lib
@@ -203,7 +193,7 @@ config-clean:
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
clean:
- $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \
+ $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \
$(RM) LIBBPF-CFLAGS
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e9c4b7cabcf2..30c776375118 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1803,7 +1803,7 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-#define BPF_PROG_SEC(string, type) { string, sizeof(string), type }
+#define BPF_PROG_SEC(string, type) { string, sizeof(string) - 1, type }
static const struct {
const char *sec;
size_t len;