From 5d902372ba5f416261c79123f02e49c664c7118f Mon Sep 17 00:00:00 2001
From: Björn Töpel
Date: Tue, 12 Jun 2018 12:02:56 +0200
Subject: xsk: re-add queue id check for XDP_SKB path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 173d3adb6f43 ("xsk: add zero-copy support for Rx") introduced a
regression on the XDP_SKB receive path when the queue id checks were
removed. Now, they are back again.

Fixes: 173d3adb6f43 ("xsk: add zero-copy support for Rx")
Reported-by: Qi Zhang
Signed-off-by: Björn Töpel
Signed-off-by: Daniel Borkmann
---
 net/xdp/xsk.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 36919a254ba3..3b3410ada097 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -118,6 +118,9 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	u64 addr;
 	int err;
 
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+		return -EINVAL;
+
 	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
 	    len > xs->umem->chunk_size_nohr) {
 		xs->rx_dropped++;
--
cgit v1.2.3-70-g09d2
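
For context on why this check must live in xsk_generic_rcv(): each AF_XDP
socket is bound from userspace to exactly one (device, queue) pair, so
frames that generic XDP picked up on any other device or queue must not
be delivered to it. Below is a minimal sketch of that bind step; the
helper name xsk_bind_queue is made up for illustration, the fd is assumed
to come from socket(AF_XDP, SOCK_RAW, 0) with headers new enough to
provide AF_XDP, and UMEM setup plus error handling are omitted:

    #include <net/if.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <linux/if_xdp.h>

    /* Tie an AF_XDP socket to one (ifindex, queue_id) pair. The kernel
     * check re-added above enforces exactly this binding on the XDP_SKB
     * receive path.
     */
    static int xsk_bind_queue(int xsk_fd, const char *ifname,
                              unsigned int queue_id)
    {
            struct sockaddr_xdp sxdp;

            memset(&sxdp, 0, sizeof(sxdp));
            sxdp.sxdp_family = AF_XDP;
            sxdp.sxdp_ifindex = if_nametoindex(ifname);
            sxdp.sxdp_queue_id = queue_id;

            return bind(xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
    }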

From 73df93c57c0b18195a2fe5429747e00018b3e863 Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Mon, 11 Jun 2018 22:35:48 -0700
Subject: tools/bpftool: fix a bug in bpftool perf

Commit b04df400c302 ("tools/bpftool: add perf subcommand") introduced
the bpftool subcommand perf to query bpf program kprobe, uprobe, and
tracepoint attachments.

The perf subcommand first tests whether the BPF_TASK_FD_QUERY bpf
syscall command is supported by the kernel. It does so by opening a
file named argv[0] and feeding the file descriptor and the current
task pid to the kernel for querying. Such an approach won't work if
argv[0] cannot be opened in the current directory, which is especially
true when bpftool is found through the PATH env variable. The error
below reflects the open failure for file argv[0] in the home
directory:

  [yhs@localhost ~]$ which bpftool
  /usr/local/sbin/bpftool
  [yhs@localhost ~]$ bpftool perf
  Error: perf_query_support: No such file or directory

To fix the issue, open the root directory ("/") instead, which exists
on every Linux system. With the fix, the error message correctly
reflects the permission issue:

  [yhs@localhost ~]$ which bpftool
  /usr/local/sbin/bpftool
  [yhs@localhost ~]$ bpftool perf
  Error: perf_query_support: Operation not permitted
  HINT: non root or kernel doesn't support TASK_FD_QUERY

Fixes: b04df400c302 ("tools/bpftool: add perf subcommand")
Reported-by: Alexei Starovoitov
Signed-off-by: Yonghong Song
Reviewed-by: Jakub Kicinski
Signed-off-by: Daniel Borkmann
---
 tools/bpf/bpftool/perf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index ac6b1a12c9b7..b76b77dcfd1f 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -29,9 +29,10 @@ static bool has_perf_query_support(void)
 	if (perf_query_supported)
 		goto out;
 
-	fd = open(bin_name, O_RDONLY);
+	fd = open("/", O_RDONLY);
 	if (fd < 0) {
-		p_err("perf_query_support: %s", strerror(errno));
+		p_err("perf_query_support: cannot open directory \"/\" (%s)",
+		      strerror(errno));
 		goto out;
 	}
--
cgit v1.2.3-70-g09d2
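
For reference, the probe boils down to handing any valid file descriptor
plus the current pid to the BPF_TASK_FD_QUERY command and checking how
the kernel reacts. A simplified standalone sketch of the idea follows;
it is not the exact bpftool code, probe_task_fd_query is a made-up name,
and error handling is reduced to the minimum:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    /* Returns 0 if BPF_TASK_FD_QUERY succeeded, -1 otherwise (errno says
     * why). Opening "/" instead of argv[0] guarantees the fd itself can
     * always be obtained, so a failure here reflects kernel support or
     * permissions rather than a bogus open().
     */
    static int probe_task_fd_query(void)
    {
            union bpf_attr attr;
            int fd, err;

            fd = open("/", O_RDONLY);
            if (fd < 0)
                    return -1;

            memset(&attr, 0, sizeof(attr));
            attr.task_fd_query.pid = getpid();
            attr.task_fd_query.fd = fd;

            err = syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
            close(fd);
            return err < 0 ? -1 : 0;
    }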

From 3bce593ac06b4f18710274cfb084369b3d7909eb Mon Sep 17 00:00:00 2001
From: Anders Roxell
Date: Tue, 12 Jun 2018 13:05:10 +0200
Subject: selftests: bpf: config: add config fragments

The test_tunnel.sh test fails because the config fragments it needs
aren't enabled; add them to the selftests config.

Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.")
Signed-off-by: Anders Roxell
Signed-off-by: Daniel Borkmann
---
 tools/testing/selftests/bpf/config | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 1eefe211a4a8..7eb613ffef55 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -7,3 +7,13 @@ CONFIG_CGROUP_BPF=y
 CONFIG_NETDEVSIM=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_IPIP=y
+CONFIG_IPV6=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPGRE=y
+CONFIG_IPV6_GRE=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_HMAC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_VXLAN=y
+CONFIG_GENEVE=y
--
cgit v1.2.3-70-g09d2

From 07480cbc05ef1ff7301cb11afb7d894ad3d0916a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Thu, 14 Jun 2018 11:06:55 -0700
Subject: tools: bpftool: improve accuracy of load time

BPF program load time is reported by the kernel relative to boot time.
If the conversion to wall clock does not take the nanosecond parts of
both clocks into account, the load time reported by bpftool may differ
by one second from run to run. This means the JSON object reported by
bpftool for a program will randomly change.

Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool")
Signed-off-by: Jakub Kicinski
Reviewed-by: Quentin Monnet
Signed-off-by: Daniel Borkmann
---
 tools/bpf/bpftool/prog.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a4f435203fef..05f42a46d6ed 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -90,7 +90,9 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 	}
 
 	wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
-		nsecs / 1000000000;
+		(real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) /
+		1000000000;
+
 	if (!localtime_r(&wallclock_secs, &load_tm)) {
 		snprintf(buf, size, "%llu", nsecs / 1000000000);
--
cgit v1.2.3-70-g09d2
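
The fixed arithmetic is easiest to read in isolation: both clock
readings contribute a seconds part and a nanoseconds part, and the
division by 10^9 must happen only after every nanosecond contribution
has been summed. A standalone sketch mirroring the patch's computation
(boot_ns_to_wallclock is a made-up helper name, not the bpftool
function):

    #include <time.h>

    /* Convert "nanoseconds since boot", as the kernel reports BPF
     * program load time, into wallclock seconds. Folding the tv_nsec
     * remainders of both clocks into the division keeps the result
     * stable across runs; truncating them first can shift the reported
     * time by a full second.
     */
    static time_t boot_ns_to_wallclock(unsigned long long nsecs)
    {
            struct timespec real_ts, boot_ts;

            clock_gettime(CLOCK_REALTIME, &real_ts);
            clock_gettime(CLOCK_BOOTTIME, &boot_ts);

            return (real_ts.tv_sec - boot_ts.tv_sec) +
                   (real_ts.tv_nsec - boot_ts.tv_nsec + (long long)nsecs) /
                   1000000000;
    }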

From 47cf52a246e526e2092d60ac01c54af9bd45dcc9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Thu, 14 Jun 2018 11:06:56 -0700
Subject: selftests/bpf: test offloads even with BPF programs present

Modern distros increasingly make use of BPF programs. A default
Ubuntu 18.04 installation boots with a number of cgroup_skb programs
loaded. test_offload.py tries to check that programs and maps are not
leaked on error paths by confirming that the list of programs on the
system is empty between tests.

Since we can no longer expect the system to have no BPF objects at
boot, try to remember the programs and maps present at the start, and
skip those when scanning the system.

Signed-off-by: Jakub Kicinski
Reviewed-by: Quentin Monnet
Signed-off-by: Daniel Borkmann
---
 tools/testing/selftests/bpf/test_offload.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index e78aad0a68bb..be800d0e7a84 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -163,6 +163,10 @@ def bpftool(args, JSON=True, ns="", fail=True):
 
 def bpftool_prog_list(expected=None, ns=""):
     _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+    # Remove the base progs
+    for p in base_progs:
+        if p in progs:
+            progs.remove(p)
     if expected is not None:
         if len(progs) != expected:
             fail(True, "%d BPF programs loaded, expected %d" %
@@ -171,6 +175,10 @@ def bpftool_prog_list(expected=None, ns=""):
 
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+    # Remove the base maps
+    for m in base_maps:
+        if m in maps:
+            maps.remove(m)
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
@@ -585,8 +593,8 @@ skip(os.getuid() != 0, "test must be run as root")
 # Check tools
 ret, progs = bpftool("prog", fail=False)
 skip(ret != 0, "bpftool not installed")
-# Check no BPF programs are loaded
-skip(len(progs) != 0, "BPF programs already loaded on the system")
+base_progs = progs
+_, base_maps = bpftool("map")
 
 # Check netdevsim
 ret, out = cmd("modprobe netdevsim", fail=False)
--
cgit v1.2.3-70-g09d2

From 36ffdbc0a2d9f7280e49dbe5ea53c289ad112a8c Mon Sep 17 00:00:00 2001
From: Jian Wang
Date: Fri, 15 Jun 2018 03:22:17 +0200
Subject: bpf, selftest: check tunnel type more accurately

Grep for the tunnel type directly to make sure the 'ip' command
supports it.

Signed-off-by: Jian Wang
Acked-by: Yonghong Song
Signed-off-by: Daniel Borkmann
---
 tools/testing/selftests/bpf/test_tunnel.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index aeb2901f21f4..c4b5fbbaa760 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -668,7 +668,7 @@ cleanup_exit()
 
 check()
 {
-	ip link help $1 2>&1 | grep -q "^Usage:"
+	ip link help 2>&1 | grep -q "\s$1\s"
 	if [ $? -ne 0 ];then
 		echo "SKIP $1: iproute2 not support"
 		cleanup
--
cgit v1.2.3-70-g09d2

From 26bf8a89d887c0686acef0f44eaadd49abfcab03 Mon Sep 17 00:00:00 2001
From: William Tu
Date: Thu, 14 Jun 2018 05:01:06 -0700
Subject: bpf, selftests: delete xfrm tunnel when test exits.

Make the printing of the bpf xfrm tunnel test better and clean up the
xfrm state and policy when the test finishes.

Signed-off-by: William Tu
Signed-off-by: Daniel Borkmann
---
 tools/testing/selftests/bpf/test_tunnel.sh | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index c4b5fbbaa760..546aee3e9fb4 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -608,28 +608,26 @@ setup_xfrm_tunnel
 test_xfrm_tunnel()
 {
 	config_device
-	#tcpdump -nei veth1 ip &
-	output=$(mktemp)
-	cat /sys/kernel/debug/tracing/trace_pipe | tee $output &
-	setup_xfrm_tunnel
+	> /sys/kernel/debug/tracing/trace
+	setup_xfrm_tunnel
 	tc qdisc add dev veth1 clsact
 	tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
 		sec xfrm_get_state
 	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
 	sleep 1
-	grep "reqid 1" $output
+	grep "reqid 1" /sys/kernel/debug/tracing/trace
 	check_err $?
-	grep "spi 0x1" $output
+	grep "spi 0x1" /sys/kernel/debug/tracing/trace
 	check_err $?
-	grep "remote ip 0xac100164" $output
+	grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
 	check_err $?
 	cleanup
 
 	if [ $ret -ne 0 ]; then
-                echo -e ${RED}"FAIL: xfrm tunnel"${NC}
-                return 1
-        fi
-        echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+		echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+		return 1
+	fi
+	echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
 }
 
 attach_bpf()
@@ -657,6 +655,10 @@ cleanup()
 	ip link del ip6geneve11 2> /dev/null
 	ip link del erspan11 2> /dev/null
 	ip link del ip6erspan11 2> /dev/null
+	ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
+	ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
+	ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
+	ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
 }
 
 cleanup_exit()
--
cgit v1.2.3-70-g09d2

From 7d1982b4e335c1b184406b7566f6041bfe313c35 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 15 Jun 2018 02:30:47 +0200
Subject: bpf: fix panic in prog load calls cleanup

While testing I found that when the error path in bpf_prog_load() that
jumps to free_used_maps is hit and the prog contains BPF-to-BPF calls
that were JITed earlier, we never clean up the bpf_prog_kallsyms_add()
done under jit_subprogs(). Add a proper API to make BPF kallsyms
deletion clearer and fix that.

Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs")
Signed-off-by: Daniel Borkmann
Acked-by: Martin KaFai Lau
Signed-off-by: Alexei Starovoitov
---
 include/linux/filter.h |  3 +++
 kernel/bpf/core.c      | 14 ++++++++++++++
 kernel/bpf/syscall.c   |  8 ++------
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 45fc0f5000d8..297c56fa9cee 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -961,6 +961,9 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 }
 #endif /* CONFIG_BPF_JIT */
 
+void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
+void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
+
 #define BPF_ANC	BIT(15)
 
 static inline bool bpf_needs_clear_a(const struct sock_filter *first)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9f1493705f40..1061968adcc1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -350,6 +350,20 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	return prog_adj;
 }
 
+void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+{
+	int i;
+
+	for (i = 0; i < fp->aux->func_cnt; i++)
+		bpf_prog_kallsyms_del(fp->aux->func[i]);
+}
+
+void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
+{
+	bpf_prog_kallsyms_del_subprogs(fp);
+	bpf_prog_kallsyms_del(fp);
+}
+
 #ifdef CONFIG_BPF_JIT
 /* All BPF JIT sysctl knobs here. */
 int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fa20624707f..0f62692fe635 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1034,14 +1034,9 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
-		int i;
-
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);
-
-		for (i = 0; i < prog->aux->func_cnt; i++)
-			bpf_prog_kallsyms_del(prog->aux->func[i]);
-		bpf_prog_kallsyms_del(prog);
+		bpf_prog_kallsyms_del_all(prog);
 
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 	}
@@ -1384,6 +1379,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	return err;
 
 free_used_maps:
+	bpf_prog_kallsyms_del_subprogs(prog);
 	free_used_maps(prog->aux);
 free_prog:
 	bpf_prog_uncharge_memlock(prog);
--
cgit v1.2.3-70-g09d2

From 9facc336876f7ecf9edba4c67b90426fde4ec898 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 15 Jun 2018 02:30:48 +0200
Subject: bpf: reject any prog that failed read-only lock

We currently lock any JITed image as read-only via
bpf_jit_binary_lock_ro() as well as the BPF image as read-only through
bpf_prog_lock_ro(). In case any of these fails we throw a
WARN_ON_ONCE() in order to yell loudly to the log. Perhaps, to some
extent, this may be comparable to an allocation where __GFP_NOWARN is
explicitly not set.

Added via 65869a47f348 ("bpf: improve read-only handling"), this
behavior is slightly different compared to any of the other in-kernel
set_memory_ro() users who do not check the return code of
set_memory_ro() and friends /at all/ (e.g. in the case of
module_enable_ro() / module_disable_ro()). Given that in BPF this is a
mandatory hardening step, we want to know whether there are any issues
that would leave BPF data writable. And so it happened: syzkaller
enabled fault injection and triggered a memory allocation failure deep
inside x86's change_page_attr_set_clr(), which was reached from
set_memory_ro().

Now, there are two options: i) leaving everything as is, or ii)
reworking the image locking code in order to have a final checkpoint
out of the central bpf_prog_select_runtime() which probes whether any
of the calls during prog setup weren't successful, and then bailing
out with an error. Option ii) is the better approach since this
additional paranoia avoids altogether leaving any potential W+X pages
from the BPF side in the system. Therefore, let's be strict about it,
and reject programs on such an unlikely occasion. While testing I also
noticed that one bpf_prog_lock_ro() call was missing on the outer
dummy prog in case of calls: in the destructor we call
bpf_prog_free_deferred() on the main prog where we try to
bpf_prog_unlock_free() the program, and since we now go via
bpf_prog_select_runtime(), the lock is taken there as well.

Reported-by: syzbot+3b889862e65a98317058@syzkaller.appspotmail.com
Reported-by: syzbot+9e762b52dd17e616a7a5@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann
Acked-by: Martin KaFai Lau
Signed-off-by: Alexei Starovoitov
---
 include/linux/filter.h | 60 ++++++++++++++++++++++++++++++++------------------
 kernel/bpf/core.c      | 55 +++++++++++++++++++++++++++++++++++------
 kernel/bpf/syscall.c   |  4 +---
 3 files changed, 87 insertions(+), 32 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 297c56fa9cee..108f9812e196 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -469,7 +469,8 @@ struct sock_fprog_kern {
 };
 
 struct bpf_binary_header {
-	unsigned int pages;
+	u16 pages;
+	u16 locked:1;
 	u8 image[];
 };
 
@@ -671,15 +672,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 	fp->locked = 1;
-	WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
+	if (set_memory_ro((unsigned long)fp, fp->pages))
+		fp->locked = 0;
+#endif
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 	if (fp->locked) {
 		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
 		/* In case set_memory_rw() fails, we want to be the first
@@ -687,34 +691,30 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 		 */
 		fp->locked = 0;
 	}
+#endif
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-	WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
-}
-
-static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
-{
-	WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-}
-#else
-static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
-{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+	hdr->locked = 1;
+	if (set_memory_ro((unsigned long)hdr, hdr->pages))
+		hdr->locked = 0;
+#endif
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+	if (hdr->locked) {
+		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
+		/* In case set_memory_rw() fails, we want to be the first
+		 * to crash here instead of some random place later on.
+		 */
+		hdr->locked = 0;
+	}
+#endif
 }
-#endif /* CONFIG_ARCH_HAS_SET_MEMORY */
 
 static inline struct bpf_binary_header *
 bpf_jit_binary_hdr(const struct bpf_prog *fp)
@@ -725,6 +725,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
+{
+	if (!fp->locked)
+		return -ENOLCK;
+	if (fp->jited) {
+		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+
+		if (!hdr->locked)
+			return -ENOLCK;
+	}
+
+	return 0;
+}
+#endif
+
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1061968adcc1..a9e6c04d0f4a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -598,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = size / PAGE_SIZE;
+	hdr->locked = 0;
+
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1448,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 	return 0;
 }
 
+static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
+{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+	int i, err;
+
+	for (i = 0; i < fp->aux->func_cnt; i++) {
+		err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
+		if (err)
+			return err;
+	}
+
+	return bpf_prog_check_pages_ro_single(fp);
+#endif
+	return 0;
+}
+
+static void bpf_prog_select_func(struct bpf_prog *fp)
+{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+	fp->bpf_func = __bpf_prog_ret0_warn;
+#endif
+}
+
 /**
  *	bpf_prog_select_runtime - select exec runtime for BPF program
  *	@fp: bpf_prog populated with internal BPF program
@@ -1458,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+	/* In case of BPF to BPF calls, verifier did all the prep
+	 * work with regards to JITing, etc.
+	 */
+	if (fp->bpf_func)
+		goto finalize;
 
-	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
-#else
-	fp->bpf_func = __bpf_prog_ret0_warn;
-#endif
+	bpf_prog_select_func(fp);
 
 	/* eBPF JITs can rewrite the program in case constant
 	 * blinding is active. However, in case of error during
@@ -1485,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 		if (*err)
 			return fp;
 	}
+
+finalize:
 	bpf_prog_lock_ro(fp);
 
 	/* The tail call compatibility check can only be done at
@@ -1493,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 * all eBPF JITs might immediately support all features.
 	 */
 	*err = bpf_check_tail_call(fp);
-
+	if (*err)
+		return fp;
+
+	/* Checkpoint: at this point onwards any cBPF -> eBPF or
+	 * native eBPF program is read-only. If we failed to change
+	 * the page attributes (e.g. allocation failure from
+	 * splitting large pages), then reject the whole program
+	 * in order to guarantee not ending up with any W+X pages
+	 * from BPF side in kernel.
+	 */
+	*err = bpf_prog_check_pages_ro_locked(fp);
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0f62692fe635..35dc466641f2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1353,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (err < 0)
 		goto free_used_maps;
 
-	/* eBPF program is ready to be JITed */
-	if (!prog->bpf_func)
-		prog = bpf_prog_select_runtime(prog, &err);
+	prog = bpf_prog_select_runtime(prog, &err);
 	if (err < 0)
 		goto free_used_maps;
--
cgit v1.2.3-70-g09d2
+ */ + hdr->locked = 0; + } +#endif } -#endif /* CONFIG_ARCH_HAS_SET_MEMORY */ static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) @@ -725,6 +725,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp) return (void *)addr; } +#ifdef CONFIG_ARCH_HAS_SET_MEMORY +static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp) +{ + if (!fp->locked) + return -ENOLCK; + if (fp->jited) { + const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); + + if (!hdr->locked) + return -ENOLCK; + } + + return 0; +} +#endif + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1061968adcc1..a9e6c04d0f4a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -598,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_fill_ill_insns(hdr, size); hdr->pages = size / PAGE_SIZE; + hdr->locked = 0; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -1448,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) return 0; } +static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp) +{ +#ifdef CONFIG_ARCH_HAS_SET_MEMORY + int i, err; + + for (i = 0; i < fp->aux->func_cnt; i++) { + err = bpf_prog_check_pages_ro_single(fp->aux->func[i]); + if (err) + return err; + } + + return bpf_prog_check_pages_ro_single(fp); +#endif + return 0; +} + +static void bpf_prog_select_func(struct bpf_prog *fp) +{ +#ifndef CONFIG_BPF_JIT_ALWAYS_ON + u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + + fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; +#else + fp->bpf_func = __bpf_prog_ret0_warn; +#endif +} + /** * bpf_prog_select_runtime - select exec runtime for BPF program * @fp: bpf_prog populated with internal BPF program @@ -1458,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + /* In case of BPF to BPF calls, verifier did all the prep + * work with regards to JITing, etc. + */ + if (fp->bpf_func) + goto finalize; - fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; -#else - fp->bpf_func = __bpf_prog_ret0_warn; -#endif + bpf_prog_select_func(fp); /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during @@ -1485,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) if (*err) return fp; } + +finalize: bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at @@ -1493,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * all eBPF JITs might immediately support all features. */ *err = bpf_check_tail_call(fp); - + if (*err) + return fp; + + /* Checkpoint: at this point onwards any cBPF -> eBPF or + * native eBPF program is read-only. If we failed to change + * the page attributes (e.g. allocation failure from + * splitting large pages), then reject the whole program + * in order to guarantee not ending up with any W+X pages + * from BPF side in kernel. 
+ */ + *err = bpf_prog_check_pages_ro_locked(fp); return fp; } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0f62692fe635..35dc466641f2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1353,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* eBPF program is ready to be JITed */ - if (!prog->bpf_func) - prog = bpf_prog_select_runtime(prog, &err); + prog = bpf_prog_select_runtime(prog, &err); if (err < 0) goto free_used_maps; -- cgit v1.2.3-70-g09d2 From 6d5fc1957989266006db6ef3dfb9159b42cf0189 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 14 Jun 2018 11:07:42 +0900 Subject: xdp: Fix handling of devmap in generic XDP Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed the return value type of __devmap_lookup_elem() from struct net_device * to struct bpf_dtab_netdev * but forgot to modify generic XDP code accordingly. Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct net_device is expected, then skb->dev was set to invalid value. v2: - Fix compiler warning without CONFIG_BPF_SYSCALL. Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") Signed-off-by: Toshiaki Makita Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 12 ++++++++++++ include/linux/filter.h | 16 ++++++++++++++++ kernel/bpf/devmap.c | 14 ++++++++++++++ net/core/filter.c | 21 ++++----------------- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 995c3b1e59bf..7df32a3200f7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,12 +488,15 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ struct xdp_buff; +struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return 0; } +struct sk_buff; + +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, + struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + return 0; +} + static inline struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 108f9812e196..b615df57b7d5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -802,6 +803,21 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, + struct net_device *fwd) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (skb->len > len) + return -EMSGSIZE; + + return 0; +} + /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the * same cpu context. 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a7cc7b3494a9..642c97f6d1b8 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return bq_enqueue(dst, xdpf, dev_rx);
 }
 
+int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
+			     struct bpf_prog *xdp_prog)
+{
+	int err;
+
+	err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+	if (unlikely(err))
+		return err;
+	skb->dev = dst->dev;
+	generic_xdp_tx(skb, xdp_prog);
+
+	return 0;
+}
+
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
diff --git a/net/core/filter.c b/net/core/filter.c
index 3d9ba7e5965a..e7f12e9f598c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3214,20 +3214,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
-static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
-{
-	unsigned int len;
-
-	if (unlikely(!(fwd->flags & IFF_UP)))
-		return -ENETDOWN;
-
-	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-	if (skb->len > len)
-		return -EMSGSIZE;
-
-	return 0;
-}
-
 static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
@@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 	}
 
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
-		if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+		struct bpf_dtab_netdev *dst = fwd;
+
+		err = dev_map_generic_redirect(dst, skb, xdp_prog);
+		if (unlikely(err))
 			goto err;
-		skb->dev = fwd;
-		generic_xdp_tx(skb, xdp_prog);
 	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 		struct xdp_sock *xs = fwd;
 
--
cgit v1.2.3-70-g09d2
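
The bug class in this last fix is worth spelling out: the old and new
lookup results are both pointers, and once the value travels through a
void * the compiler cannot flag the mismatch. A condensed illustration
with the structures reduced to the one relevant field (these are not
the real kernel definitions, and fwd_skb is a made-up name):

    struct net_device;

    struct sk_buff {
            struct net_device *dev;
            /* ... */
    };

    /* Reduced stand-in for the devmap entry: it wraps the net_device
     * rather than being one.
     */
    struct bpf_dtab_netdev {
            struct net_device *dev;
            /* ... */
    };

    static void fwd_skb(struct sk_buff *skb, void *fwd)
    {
            /* pre-fix (broken): treated the wrapper as a net_device:
             *         skb->dev = fwd;
             * post-fix: unwrap first, as dev_map_generic_redirect() does:
             */
            skb->dev = ((struct bpf_dtab_netdev *)fwd)->dev;
    }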