From 8f1634b82189e715b0f82f16ce54fab43cfedd8a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 14 May 2021 10:05:28 -0700 Subject: selftests/bpf: Convert static to global in tc_redirect progs Both IFINDEX_SRC and IFINDEX_DST are set from the userspace and it won't work once bpf merges with bpf-next. Fixes: 096eccdef0b3 ("selftests/bpf: Rewrite test_tc_redirect.sh as prog_tests/tc_redirect.c") Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210514170528.3750250-1-sdf@google.com --- tools/testing/selftests/bpf/progs/test_tc_neigh.c | 4 ++-- tools/testing/selftests/bpf/progs/test_tc_peer.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index 90f64a85998f..0c93d326a663 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -33,8 +33,8 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif -static volatile const __u32 IFINDEX_SRC; -static volatile const __u32 IFINDEX_DST; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index 72c72950c3bb..ef264bced0e6 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -8,8 +8,8 @@ #include -static volatile const __u32 IFINDEX_SRC; -static volatile const __u32 IFINDEX_DST; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; SEC("classifier/chk_egress") int tc_chk(struct __sk_buff *skb) -- cgit v1.2.3-70-g09d2 From 704e2beba23c45eaa056b1c03b5e1fb221e03f80 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 14 May 2021 11:07:26 -0700 Subject: selftests/bpf: Test ringbuf mmap read-only and read-write restrictions Extend ringbuf selftest to validate read/write and read-only restrictions on memory mapping consumer/producer/data pages. Ensure no "escalations" from PROT_READ to PROT_WRITE/PROT_EXEC is allowed. And test that mremap() fails to expand mmap()'ed area. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210514180726.843157-1-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/ringbuf.c | 49 +++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index de78617f6550..f9a8ae331963 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -86,8 +86,9 @@ void test_ringbuf(void) const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; long bg_ret = -1; - int err, cnt; + int err, cnt, rb_fd; int page_size = getpagesize(); + void *mmap_ptr, *tmp_ptr; skel = test_ringbuf__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) @@ -101,6 +102,52 @@ void test_ringbuf(void) if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; + rb_fd = bpf_map__fd(skel->maps.ringbuf); + /* good read/write cons_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"); + tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE); + if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend")) + goto cleanup; + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + /* bad writeable prod_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos"); + ASSERT_EQ(err, -EPERM, "wr_prod_pos_err"); + + /* bad writeable data pages */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one"); + ASSERT_EQ(err, -EPERM, "wr_data_page_one_err"); + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two"); + mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all"); + + /* good read-only pages */ + mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro"); + + /* good read-only pages with initial offset */ + mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro"); + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); -- cgit v1.2.3-70-g09d2 From a8deba8547e39f26440101164a3bbc2899c5b305 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 25 May 2021 09:41:39 +0800 Subject: bpftool: Add sock_release help info for cgroup attach/prog load command The help information was not added at the time when the function got added. Fix this and add the missing information to its cli, documentation and bash completion. Fixes: db94cc0b4805 ("bpftool: Add support for BPF_CGROUP_INET_SOCK_RELEASE") Signed-off-by: Liu Jian Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20210525014139.323859-1-liujian56@huawei.com --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 4 +++- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +- tools/bpf/bpftool/bash-completion/bpftool | 6 +++--- tools/bpf/bpftool/cgroup.c | 3 ++- tools/bpf/bpftool/prog.c | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 790944c35602..baee8591ac76 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -30,7 +30,8 @@ CGROUP COMMANDS | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | | **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** } +| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +| **sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -106,6 +107,7 @@ DESCRIPTION **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). + **sock_release** closing an userspace inet socket (since 5.9). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* Detach *PROG* from the cgroup *CGROUP* and attach type diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 358c7309d419..fe1b38e7e887 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -44,7 +44,7 @@ PROG COMMANDS | **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | | **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | -| **cgroup/getsockopt** | **cgroup/setsockopt** | +| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } | *ATTACH_TYPE* := { diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index d67518bcbd44..cc33c5824a2f 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -478,7 +478,7 @@ _bpftool() cgroup/recvmsg4 cgroup/recvmsg6 \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ - cgroup/setsockopt struct_ops \ + cgroup/setsockopt cgroup/sock_release struct_ops \ fentry fexit freplace sk_lookup" -- \ "$cur" ) ) return 0 @@ -1021,7 +1021,7 @@ _bpftool() device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ getpeername4 getpeername6 getsockname4 getsockname6 \ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ - setsockopt' + setsockopt sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' case $prev in @@ -1032,7 +1032,7 @@ _bpftool() ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ post_bind4|post_bind6|connect4|connect6|getpeername4|\ getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\ - recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt) + recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index d901cc1b904a..6e53b1d393f4 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -28,7 +28,8 @@ " connect6 | getpeername4 | getpeername6 |\n" \ " getsockname4 | getsockname6 | sendmsg4 |\n" \ " sendmsg6 | recvmsg4 | recvmsg6 |\n" \ - " sysctl | getsockopt | setsockopt }" + " sysctl | getsockopt | setsockopt |\n" \ + " sock_release }" static unsigned int query_flags; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 3f067d2d7584..da4846c9856a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2138,7 +2138,7 @@ static int do_help(int argc, char **argv) " cgroup/getpeername4 | cgroup/getpeername6 |\n" " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" - " cgroup/getsockopt | cgroup/setsockopt |\n" + " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" -- cgit v1.2.3-70-g09d2 From 6fd5fb63820a9a1146aba0bba2fdbc1db4b903e7 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Tue, 25 May 2021 10:29:55 +0000 Subject: selftests/bpf: Add test for l3 use of bpf_redirect_peer Add a test case for using bpf_skb_change_head() in combination with bpf_redirect_peer() to redirect a packet from a L3 device to veth and back. The test uses a BPF program that adds L2 headers to the packet coming from a L3 device and then calls bpf_redirect_peer() to redirect the packet to a veth device. The test fails as skb->mac_len is not set properly and thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify(), causing tcp_v4_rcv() to point the TCP header into middle of the IP header. Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210525102955.2811090-1-joamaki@gmail.com --- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 552 ++++++++++++++------- tools/testing/selftests/bpf/progs/test_tc_peer.c | 31 ++ 2 files changed, 405 insertions(+), 178 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 95ef9fcd31d8..5703c918812b 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -11,14 +11,17 @@ */ #define _GNU_SOURCE -#include + +#include #include #include +#include +#include #include #include #include #include -#include +#include #include "test_progs.h" #include "network_helpers.h" @@ -32,18 +35,25 @@ #define IP4_SRC "172.16.1.100" #define IP4_DST "172.16.2.100" +#define IP4_TUN_SRC "172.17.1.100" +#define IP4_TUN_FWD "172.17.1.200" #define IP4_PORT 9004 -#define IP6_SRC "::1:dead:beef:cafe" -#define IP6_DST "::2:dead:beef:cafe" +#define IP6_SRC "0::1:dead:beef:cafe" +#define IP6_DST "0::2:dead:beef:cafe" +#define IP6_TUN_SRC "1::1:dead:beef:cafe" +#define IP6_TUN_FWD "1::2:dead:beef:cafe" #define IP6_PORT 9006 #define IP4_SLL "169.254.0.1" #define IP4_DLL "169.254.0.2" #define IP4_NET "169.254.0.0" +#define MAC_DST_FWD "00:11:22:33:44:55" +#define MAC_DST "00:22:33:44:55:66" + #define IFADDR_STR_LEN 18 -#define PING_ARGS "-c 3 -w 10 -q" +#define PING_ARGS "-i 0.2 -c 3 -w 10 -q" #define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" #define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" @@ -51,120 +61,104 @@ #define TIMEOUT_MILLIS 10000 -#define MAX_PROC_MODS 128 -#define MAX_PROC_VALUE_LEN 16 - #define log_err(MSG, ...) \ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) -struct proc_mod { - char path[PATH_MAX]; - char oldval[MAX_PROC_VALUE_LEN]; - int oldlen; -}; - static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; -static int root_netns_fd = -1; -static int num_proc_mods; -static struct proc_mod proc_mods[MAX_PROC_MODS]; -/** - * modify_proc() - Modify entry in /proc - * - * Modifies an entry in /proc and saves the original value for later - * restoration with restore_proc(). - */ -static int modify_proc(const char *path, const char *newval) +static int write_file(const char *path, const char *newval) { - struct proc_mod *mod; FILE *f; - if (num_proc_mods + 1 > MAX_PROC_MODS) - return -1; - f = fopen(path, "r+"); if (!f) return -1; - - mod = &proc_mods[num_proc_mods]; - num_proc_mods++; - - strncpy(mod->path, path, PATH_MAX); - - if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) { - log_err("reading from %s failed", path); - goto fail; - } - rewind(f); if (fwrite(newval, strlen(newval), 1, f) != 1) { log_err("writing to %s failed", path); - goto fail; + fclose(f); + return -1; } - fclose(f); return 0; - -fail: - fclose(f); - num_proc_mods--; - return -1; } -/** - * restore_proc() - Restore all /proc modifications - */ -static void restore_proc(void) +struct nstoken { + int orig_netns_fd; +}; + +static int setns_by_fd(int nsfd) { - int i; + int err; - for (i = 0; i < num_proc_mods; i++) { - struct proc_mod *mod = &proc_mods[i]; - FILE *f; + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); - f = fopen(mod->path, "w"); - if (!f) { - log_err("fopen of %s failed", mod->path); - continue; - } + if (!ASSERT_OK(err, "setns")) + return err; - if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1) - log_err("fwrite to %s failed", mod->path); + /* Switch /sys to the new namespace so that e.g. /sys/class/net + * reflects the devices in the new namespace. + */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "unshare")) + return err; - fclose(f); - } - num_proc_mods = 0; + err = umount2("/sys", MNT_DETACH); + if (!ASSERT_OK(err, "umount2 /sys")) + return err; + + err = mount("sysfs", "/sys", "sysfs", 0, NULL); + if (!ASSERT_OK(err, "mount /sys")) + return err; + + err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL); + if (!ASSERT_OK(err, "mount /sys/fs/bpf")) + return err; + + return 0; } /** - * setns_by_name() - Set networks namespace by name + * open_netns() - Switch to specified network namespace by name. + * + * Returns token with which to restore the original namespace + * using close_netns(). */ -static int setns_by_name(const char *name) +static struct nstoken *open_netns(const char *name) { int nsfd; char nspath[PATH_MAX]; int err; + struct nstoken *token; + + token = malloc(sizeof(struct nstoken)); + if (!ASSERT_OK_PTR(token, "malloc token")) + return NULL; + + token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net")) + goto fail; snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); nsfd = open(nspath, O_RDONLY | O_CLOEXEC); - if (nsfd < 0) - return nsfd; + if (!ASSERT_GE(nsfd, 0, "open netns fd")) + goto fail; - err = setns(nsfd, CLONE_NEWNET); - close(nsfd); + err = setns_by_fd(nsfd); + if (!ASSERT_OK(err, "setns_by_fd")) + goto fail; - return err; + return token; +fail: + free(token); + return NULL; } -/** - * setns_root() - Set network namespace to original (root) namespace - * - * Not expected to ever fail, so error not returned, but failure logged - * and test marked as failed. - */ -static void setns_root(void) +static void close_netns(struct nstoken *token) { - ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root"); + ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd"); + free(token); } static int netns_setup_namespaces(const char *verb) @@ -237,15 +231,17 @@ static int get_ifindex(const char *name) static int netns_setup_links_and_routes(struct netns_setup_result *result) { + struct nstoken *nstoken = NULL; char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {}; SYS("ip link add veth_src type veth peer name veth_src_fwd"); SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + + SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); + SYS("ip link set veth_dst address " MAC_DST); + if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) goto fail; - if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr)) - goto fail; result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); if (result->ifindex_veth_src_fwd < 0) @@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip link set veth_dst netns " NS_DST); /** setup in 'src' namespace */ - if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; SYS("ip addr add " IP4_SRC "/32 dev veth_src"); @@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", veth_src_fwd_addr); + close_netns(nstoken); + /** setup in 'fwd' namespace */ - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) goto fail; /* The fwd netns automatically gets a v6 LL address / routes, but also @@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + close_netns(nstoken); + /** setup in 'dst' namespace */ - if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; SYS("ip addr add " IP4_DST "/32 dev veth_dst"); @@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); - SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s", - veth_dst_fwd_addr); - SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s", - veth_dst_fwd_addr); + SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + close_netns(nstoken); - setns_root(); return 0; fail: - setns_root(); + if (nstoken) + close_netns(nstoken); return -1; } static int netns_load_bpf(void) { - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) - return -1; - SYS("tc qdisc add dev veth_src_fwd clsact"); SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " SRC_PROG_PIN_FILE); @@ -335,42 +335,29 @@ static int netns_load_bpf(void) SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " CHK_PROG_PIN_FILE); - setns_root(); - return -1; -fail: - setns_root(); - return -1; -} - -static int netns_unload_bpf(void) -{ - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) - goto fail; - SYS("tc qdisc delete dev veth_src_fwd clsact"); - SYS("tc qdisc delete dev veth_dst_fwd clsact"); - - setns_root(); return 0; fail: - setns_root(); return -1; } - static void test_tcp(int family, const char *addr, __u16 port) { int listen_fd = -1, accept_fd = -1, client_fd = -1; char buf[] = "testing testing"; int n; + struct nstoken *nstoken; - if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) return; listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); if (!ASSERT_GE(listen_fd, 0, "listen")) goto done; - if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + close_netns(nstoken); + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); @@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port) ASSERT_EQ(n, sizeof(buf), "recv from server"); done: - setns_root(); + if (nstoken) + close_netns(nstoken); if (listen_fd >= 0) close(listen_fd); if (accept_fd >= 0) @@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr) { const char *ping = family == AF_INET6 ? "ping6" : "ping"; - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr); + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr); return 0; fail: return -1; @@ -419,19 +407,37 @@ static void test_connectivity(void) test_ping(AF_INET6, IP6_DST); } +static int set_forwarding(bool enable) +{ + int err; + + err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv4.ip_forward=0")) + return err; + + err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv6.forwarding=0")) + return err; + + return 0; +} + static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) { - struct test_tc_neigh_fib *skel; + struct nstoken *nstoken = NULL; + struct test_tc_neigh_fib *skel = NULL; int err; + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + skel = test_tc_neigh_fib__open(); if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) - return; + goto done; - if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) { - test_tc_neigh_fib__destroy(skel); - return; - } + if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) + goto done; err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) @@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) goto done; /* bpf_fib_lookup() checks if forwarding is enabled */ - if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + if (!ASSERT_OK(set_forwarding(true), "enable forwarding")) goto done; - err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1"); - if (!ASSERT_OK(err, "set ipv4.ip_forward")) - goto done; - - err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1"); - if (!ASSERT_OK(err, "set ipv6.forwarding")) - goto done; - setns_root(); - test_connectivity(); + done: - bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - test_tc_neigh_fib__destroy(skel); - netns_unload_bpf(); - setns_root(); - restore_proc(); + if (skel) + test_tc_neigh_fib__destroy(skel); + close_netns(nstoken); } static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) { - struct test_tc_neigh *skel; + struct nstoken *nstoken = NULL; + struct test_tc_neigh *skel = NULL; int err; + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + skel = test_tc_neigh__open(); if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) - return; + goto done; skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; err = test_tc_neigh__load(skel); - if (!ASSERT_OK(err, "test_tc_neigh__load")) { - test_tc_neigh__destroy(skel); - return; - } + if (!ASSERT_OK(err, "test_tc_neigh__load")) + goto done; err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) @@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (netns_load_bpf()) goto done; + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + test_connectivity(); done: - bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - test_tc_neigh__destroy(skel); - netns_unload_bpf(); - setns_root(); + if (skel) + test_tc_neigh__destroy(skel); + close_netns(nstoken); } static void test_tc_redirect_peer(struct netns_setup_result *setup_result) { + struct nstoken *nstoken; struct test_tc_peer *skel; int err; + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + skel = test_tc_peer__open(); if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) - return; + goto done; skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; err = test_tc_peer__load(skel); - if (!ASSERT_OK(err, "test_tc_peer__load")) { - test_tc_peer__destroy(skel); - return; - } + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto done; err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) @@ -549,41 +549,237 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (netns_load_bpf()) goto done; + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + test_connectivity(); done: - bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); - bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); - test_tc_peer__destroy(skel); - netns_unload_bpf(); - setns_root(); + if (skel) + test_tc_peer__destroy(skel); + close_netns(nstoken); } -void test_tc_redirect(void) +static int tun_open(char *name) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (!ASSERT_GE(fd, 0, "open /dev/net/tun")) + return -1; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TUN | IFF_NO_PI; + if (*name) + strncpy(ifr.ifr_name, name, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl TUNSETIFF")) + goto fail; + + SYS("ip link set dev %s up", name); + + return fd; +fail: + close(fd); + return -1; +} + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +enum { + SRC_TO_TARGET = 0, + TARGET_TO_SRC = 1, +}; + +static int tun_relay_loop(int src_fd, int target_fd) { - struct netns_setup_result setup_result; + fd_set rfds, wfds; - root_netns_fd = open("/proc/self/ns/net", O_RDONLY); - if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net")) + FD_ZERO(&rfds); + FD_ZERO(&wfds); + + for (;;) { + char buf[1500]; + int direction, nread, nwrite; + + FD_SET(src_fd, &rfds); + FD_SET(target_fd, &rfds); + + if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) { + log_err("select failed"); + return 1; + } + + direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC; + + nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf)); + if (nread < 0) { + log_err("read failed"); + return 1; + } + + nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread); + if (nwrite != nread) { + log_err("write failed"); + return 1; + } + } +} + +static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) +{ + struct test_tc_peer *skel = NULL; + struct nstoken *nstoken = NULL; + int err; + int tunnel_pid = -1; + int src_fd, target_fd; + int ifindex; + + /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. + * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those + * expose the L2 headers encapsulating the IP packet to BPF and hence + * don't have skb in suitable state for this test. Alternative to TUN/TAP + * would be e.g. Wireguard which would appear as a pure L3 device to BPF, + * but that requires much more complicated setup. + */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return; - if (netns_setup_namespaces("add")) - goto done; + src_fd = tun_open("tun_src"); + if (!ASSERT_GE(src_fd, 0, "tun_open tun_src")) + goto fail; - if (netns_setup_links_and_routes(&setup_result)) - goto done; + close_netns(nstoken); + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) + goto fail; - if (test__start_subtest("tc_redirect_peer")) - test_tc_redirect_peer(&setup_result); + target_fd = tun_open("tun_fwd"); + if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd")) + goto fail; - if (test__start_subtest("tc_redirect_neigh")) - test_tc_redirect_neigh(&setup_result); + tunnel_pid = fork(); + if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop")) + goto fail; - if (test__start_subtest("tc_redirect_neigh_fib")) - test_tc_redirect_neigh_fib(&setup_result); + if (tunnel_pid == 0) + exit(tun_relay_loop(src_fd, target_fd)); -done: - close(root_netns_fd); - netns_setup_namespaces("delete"); + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + goto fail; + + ifindex = get_ifindex("tun_fwd"); + if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd")) + goto fail; + + skel->rodata->IFINDEX_SRC = ifindex; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto fail; + + err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto fail; + + /* Load "tc_src_l3" to the tun_fwd interface to redirect packets + * towards dst, and "tc_dst" to redirect packets + * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + */ + SYS("tc qdisc add dev tun_fwd clsact"); + SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + /* Setup route and neigh tables */ + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); + + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); + + SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); + SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + + SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto fail; + + test_connectivity(); + +fail: + if (tunnel_pid > 0) { + kill(tunnel_pid, SIGTERM); + waitpid(tunnel_pid, NULL, 0); + } + if (src_fd >= 0) + close(src_fd); + if (target_fd >= 0) + close(target_fd); + if (skel) + test_tc_peer__destroy(skel); + if (nstoken) + close_netns(nstoken); +} + +#define RUN_TEST(name) \ + ({ \ + struct netns_setup_result setup_result; \ + if (test__start_subtest(#name)) \ + if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ + if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ + "setup links and routes")) \ + test_ ## name(&setup_result); \ + netns_setup_namespaces("delete"); \ + } \ + }) + +static void *test_tc_redirect_run_tests(void *arg) +{ + RUN_TEST(tc_redirect_peer); + RUN_TEST(tc_redirect_peer_l3); + RUN_TEST(tc_redirect_neigh); + RUN_TEST(tc_redirect_neigh_fib); + return NULL; +} + +void test_tc_redirect(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index ef264bced0e6..fe818cd5f010 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -5,12 +5,17 @@ #include #include #include +#include +#include #include volatile const __u32 IFINDEX_SRC; volatile const __u32 IFINDEX_DST; +static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; +static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}; + SEC("classifier/chk_egress") int tc_chk(struct __sk_buff *skb) { @@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb) return bpf_redirect_peer(IFINDEX_DST, 0); } +SEC("classifier/dst_ingress_l3") +int tc_dst_l3(struct __sk_buff *skb) +{ + return bpf_redirect(IFINDEX_SRC, 0); +} + +SEC("classifier/src_ingress_l3") +int tc_src_l3(struct __sk_buff *skb) +{ + __u16 proto = skb->protocol; + + if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0) + return TC_ACT_SHOT; + + return bpf_redirect_peer(IFINDEX_DST, 0); +} + char __license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 1bad6fd52be4ce12d207e2820ceb0f29ab31fc53 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 4 May 2021 08:58:25 +0000 Subject: bpf, selftests: Adjust few selftest result_unpriv outcomes Given we don't need to simulate the speculative domain for registers with immediates anymore since the verifier uses direct imm-based rewrites instead of having to mask, we can also lift a few cases that were previously rejected. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/stack_ptr.c | 2 -- tools/testing/selftests/bpf/verifier/value_ptr_arith.c | 8 -------- 2 files changed, 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 07eaa04412ae..8ab94d65f3d5 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -295,8 +295,6 @@ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), BPF_EXIT_INSN(), }, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid write to stack R1 off=0 size=1", .result = ACCEPT, .retval = 42, }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index e5913fd3b903..7ae2859d495c 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -300,8 +300,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -371,8 +369,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -472,8 +468,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -766,8 +760,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { -- cgit v1.2.3-70-g09d2