From fac85c291e141a67fce46bdce01f9ee33aafabfe Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Sat, 28 Oct 2023 10:54:13 +0530 Subject: selftests/bpf: Convert CHECK macros to ASSERT_* macros in bpf_iter As it was pointed out by Yonghong Song [1], in the bpf selftests the use of the ASSERT_* series of macros is preferred over the CHECK macro. This patch replaces all CHECK calls in bpf_iter with the appropriate ASSERT_* macros. [1] https://lore.kernel.org/lkml/0a142924-633c-44e6-9a92-2dc019656bf2@linux.dev Suggested-by: Yonghong Song Signed-off-by: Yuran Pereira Acked-by: Yonghong Song Acked-by: Kui-Feng Lee Link: https://lore.kernel.org/r/DB3PR10MB6835E9C8DFCA226DD6FEF914E8A3A@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 79 ++++++++++------------- 1 file changed, 35 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index e3498f607b49..5e334d3d7ac2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -34,8 +34,6 @@ #include "bpf_iter_ksym.skel.h" #include "bpf_iter_sockmap.skel.h" -static int duration; - static void test_btf_id_or_null(void) { struct bpf_iter_test_kern3 *skel; @@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_ /* not check contents, but ensure read() ends without error */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + ASSERT_GE(len, 0, "read"); close(iter_fd); @@ -413,7 +411,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(err, 0, "read")) goto free_link; ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", @@ -526,11 +524,11 @@ static int do_read_with_fd(int iter_fd, const char *expected, start = 0; while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { start += len; - if (CHECK(start >= 16, "read", "read len %d\n", len)) + if (!ASSERT_LT(start, 16, "read")) return -1; read_buf_len = read_one_char ? 1 : 16 - start; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) return -1; if (!ASSERT_STREQ(buf, expected, "read")) @@ -571,8 +569,7 @@ static int do_read(const char *path, const char *expected) int err, iter_fd; iter_fd = open(path, O_RDONLY); - if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", - path, strerror(errno))) + if (!ASSERT_GE(iter_fd, 0, "open")) return -1; err = do_read_with_fd(iter_fd, expected, false); @@ -600,7 +597,7 @@ static void test_file_iter(void) unlink(path); err = bpf_link__pin(link, path); - if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + if (!ASSERT_OK(err, "pin_iter")) goto free_link; err = do_read(path, "abcd"); @@ -651,12 +648,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * overflow and needs restart. 
*/ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map1_fd, 0, "bpf_map_create")) goto out; map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map2_fd, 0, "bpf_map_create")) goto free_map1; /* bpf_seq_printf kernel buffer is 8 pages, so one map @@ -685,14 +680,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map1_id = map_info.id; err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map2_id = map_info.id; @@ -714,16 +707,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - CHECK(len != -1 || errno != E2BIG, "read", - "expected ret -1, errno E2BIG, but get ret %d, error %s\n", - len, strerror(errno)); + ASSERT_EQ(len, -1, "read"); + ASSERT_EQ(errno, E2BIG, "read"); goto free_buf; } else if (!ret1) { while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } else { do { @@ -732,8 +723,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) total_read_len += len; } while (len > 0 || len == -EAGAIN); - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } @@ -836,7 +826,7 @@ static void test_bpf_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -917,7 +907,7 @@ static void test_bpf_percpu_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -983,17 +973,14 @@ static void test_bpf_array_map(void) start = 0; while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) start += len; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ res_first_key = *(__u32 *)buf; res_first_val = *(__u64 *)(buf + sizeof(__u32)); - if (CHECK(res_first_key != 0 || res_first_val != first_val, - "bpf_seq_write", - "seq_write failure: first key %u vs expected 0, " - " first value %llu vs expected %llu\n", - res_first_key, res_first_val, first_val)) + if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") || + !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write")) goto close_iter; if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) @@ -1092,7 +1079,7 @@ static void test_bpf_percpu_array_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if 
(!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -1131,6 +1118,7 @@ static void test_bpf_sk_storage_delete(void) sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); if (!ASSERT_OK(err, "map_update")) goto out; @@ -1151,14 +1139,19 @@ static void test_bpf_sk_storage_delete(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) - goto close_iter; + + /* Note: The following assertions serve to ensure + * the value was deleted. It does so by asserting + * that bpf_map_lookup_elem has failed. This might + * seem counterintuitive at first. + */ + ASSERT_ERR(err, "bpf_map_lookup_elem"); + ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem"); close_iter: close(iter_fd); @@ -1203,17 +1196,15 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem") || + !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem")) goto close_socket; do_dummy_read(skel->progs.negate_socket_local_storage); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != -getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - -getpid(), val, err); + ASSERT_OK(err, "bpf_map_lookup_elem"); + ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem"); close_socket: close(sock_fd); @@ -1290,7 +1281,7 @@ static void test_bpf_sk_storage_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ -- cgit v1.2.3-70-g09d2 From bf4a64b9323f181df8aba32d66cb37b9fa5df959 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Sat, 28 Oct 2023 10:54:14 +0530 Subject: selftests/bpf: Add malloc failure checks in bpf_iter Since some malloc calls in bpf_iter may at times fail, this patch adds the appropriate fail checks, and ensures that any previously allocated resource is appropriately destroyed before returning the function. 
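The resulting pattern, roughly sketched (the exact buffer name and cleanup label vary per test; the hunks below use "out" and "close_iter"), is:

	val = malloc(8 * bpf_num_possible_cpus());
	if (!ASSERT_OK_PTR(val, "malloc"))
		goto out;	/* take the test's existing cleanup path */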
Signed-off-by: Yuran Pereira Acked-by: Yonghong Song Acked-by: Kui-Feng Lee Link: https://lore.kernel.org/r/DB3PR10MB6835F0ECA792265FA41FC39BE8A3A@DB3PR10MB6835.EURPRD10.PROD.OUTLOOK.COM Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 5e334d3d7ac2..4e02093c2cbe 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -698,7 +698,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_link; buf = malloc(expected_read_len); - if (!buf) + if (!ASSERT_OK_PTR(buf, "malloc")) goto close_iter; /* do read */ @@ -868,6 +868,8 @@ static void test_bpf_percpu_hash_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_hash_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) @@ -1044,6 +1046,8 @@ static void test_bpf_percpu_array_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_array_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) -- cgit v1.2.3-70-g09d2 From 2b62aa59d02ed281fa4fc218df3ca91b773e1e62 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:43 -0700 Subject: selftests/bpf: fix RELEASE=1 build for tc_opts Compiler complains about malloc(). We also don't need to dynamically allocate anything, so make the life easier by using statically sized buffer. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/tc_opts.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 51883ccb8020..196abf223465 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void) const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); LIBBPF_OPTS(bpf_prog_load_opts, opts); const size_t log_buf_sz = 256; - char *log_buf; + char log_buf[log_buf_sz]; int fd = -1; - log_buf = malloc(log_buf_sz); - if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) - return fd; opts.log_buf = log_buf; opts.log_size = log_buf_sz; @@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void) prog_insns, prog_insn_cnt, &opts); ASSERT_STREQ(log_buf, "", "log_0"); ASSERT_GE(fd, 0, "prog_fd"); - free(log_buf); return fd; } -- cgit v1.2.3-70-g09d2 From f4c7e887324f5776eef6e6e47a90e0ac8058a7a8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Nov 2023 20:37:44 -0700 Subject: selftests/bpf: satisfy compiler by having explicit return in btf test Some compilers complain about get_pprint_mapv_size() not returning value in some code paths. Fix with explicit return. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231102033759.2541186-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 92d51f377fe5..8fb4a04fbbc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, -- cgit v1.2.3-70-g09d2 From d79924ca579c647d5dc55f605899c98f7ea04d0f Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:53 +0800 Subject: selftests/bpf: Use value with enough-size when updating per-cpu map When updating per-cpu map in map_percpu_stats test, patch_map_thread() only passes 4-bytes-sized value to bpf_map_update_elem(). The expected size of the value is 8 * num_possible_cpus(), so fix it by passing a value with enough-size for per-cpu map update. Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/map_tests/map_percpu_stats.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 8bf497a9843e..a98d6b94dd02 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -131,6 +131,12 @@ static bool is_lru(__u32 map_type) map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + struct upsert_opts { __u32 map_type; int map_fd; @@ -150,17 +156,26 @@ static int create_small_hash(void) static void *patch_map_thread(void *arg) { + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ + static __u8 blob[8 << 10]; struct upsert_opts *opts = arg; + void *val_ptr; int val; int ret; int i; for (i = 0; i < opts->n; i++) { - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { val = create_small_hash(); - else + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { val = rand(); - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + val_ptr = &val; + } + + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) -- cgit v1.2.3-70-g09d2 From b9b79553163788d3fc42e25c2662c0a46dc9a3c5 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:54 +0800 Subject: selftests/bpf: Export map_update_retriable() Export map_update_retriable() to make it usable for other map_test cases. These cases may only need retry for specific errno, so add a new callback parameter to let map_update_retriable() decide whether or not the errno is retriable. 
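For example, a caller that only wants to retry on -ENOMEM can now pass its own predicate, along these lines (illustrative sketch; the predicate name and the map_fd/key/value variables are placeholders, while MAP_RETRIES is the existing retry count in test_maps.c):

	static bool retry_for_nomem(int err)
	{
		return err == ENOMEM;
	}

	err = map_update_retriable(map_fd, &key, &value, BPF_NOEXIST,
				   MAP_RETRIES, retry_for_nomem);

The next patch in this series adds exactly this kind of predicate for non-preallocated per-cpu maps.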
Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_maps.c | 17 ++++++++++++----- tools/testing/selftests/bpf/test_maps.h | 5 +++++ 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 7fc00e423e4d..767e0693df10 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,13 +1396,18 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static int map_update_retriable(int map_fd, const void *key, const void *value, - int flags, int attempts) +static bool retry_for_again_or_busy(int err) +{ + return (err == EAGAIN || err == EBUSY); +} + +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry) { int delay = rand() % MIN_DELAY_RANGE_US; while (bpf_map_update_elem(map_fd, key, value, flags)) { - if (!attempts || (errno != EAGAIN && errno != EBUSY)) + if (!attempts || !need_retry(errno)) return -errno; if (delay <= MAX_DELAY_US / 2) @@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data) key = value = i; if (do_update) { - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index f6fbca761732..e4ac704a536c 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -4,6 +4,7 @@ #include #include +#include #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -16,4 +17,8 @@ extern int skips; +typedef bool (*retry_for_error_fn)(int err); +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry); + #endif -- cgit v1.2.3-70-g09d2 From 2f553b032cad4993969cab356b3b0e306fcd1cd1 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Nov 2023 11:24:55 +0800 Subject: selftsets/bpf: Retry map update for non-preallocated per-cpu map BPF CI failed due to map_percpu_stats_percpu_hash from time to time [1]. It seems that the failure reason is per-cpu bpf memory allocator may not be able to allocate per-cpu pointer successfully and it can not refill free llist timely, and bpf_map_update_elem() will return -ENOMEM. So mitigate the problem by retrying the update operation for non-preallocated per-cpu map. 
[1]: https://github.com/kernel-patches/bpf/actions/runs/6713177520/job/18244865326?pr=5909 Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231101032455.3808547-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/map_tests/map_percpu_stats.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index a98d6b94dd02..2ea36408816b 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -141,6 +141,7 @@ struct upsert_opts { __u32 map_type; int map_fd; __u32 n; + bool retry_for_nomem; }; static int create_small_hash(void) @@ -154,6 +155,11 @@ static int create_small_hash(void) return map_fd; } +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + static void *patch_map_thread(void *arg) { /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ @@ -175,7 +181,12 @@ static void *patch_map_thread(void *arg) val_ptr = &val; } - ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); + /* 2 seconds may be enough ? */ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) @@ -296,6 +307,13 @@ static void __test(int map_fd) else opts.n /= 2; + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + /* * Upsert keys [0, n) under some competition: with random values from * N_THREADS threads. Check values, then delete all elements and check -- cgit v1.2.3-70-g09d2 From b0cf0dcde8cae24571b1f382e81328229e475604 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Tue, 31 Oct 2023 14:27:17 -0700 Subject: selftests/bpf: Consolidate VIRTIO/9P configs in config.vm file Those configs are needed to be able to run VM somewhat consistently. For instance, ATM, s390x is missing the `CONFIG_VIRTIO_CONSOLE` which prevents s390x kernels built in CI to leverage qemu-guest-agent. By moving them to `config,vm`, we should have selftest kernels which are equal in term of VM functionalities when they include this file. The set of config unabled were picked using grep -h -E '(_9P|_VIRTIO)' config.x86_64 config | sort | uniq added to `config.vm` and then grep -vE '(_9P|_VIRTIO)' config.{x86_64,aarch64,s390x} as a side-effect, some config may have disappeared to the aarch64 and s390x kernels, but they should not be needed. CI will tell. 
Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231031212717.4037892-1-chantr4@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config.aarch64 | 16 ---------------- tools/testing/selftests/bpf/config.s390x | 9 --------- tools/testing/selftests/bpf/config.vm | 12 ++++++++++++ tools/testing/selftests/bpf/config.x86_64 | 12 ------------ tools/testing/selftests/bpf/vmtest.sh | 4 +++- 5 files changed, 15 insertions(+), 38 deletions(-) create mode 100644 tools/testing/selftests/bpf/config.vm (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 253821494884..fa8ecf626c73 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARM_SMMU_V3=y @@ -46,7 +45,6 @@ CONFIG_DEBUG_SG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_DRM=y CONFIG_DUMMY=y CONFIG_EXPERT=y @@ -67,7 +65,6 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HEADERS_INSTALL=y CONFIG_HIGH_RES_TIMERS=y CONFIG_HUGETLBFS=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HW_RANDOM=y CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y @@ -99,8 +96,6 @@ CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_NET_9P=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y @@ -140,7 +135,6 @@ CONFIG_SCHED_TRACER=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_VIRTIO=y CONFIG_SCSI=y CONFIG_SECURITY_NETWORK=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y @@ -167,16 +161,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_FS=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 2ba92167be35..e93330382849 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_AUDIT=y CONFIG_BLK_CGROUP=y @@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y @@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y CONFIG_SAMPLES=y CONFIG_SCHED_TRACER=y CONFIG_SCSI=y -CONFIG_SCSI_VIRTIO=y CONFIG_SECURITY_NETWORK=y CONFIG_STACK_TRACER=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -136,11 +132,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm new file mode 100644 index 000000000000..a9746ca78777 --- /dev/null +++ b/tools/testing/selftests/bpf/config.vm @@ -0,0 +1,12 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y 
+CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..f7bfb2b09c82 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -1,6 +1,3 @@ -CONFIG_9P_FS=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_CLS_CGROUP=y CONFIG_NET_EMATCH=y @@ -228,12 +222,6 @@ CONFIG_USER_NS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 685034528018..65d14f3bbe30 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" + "tools/testing/selftests/bpf/config.vm" + "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" -- cgit v1.2.3-70-g09d2 From a46afaa03f6db8c65492302ffdafcb2e769e5667 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Fri, 3 Nov 2023 09:11:26 +0100 Subject: bpftool: Fix prog object type in manpage bpftool's man page lists "program" as one of possible values for OBJECT, while in fact bpftool accepts "prog" instead. 
Reported-by: Jerry Snitselaar Signed-off-by: Artem Savkov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20231103081126.170034-1-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6965c94dfdaf..09e4f2ff5658 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -20,7 +20,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **link** | **cgroup** | **perf** | **net** | **feature** | + *OBJECT* := { **map** | **prog** | **link** | **cgroup** | **perf** | **net** | **feature** | **btf** | **gen** | **struct_ops** | **iter** } *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } -- cgit v1.2.3-70-g09d2 From f2d2c7e1b7c9e8847478769d6e1f8a76b5e91952 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 3 Nov 2023 23:09:12 +0100 Subject: selftests/bpf: Disable CONFIG_DEBUG_INFO_REDUCED in config.aarch64 When building an arm64 kernel and selftests/bpf with defconfig + selftests/bpf/config and selftests/bpf/config.aarch64, the fragment CONFIG_DEBUG_INFO_REDUCED is enabled in arm64's defconfig. It should be disabled in selftests/bpf/config.aarch64, since if it is not disabled, CONFIG_DEBUG_INFO_BTF won't be enabled. Signed-off-by: Anders Roxell Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231103220912.333930-1-anders.roxell@linaro.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config.aarch64 | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index fa8ecf626c73..29c8635c5722 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -36,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y -- cgit v1.2.3-70-g09d2 From 7f7c43693c1b46652cfafb7af67ba31726d6ec4e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 7 Nov 2023 12:15:11 -0800 Subject: libbpf: Fix potential uninitialized tail padding with LIBBPF_OPTS_RESET Martin reported that libbpf complains about non-zero-value tail padding with the LIBBPF_OPTS_RESET macro if struct bpf_netkit_opts is modified to have a 4-byte tail padding. This only happens with the clang compiler. The command line is: ./test_progs -t tc_netkit_multi_links Martin and I did some investigation and found this is indeed the case, and the following are the investigation details. Clang: clang version 18.0.0 tools/lib/bpf/libbpf_common.h: #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ memset(&NAME, 0, sizeof(NAME)); \ NAME = (typeof(NAME)) { \ .sz = sizeof(NAME), \ __VA_ARGS__ \ }; \ } while (0) #endif tools/lib/bpf/libbpf.h: struct bpf_netkit_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; __u32 flags; __u32 relative_fd; __u32 relative_id; __u64 expected_revision; size_t :0; }; #define bpf_netkit_opts__last_field expected_revision In the above struct bpf_netkit_opts, there is no tail padding. prog_tests/tc_netkit.c: static void serial_test_tc_netkit_multi_links_target(int mode, int target) { ...
LIBBPF_OPTS(bpf_netkit_opts, optl); ... LIBBPF_OPTS_RESET(optl, .flags = BPF_F_BEFORE, .relative_fd = bpf_program__fd(skel->progs.tc1), ); ... } Let us make the following source change, note that we have a 4-byte tailing padding now. diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..0dd83910ae9a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -803,13 +803,13 @@ bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, struct bpf_netkit_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; - __u32 flags; __u32 relative_fd; __u32 relative_id; __u64 expected_revision; + __u32 flags; size_t :0; }; -#define bpf_netkit_opts__last_field expected_revision +#define bpf_netkit_opts__last_field flags The clang 18 generated asm code looks like below: ; LIBBPF_OPTS_RESET(optl, 55e3: 48 8d 7d 98 leaq -0x68(%rbp), %rdi 55e7: 31 f6 xorl %esi, %esi 55e9: ba 20 00 00 00 movl $0x20, %edx 55ee: e8 00 00 00 00 callq 0x55f3 55f3: 48 c7 85 10 fd ff ff 20 00 00 00 movq $0x20, -0x2f0(%rbp) 55fe: 48 8b 85 68 ff ff ff movq -0x98(%rbp), %rax 5605: 48 8b 78 18 movq 0x18(%rax), %rdi 5609: e8 00 00 00 00 callq 0x560e 560e: 89 85 18 fd ff ff movl %eax, -0x2e8(%rbp) 5614: c7 85 1c fd ff ff 00 00 00 00 movl $0x0, -0x2e4(%rbp) 561e: 48 c7 85 20 fd ff ff 00 00 00 00 movq $0x0, -0x2e0(%rbp) 5629: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) 5633: 48 8b 85 10 fd ff ff movq -0x2f0(%rbp), %rax 563a: 48 89 45 98 movq %rax, -0x68(%rbp) 563e: 48 8b 85 18 fd ff ff movq -0x2e8(%rbp), %rax 5645: 48 89 45 a0 movq %rax, -0x60(%rbp) 5649: 48 8b 85 20 fd ff ff movq -0x2e0(%rbp), %rax 5650: 48 89 45 a8 movq %rax, -0x58(%rbp) 5654: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565b: 48 89 45 b0 movq %rax, -0x50(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); At -O0 level, the clang compiler creates an intermediate copy. We have below to store 'flags' with 4-byte store and leave another 4 byte in the same 8-byte-aligned storage undefined, 5629: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) and later we store 8-byte to the original zero'ed buffer 5654: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565b: 48 89 45 b0 movq %rax, -0x50(%rbp) This caused a problem as the 4-byte value at [%rbp-0x2dc, %rbp-0x2e0) may be garbage. gcc (gcc 11.4) does not have this issue as it does zeroing struct first before doing assignments: ; LIBBPF_OPTS_RESET(optl, 50fd: 48 8d 85 40 fc ff ff leaq -0x3c0(%rbp), %rax 5104: ba 20 00 00 00 movl $0x20, %edx 5109: be 00 00 00 00 movl $0x0, %esi 510e: 48 89 c7 movq %rax, %rdi 5111: e8 00 00 00 00 callq 0x5116 5116: 48 8b 45 f0 movq -0x10(%rbp), %rax 511a: 48 8b 40 18 movq 0x18(%rax), %rax 511e: 48 89 c7 movq %rax, %rdi 5121: e8 00 00 00 00 callq 0x5126 5126: 48 c7 85 40 fc ff ff 00 00 00 00 movq $0x0, -0x3c0(%rbp) 5131: 48 c7 85 48 fc ff ff 00 00 00 00 movq $0x0, -0x3b8(%rbp) 513c: 48 c7 85 50 fc ff ff 00 00 00 00 movq $0x0, -0x3b0(%rbp) 5147: 48 c7 85 58 fc ff ff 00 00 00 00 movq $0x0, -0x3a8(%rbp) 5152: 48 c7 85 40 fc ff ff 20 00 00 00 movq $0x20, -0x3c0(%rbp) 515d: 89 85 48 fc ff ff movl %eax, -0x3b8(%rbp) 5163: c7 85 58 fc ff ff 08 00 00 00 movl $0x8, -0x3a8(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); It is not clear how to resolve the compiler code generation as the compiler generates correct code w.r.t. how to handle unnamed padding in C standard. So this patch changed LIBBPF_OPTS_RESET macro to avoid uninitialized tail padding. 
We already knows LIBBPF_OPTS macro works on both gcc and clang, even with tail padding. So LIBBPF_OPTS_RESET is changed to be a LIBBPF_OPTS followed by a memcpy(), thus avoiding uninitialized tail padding. The below is asm code generated with this patch and with clang compiler: ; LIBBPF_OPTS_RESET(optl, 55e3: 48 8d bd 10 fd ff ff leaq -0x2f0(%rbp), %rdi 55ea: 31 f6 xorl %esi, %esi 55ec: ba 20 00 00 00 movl $0x20, %edx 55f1: e8 00 00 00 00 callq 0x55f6 55f6: 48 c7 85 10 fd ff ff 20 00 00 00 movq $0x20, -0x2f0(%rbp) 5601: 48 8b 85 68 ff ff ff movq -0x98(%rbp), %rax 5608: 48 8b 78 18 movq 0x18(%rax), %rdi 560c: e8 00 00 00 00 callq 0x5611 5611: 89 85 18 fd ff ff movl %eax, -0x2e8(%rbp) 5617: c7 85 1c fd ff ff 00 00 00 00 movl $0x0, -0x2e4(%rbp) 5621: 48 c7 85 20 fd ff ff 00 00 00 00 movq $0x0, -0x2e0(%rbp) 562c: c7 85 28 fd ff ff 08 00 00 00 movl $0x8, -0x2d8(%rbp) 5636: 48 8b 85 10 fd ff ff movq -0x2f0(%rbp), %rax 563d: 48 89 45 98 movq %rax, -0x68(%rbp) 5641: 48 8b 85 18 fd ff ff movq -0x2e8(%rbp), %rax 5648: 48 89 45 a0 movq %rax, -0x60(%rbp) 564c: 48 8b 85 20 fd ff ff movq -0x2e0(%rbp), %rax 5653: 48 89 45 a8 movq %rax, -0x58(%rbp) 5657: 48 8b 85 28 fd ff ff movq -0x2d8(%rbp), %rax 565e: 48 89 45 b0 movq %rax, -0x50(%rbp) ; link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); In the above code, a temporary buffer is zeroed and then has proper value assigned. Finally, values in temporary buffer are copied to the original variable buffer, hence tail padding is guaranteed to be 0. Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Tested-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20231107201511.2548645-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_common.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index b7060f254486..8fe248e14eb6 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -79,11 +79,14 @@ */ #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ - memset(&NAME, 0, sizeof(NAME)); \ - NAME = (typeof(NAME)) { \ - .sz = sizeof(NAME), \ - __VA_ARGS__ \ - }; \ + typeof(NAME) ___##NAME = ({ \ + memset(&___##NAME, 0, sizeof(NAME)); \ + (typeof(NAME)) { \ + .sz = sizeof(NAME), \ + __VA_ARGS__ \ + }; \ + }); \ + memcpy(&NAME, &___##NAME, sizeof(NAME)); \ } while (0) #endif /* __LIBBPF_LIBBPF_COMMON_H */ -- cgit v1.2.3-70-g09d2 From 5d4a7aaca1ebcc7c864caec13203662a061c4f4f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 7 Nov 2023 21:14:29 -0800 Subject: veristat: add ability to sort by stat's absolute value Add ability to sort results by absolute values of specified stats. This is especially useful to find biggest deviations in comparison mode. When comparing verifier change effect against a large base of BPF object files, it's necessary to see big changes both in positive and negative directions, as both might be a signal for regressions or bugs. The syntax is natural, e.g., adding `-s '|insns_diff|'^` will instruct veristat to sort by absolute value of instructions difference in ascending order. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231108051430.1830950-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 68 ++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 655095810d4a..102914f70573 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -99,6 +100,7 @@ struct stat_specs { enum stat_id ids[ALL_STATS_CNT]; enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; + bool abs[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -133,6 +135,7 @@ struct filter { int stat_id; enum stat_variant stat_var; long value; + bool abs; }; static struct env { @@ -455,7 +458,8 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -488,13 +492,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) long val; const char *end = str; const char *op_str; + bool is_abs; op_str = operators[i].op_str; p = strstr(str, op_str); if (!p) continue; - if (!parse_stat_id_var(str, p - str, &id, &var)) { + if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -533,6 +538,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f->stat_id = id; f->stat_var = var; f->op = operators[i].op_kind; + f->abs = true; f->value = val; *cnt += 1; @@ -657,7 +663,8 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs) { static const char *var_sfxs[] = { [VARIANT_A] = "_a", @@ -667,6 +674,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v }; int i, j, k; + /* || means we take absolute value of given stat */ + *is_abs = false; + if (len > 2 && name[0] == '|' && name[len - 1] == '|') { + *is_abs = true; + name += 1; + len -= 2; + } + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; size_t alias_len, sfx_len; @@ -722,7 +737,7 @@ static bool is_desc_sym(char c) static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; - bool has_order = false, is_asc = false; + bool has_order = false, is_asc = false, is_abs = false; size_t len = strlen(stat_name); enum stat_variant var; @@ -737,7 +752,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id_var(stat_name, len, &id, &var)) { + if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } @@ -745,6 +760,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) specs->ids[specs->spec_cnt] = id; specs->variants[specs->spec_cnt] = var; 
specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->abs[specs->spec_cnt] = is_abs; specs->spec_cnt++; return 0; @@ -1103,7 +1119,7 @@ cleanup: } static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, - enum stat_id id, bool asc) + enum stat_id id, bool asc, bool abs) { int cmp = 0; @@ -1124,6 +1140,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, long v1 = s1->stats[id]; long v2 = s2->stats[id]; + if (abs) { + v1 = v1 < 0 ? -v1 : v1; + v2 = v2 < 0 ? -v2 : v2; + } + if (v1 != v2) cmp = v1 < v2 ? -1 : 1; break; @@ -1142,7 +1163,8 @@ static int cmp_prog_stats(const void *v1, const void *v2) int i, cmp; for (i = 0; i < env.sort_spec.spec_cnt; i++) { - cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]); + cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], + env.sort_spec.asc[i], env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1211,7 +1233,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s, static int cmp_join_stat(const struct verif_stats_join *s1, const struct verif_stats_join *s2, - enum stat_id id, enum stat_variant var, bool asc) + enum stat_id id, enum stat_variant var, + bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; double v1, v2; @@ -1220,6 +1243,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1, fetch_join_stat_value(s1, id, var, &str1, &v1); fetch_join_stat_value(s2, id, var, &str2, &v2); + if (abs) { + v1 = fabs(v1); + v2 = fabs(v2); + } + if (str1) cmp = strcmp(str1, str2); else if (v1 != v2) @@ -1237,7 +1265,8 @@ static int cmp_join_stats(const void *v1, const void *v2) cmp = cmp_join_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.variants[i], - env.sort_spec.asc[i]); + env.sort_spec.asc[i], + env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1720,6 +1749,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + if (f->abs) + value = fabs(value); + switch (f->op) { case OP_EQ: return value > f->value - eps && value < f->value + eps; case OP_NEQ: return value < f->value - eps || value > f->value + eps; @@ -1766,7 +1798,7 @@ static int handle_comparison_mode(void) struct stat_specs base_specs = {}, comp_specs = {}; struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j, last_idx; + int err, i, j, last_idx, cnt; if (env.filename_cnt != 2) { fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); @@ -1879,7 +1911,7 @@ static int handle_comparison_mode(void) env.join_stat_cnt += 1; } - /* now sort joined results accorsing to sort spec */ + /* now sort joined results according to sort spec */ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); /* for human-readable table output we need to do extra pass to @@ -1896,16 +1928,22 @@ one_more_time: output_comp_headers(cur_fmt); last_idx = -1; + cnt = 0; for (i = 0; i < env.join_stat_cnt; i++) { const struct verif_stats_join *join = &env.join_stats[i]; if (!should_output_join_stats(join)) continue; + if (env.top_n && cnt >= env.top_n) + break; + if (cur_fmt == RESFMT_TABLE_CALCLEN) last_idx = i; output_comp_stats(join, cur_fmt, i == last_idx); + + cnt++; } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1920,6 +1958,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s { long value = stats->stats[f->stat_id]; + if (f->abs) + value = value < 0 ? 
-value : value; + switch (f->op) { case OP_EQ: return value == f->value; case OP_NEQ: return value != f->value; @@ -1964,7 +2005,7 @@ static bool should_output_stats(const struct verif_stats *stats) static void output_prog_stats(void) { const struct verif_stats *stats; - int i, last_stat_idx = 0; + int i, last_stat_idx = 0, cnt = 0; if (env.out_fmt == RESFMT_TABLE) { /* calculate column widths */ @@ -1984,7 +2025,10 @@ static void output_prog_stats(void) stats = &env.prog_stats[i]; if (!should_output_stats(stats)) continue; + if (env.top_n && cnt >= env.top_n) + break; output_stats(stats, env.out_fmt, i == last_stat_idx); + cnt++; } } -- cgit v1.2.3-70-g09d2 From 27007fae704eb12547b9b5c7b1005e11640d4f19 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 7 Nov 2023 21:14:30 -0800 Subject: veristat: add ability to filter top N results Add ability to filter top B results, both in replay/verifier mode and comparison mode. Just adding `-n10` will emit only first 10 rows, or less, if there is not enough rows. This is not just a shortcut instead of passing veristat output through `head`, though. Filtering out all the other rows influences final table formatting, as table column widths are calculated based on actual emitted test. To demonstrate the difference, compare two "equivalent" forms below, one using head and another using -n argument. TOP N FEATURE ============= [vmuser@archvm bpf]$ sudo ./veristat -C ~/baseline-results-selftests.csv ~/sanity2-results-selftests.csv -e file,prog,insns,states -s '|insns_diff|' -n10 File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ---------------------------------------- --------------------- --------- --------- ------------ ---------- ---------- ------------- test_seg6_loop.bpf.linked3.o __add_egr_x 12440 12360 -80 (-0.64%) 364 357 -7 (-1.92%) async_stack_depth.bpf.linked3.o async_call_root_check 145 145 +0 (+0.00%) 3 3 +0 (+0.00%) async_stack_depth.bpf.linked3.o pseudo_call_check 139 139 +0 (+0.00%) 3 3 +0 (+0.00%) atomic_bounds.bpf.linked3.o sub 7 7 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o kmalloc 5 5 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o sched_process_fork 22 22 +0 (+0.00%) 2 2 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o socket_post_create 23 23 +0 (+0.00%) 2 2 +0 (+0.00%) bind4_prog.bpf.linked3.o bind_v4_prog 358 358 +0 (+0.00%) 33 33 +0 (+0.00%) bind6_prog.bpf.linked3.o bind_v6_prog 429 429 +0 (+0.00%) 37 37 +0 (+0.00%) bind_perm.bpf.linked3.o bind_v4_prog 15 15 +0 (+0.00%) 1 1 +0 (+0.00%) PIPING TO HEAD ============== [vmuser@archvm bpf]$ sudo ./veristat -C ~/baseline-results-selftests.csv ~/sanity2-results-selftests.csv -e file,prog,insns,states -s '|insns_diff|' | head -n12 File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ----------------------------------------------------- ---------------------------------------------------- --------- --------- ------------ ---------- ---------- ------------- test_seg6_loop.bpf.linked3.o __add_egr_x 12440 12360 -80 (-0.64%) 364 357 -7 (-1.92%) async_stack_depth.bpf.linked3.o async_call_root_check 145 145 +0 (+0.00%) 3 3 +0 (+0.00%) async_stack_depth.bpf.linked3.o pseudo_call_check 139 139 +0 (+0.00%) 3 3 +0 (+0.00%) atomic_bounds.bpf.linked3.o sub 7 7 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o kmalloc 5 5 +0 (+0.00%) 0 0 +0 (+0.00%) bench_local_storage_create.bpf.linked3.o sched_process_fork 22 22 +0 (+0.00%) 2 2 +0 (+0.00%) 
bench_local_storage_create.bpf.linked3.o socket_post_create 23 23 +0 (+0.00%) 2 2 +0 (+0.00%) bind4_prog.bpf.linked3.o bind_v4_prog 358 358 +0 (+0.00%) 33 33 +0 (+0.00%) bind6_prog.bpf.linked3.o bind_v6_prog 429 429 +0 (+0.00%) 37 37 +0 (+0.00%) bind_perm.bpf.linked3.o bind_v4_prog 15 15 +0 (+0.00%) 1 1 +0 (+0.00%) Note all the wasted whitespace in the "PIPING TO HEAD" variant. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231108051430.1830950-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 102914f70573..443a29fc6a62 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -149,6 +149,7 @@ static struct env { bool show_version; bool comparison_mode; bool replay_mode; + int top_n; int log_level; int log_size; @@ -215,6 +216,7 @@ static const struct argp_option opts[] = { { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, { "test-states", 't', NULL, 0, "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -293,6 +295,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'n': + errno = 0; + env.top_n = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top N specifier: %s\n", arg); + argp_usage(state); + } + break; case 'C': env.comparison_mode = true; break; -- cgit v1.2.3-70-g09d2 From f460e7bdb027d1da93f0c5090b239889cd46a33d Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:35 -0800 Subject: selftests/bpf: Add test passing MAYBE_NULL reg to bpf_refcount_acquire The test added in this patch exercises the logic fixed in the previous patch in this series. Before the previous patch's changes, bpf_refcount_acquire accepts MAYBE_NULL local kptrs; after the change the verifier correctly rejects the such a call. 
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/refcounted_kptr_fail.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee580..1553b9c16aa7 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -53,6 +53,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) return 0; } +SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) -- cgit v1.2.3-70-g09d2 From e9ed8df7187cfdce1075d0ee591544ac15d072f1 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:39 -0800 Subject: selftests/bpf: Test bpf_refcount_acquire of node obtained via direct ld This patch demonstrates that verifier changes earlier in this series result in bpf_refcount_acquire(mapval->stashed_kptr) passing verification. The added test additionally validates that stashing a kptr in mapval and - in a separate BPF program - refcount_acquiring the kptr without unstashing works as expected at runtime. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-7-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/local_kptr_stash.c | 33 ++++++++++ .../testing/selftests/bpf/progs/local_kptr_stash.c | 71 ++++++++++++++++++++++ 2 files changed, 104 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87ba..e6e50a394472 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + local_kptr_stash__destroy(skel); +} + static void 
test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -86,6 +117,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2b8..1769fdff6aea 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,6 +14,24 @@ struct node_data { struct bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; @@ -38,6 +56,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 155addf0814a92d08fce26a11b27e3315cdba977 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 3 Nov 2023 19:49:00 -0700 Subject: bpf: Use named fields for certain bpf uapi structs Martin and Vadim reported a verifier failure with bpf_dynptr usage. The issue is mentioned but Vadim workarounded the issue with source change ([1]). The below describes what is the issue and why there is a verification failure. int BPF_PROG(skb_crypto_setup) { struct bpf_dynptr algo, key; ... bpf_dynptr_from_mem(..., ..., 0, &algo); ... } The bpf program is using vmlinux.h, so we have the following definition in vmlinux.h: struct bpf_dynptr { long: 64; long: 64; }; Note that in uapi header bpf.h, we have struct bpf_dynptr { long: 64; long: 64; } __attribute__((aligned(8))); So we lost alignment information for struct bpf_dynptr by using vmlinux.h. 
Let us take a look at a simple program below: $ cat align.c typedef unsigned long long __u64; struct bpf_dynptr_no_align { __u64 :64; __u64 :64; }; struct bpf_dynptr_yes_align { __u64 :64; __u64 :64; } __attribute__((aligned(8))); void bar(void *, void *); int foo() { struct bpf_dynptr_no_align a; struct bpf_dynptr_yes_align b; bar(&a, &b); return 0; } $ clang --target=bpf -O2 -S -emit-llvm align.c Look at the generated IR file align.ll: ... %a = alloca %struct.bpf_dynptr_no_align, align 1 %b = alloca %struct.bpf_dynptr_yes_align, align 8 ... The compiler dictates the alignment for struct bpf_dynptr_no_align is 1 and the alignment for struct bpf_dynptr_yes_align is 8. So theoretically compiler could allocate variable %a with alignment 1 although in reallity the compiler may choose a different alignment by considering other local variables. In [1], the verification failure happens because variable 'algo' is allocated on the stack with alignment 4 (fp-28). But the verifer wants its alignment to be 8. To fix the issue, the RFC patch ([1]) tried to add '__attribute__((aligned(8)))' to struct bpf_dynptr plus other similar structs. Andrii suggested that we could directly modify uapi struct with named fields like struct 'bpf_iter_num': struct bpf_iter_num { /* opaque iterator state; having __u64 here allows to preserve correct * alignment requirements in vmlinux.h, generated from BTF */ __u64 __opaque[1]; } __attribute__((aligned(8))); Indeed, adding named fields for those affected structs in this patch can preserve alignment when bpf program references them in vmlinux.h. With this patch, the verification failure in [1] can also be resolved. [1] https://lore.kernel.org/bpf/1b100f73-7625-4c1f-3ae5-50ecf84d3ff0@linux.dev/ [2] https://lore.kernel.org/bpf/20231103055218.2395034-1-yonghong.song@linux.dev/ Cc: Vadim Fedorenko Cc: Martin KaFai Lau Suggested-by: Andrii Nakryiko Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231104024900.1539182-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 23 +++++++---------------- tools/include/uapi/linux/bpf.h | 23 +++++++---------------- 2 files changed, 14 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f6cdf52b1da..095ca7238ac2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0f6cdf52b1da..095ca7238ac2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7151,40 +7151,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); 
struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { -- cgit v1.2.3-70-g09d2 From b8e3a87a627b575896e448021e5c2f8a3bc19931 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 8 Nov 2023 03:23:34 -0800 Subject: bpf: Add crosstask check to __bpf_get_stack Currently get_perf_callchain only supports user stack walking for the current task. Passing the correct *crosstask* param will return 0 frames if the task passed to __bpf_get_stack isn't the current one instead of a single incorrect frame/address. This change passes the correct *crosstask* param but also does a preemptive check in __bpf_get_stack if the task is current and returns -EOPNOTSUPP if it is not. This issue was found using bpf_get_task_stack inside a BPF iterator ("iter/task"), which iterates over all tasks. bpf_get_task_stack works fine for fetching kernel stacks but because get_perf_callchain relies on the caller to know if the requested *task* is the current one (via *crosstask*) it was failing in a confusing way. It might be possible to get user stacks for all tasks utilizing something like access_process_vm but that requires the bpf program calling bpf_get_task_stack to be sleepable and would therefore be a breaking change. Fixes: fa28dcb82a38 ("bpf: Introduce helper bpf_get_task_stack()") Signed-off-by: Jordan Rome Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231108112334.3433136-1-jordalgo@meta.com --- include/uapi/linux/bpf.h | 3 +++ kernel/bpf/stackmap.c | 11 ++++++++++- tools/include/uapi/linux/bpf.h | 3 +++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 095ca7238ac2..7cf8bcf9f6a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4517,6 +4517,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4530,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. 
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index d6b277482085..dff7ba539701 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -388,6 +388,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, { u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; + bool crosstask = task && task != current; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; @@ -410,6 +411,14 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, if (task && user && !user_mode(regs)) goto err_fault; + /* get_perf_callchain does not support crosstask user stack walking + * but returns an empty stack instead of NULL. + */ + if (crosstask && user) { + err = -EOPNOTSUPP; + goto clear; + } + num_elem = size / elem_size; max_depth = num_elem + skip; if (sysctl_perf_event_max_stack < max_depth) @@ -421,7 +430,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, trace = get_callchain_entry_for_task(task, max_depth); else trace = get_perf_callchain(regs, 0, kernel, user, max_depth, - false, false); + crosstask, false); if (unlikely(!trace)) goto err_fault; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 095ca7238ac2..7cf8bcf9f6a2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4517,6 +4517,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4530,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. -- cgit v1.2.3-70-g09d2 From 100888fb6d8a185866b1520031ee7e3182b173de Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 10 Nov 2023 11:36:44 -0800 Subject: selftests/bpf: Fix pyperf180 compilation failure with clang18 With latest clang18 (main branch of llvm-project repo), when building bpf selftests, [~/work/bpf-next (master)]$ make -C tools/testing/selftests/bpf LLVM=1 -j The following compilation error happens: fatal error: error in backend: Branch target out of insn range ... Stack dump: 0. Program arguments: clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf -I/home/yhs/work/bpf-next/tools/include/uapi -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -idirafter /home/yhs/work/llvm-project/llvm/build.18/install/lib/clang/18/include -idirafter /usr/local/include -idirafter /usr/include -Wno-compare-distinct-pointer-types -DENABLE_ATOMICS_TESTS -O2 --target=bpf -c progs/pyperf180.c -mcpu=v3 -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/pyperf180.bpf.o 1. parser at end of file 2. Code generation ... The compilation failure only happens to cpu=v2 and cpu=v3. 
cpu=v4 is okay since cpu=v4 supports 32-bit branch target offset. The above failure is due to upstream llvm patch [1] where some inlining behavior are changed in clang18. To workaround the issue, previously all 180 loop iterations are fully unrolled. The bpf macro __BPF_CPU_VERSION__ (implemented in clang18 recently) is used to avoid unrolling changes if cpu=v4. If __BPF_CPU_VERSION__ is not available and the compiler is clang18, the unrollng amount is unconditionally reduced. [1] https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Tested-by: Alan Maguire Link: https://lore.kernel.org/bpf/20231110193644.3130906-1-yonghong.song@linux.dev --- tools/testing/selftests/bpf/progs/pyperf180.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3100..42c4a8b62e36 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. + */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h" -- cgit v1.2.3-70-g09d2 From 727a92d62fd6a382b4c5972008e45667e707b0e4 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Sat, 11 Nov 2023 18:30:10 -0800 Subject: selftests/bpf: Add assert for user stacks in test_task_stack This is a follow up to: commit b8e3a87a627b ("bpf: Add crosstask check to __bpf_get_stack"). This test ensures that the task iterator only gets a single user stack (for the current task). 
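For illustration, a minimal sketch of the behavior being asserted (hypothetical program name and buffer size; the actual selftest change is in the diff below):

/* sketch only -- the real counter lives in progs/bpf_iter_task_stack.c */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char stack[512];
int num_user_stacks = 0;

SEC("iter/task")
int count_user_stacks(struct bpf_iter__task *ctx)
{
	struct task_struct *task = ctx->task;
	long res;

	if (!task)
		return 0;

	res = bpf_get_task_stack(task, stack, sizeof(stack), BPF_F_USER_STACK);
	/* After the crosstask check, every task other than the current one
	 * fails fast with -EOPNOTSUPP instead of reporting a single bogus
	 * frame, so only one iteration can bump the counter.
	 */
	if (res > 0)
		num_user_stacks++;

	return 0;
}

char _license[] SEC("license") = "GPL";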
Signed-off-by: Jordan Rome Signed-off-by: Andrii Nakryiko Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20231112023010.144675-1-linux@jordanrome.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 2 ++ tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 4e02093c2cbe..618af9dfae9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -332,6 +332,8 @@ static void test_task_stack(void) do_dummy_read(skel->progs.dump_task_stack); do_dummy_read(skel->progs.get_task_user_stacks); + ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks"); + bpf_iter_task_stack__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index f2b8167b72a8..442f4ca39fd7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx) return 0; } +int num_user_stacks = 0; + SEC("iter/task") int get_task_user_stacks(struct bpf_iter__task *ctx) { @@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx) if (res <= 0) return 0; + /* Only one task, the current one, should succeed */ + ++num_user_stacks; + buf_sz += res; /* If the verifier doesn't refine bpf_get_task_stack res, and instead -- cgit v1.2.3-70-g09d2 From 4849775587844e44d215289c425bcd70f315efe7 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:30 +0000 Subject: selftests/bpf: Fix issues in setup_classid_environment() If the net_cls subsystem is already mounted, attempting to mount it again in setup_classid_environment() will result in a failure with the error code EBUSY. Despite this, tmpfs will have been successfully mounted at /sys/fs/cgroup/net_cls. Consequently, the /sys/fs/cgroup/net_cls directory will be empty, causing subsequent setup operations to fail. 
Here's an error log excerpt illustrating the issue when net_cls has already been mounted at /sys/fs/cgroup/net_cls prior to running setup_classid_environment(): - Before that change $ tools/testing/selftests/bpf/test_progs --name=cgroup_v1v2 test_cgroup_v1v2:PASS:server_fd 0 nsec test_cgroup_v1v2:PASS:client_fd 0 nsec test_cgroup_v1v2:PASS:cgroup_fd 0 nsec test_cgroup_v1v2:PASS:server_fd 0 nsec run_test:PASS:skel_open 0 nsec run_test:PASS:prog_attach 0 nsec test_cgroup_v1v2:PASS:cgroup-v2-only 0 nsec (cgroup_helpers.c:248: errno: No such file or directory) Opening Cgroup Procs: /sys/fs/cgroup/net_cls/cgroup.procs (cgroup_helpers.c:540: errno: No such file or directory) Opening cgroup classid: /sys/fs/cgroup/net_cls/cgroup-test-work-dir/net_cls.classid run_test:PASS:skel_open 0 nsec run_test:PASS:prog_attach 0 nsec (cgroup_helpers.c:248: errno: No such file or directory) Opening Cgroup Procs: /sys/fs/cgroup/net_cls/cgroup-test-work-dir/cgroup.procs run_test:FAIL:join_classid unexpected error: 1 (errno 2) test_cgroup_v1v2:FAIL:cgroup-v1v2 unexpected error: -1 (errno 2) (cgroup_helpers.c:248: errno: No such file or directory) Opening Cgroup Procs: /sys/fs/cgroup/net_cls/cgroup.procs #44 cgroup_v1v2:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 1 FAILED - After that change $ tools/testing/selftests/bpf/test_progs --name=cgroup_v1v2 #44 cgroup_v1v2:OK Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-3-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5b1da2a32ea7..10b5f42e65e7 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -523,10 +523,20 @@ int setup_classid_environment(void) return 1; } - if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && - errno != EBUSY) { - log_err("mount cgroup net_cls"); - return 1; + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { + if (errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + if (rmdir(NETCLS_MOUNT_PATH)) { + log_err("rmdir cgroup net_cls"); + return 1; + } + if (umount(CGROUP_MOUNT_DFLT)) { + log_err("umount cgroup base"); + return 1; + } } cleanup_classid_environment(); -- cgit v1.2.3-70-g09d2 From f744d35ecf46f111bf9b54bfdbc89a28ee8b928a Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:31 +0000 Subject: selftests/bpf: Add parallel support for classid Include the current pid in the classid cgroup path. This way, different testers relying on classid-based configurations will have distinct classid cgroup directories, enabling them to run concurrently. Additionally, we leverage the current pid as the classid, ensuring unique identification. 
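For illustration, a minimal sketch of how a tester drives the updated helpers (hypothetical caller, error handling trimmed; paths assume the default mount points used by cgroup_helpers.c):

#include "cgroup_helpers.h"

static int classid_env_per_process(void)
{
	if (setup_classid_environment())
		return -1;
	/* set_classid() now writes getpid() as the classid into
	 * /sys/fs/cgroup/net_cls/cgroup-test-work-dir<pid>/net_cls.classid,
	 * so concurrent testers use distinct directories and classids.
	 */
	if (set_classid())
		return -1;
	return join_classid();
}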
Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-4-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 18 +++++++++++------- tools/testing/selftests/bpf/cgroup_helpers.h | 2 +- tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 10b5f42e65e7..f18649a79d64 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -45,9 +45,12 @@ #define format_parent_cgroup_path(buf, path) \ format_cgroup_path_pid(buf, path, getppid()) -#define format_classid_path(buf) \ - snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ - CGROUP_WORK_DIR) +#define format_classid_path_pid(buf, pid) \ + snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR, pid) + +#define format_classid_path(buf) \ + format_classid_path_pid(buf, getpid()) static __thread bool cgroup_workdir_mounted; @@ -551,15 +554,16 @@ int setup_classid_environment(void) /** * set_classid() - Set a cgroupv1 net_cls classid - * @id: the numeric classid * - * Writes the passed classid into the cgroup work dir's net_cls.classid + * Writes the classid into the cgroup work dir's net_cls.classid * file in order to later on trigger socket tagging. * + * We leverage the current pid as the classid, ensuring unique identification. + * * On success, it returns 0, otherwise on failure it returns 1. If there * is a failure, it prints the error to stderr. */ -int set_classid(unsigned int id) +int set_classid(void) { char cgroup_workdir[PATH_MAX - 42]; char cgroup_classid_path[PATH_MAX + 1]; @@ -575,7 +579,7 @@ int set_classid(unsigned int id) return 1; } - if (dprintf(fd, "%u\n", id) < 0) { + if (dprintf(fd, "%u\n", getpid()) < 0) { log_err("Setting cgroup classid"); rc = 1; } diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5c2cb9c8b546..92fc41daf4a4 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -29,7 +29,7 @@ int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); /* cgroupv1 related */ -int set_classid(unsigned int id); +int set_classid(void); int join_classid(void); int setup_classid_environment(void); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 9026b42914d3..addf720428f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -71,7 +71,7 @@ void test_cgroup_v1v2(void) } ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); setup_classid_environment(); - set_classid(42); + set_classid(); ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); cleanup_classid_environment(); close(server_fd); -- cgit v1.2.3-70-g09d2 From c1dcc050aa648bb3b831030d547c3fcc1c68140c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:32 +0000 Subject: selftests/bpf: Add a new cgroup helper get_classid_cgroup_id() Introduce a new helper function to retrieve the cgroup ID from a net_cls cgroup directory. 
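A short usage sketch (hypothetical caller; the skeleton type comes from the cgroup1_hierarchy selftest added later in this series):

#include "cgroup_helpers.h"
#include "test_cgroup1_hierarchy.skel.h"

static int pass_classid_cgid_to_prog(struct test_cgroup1_hierarchy *skel)
{
	unsigned long long cgid = get_classid_cgroup_id();

	if (!cgid)	/* 0 means the cgroup id lookup failed */
		return -1;
	/* let the BPF side filter on the net_cls cgroup the test joined */
	skel->bss->target_ancestor_cgid = cgid;
	return 0;
}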
Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-5-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 28 ++++++++++++++++++++++------ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index f18649a79d64..63bfa72185be 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -422,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path) } /** - * get_cgroup_id() - Get cgroup id for a particular cgroup path - * @relative_path: The cgroup path, relative to the workdir, to join + * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path + * @cgroup_workdir: The absolute cgroup path * * On success, it returns the cgroup id. On failure it returns 0, * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id(const char *relative_path) +unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) { int dirfd, err, flags, mount_id, fhsize; union { unsigned long long cgid; unsigned char raw_bytes[8]; } id; - char cgroup_workdir[PATH_MAX + 1]; struct file_handle *fhp, *fhp2; unsigned long long ret = 0; - format_cgroup_path(cgroup_workdir, relative_path); - dirfd = AT_FDCWD; flags = 0; fhsize = sizeof(*fhp); @@ -477,6 +474,14 @@ free_mem: return ret; } +unsigned long long get_cgroup_id(const char *relative_path) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, relative_path); + return get_cgroup_id_from_path(cgroup_workdir); +} + int cgroup_setup_and_join(const char *path) { int cg_fd; @@ -621,3 +626,14 @@ void cleanup_classid_environment(void) join_cgroup_from_top(NETCLS_MOUNT_PATH); nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); } + +/** + * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup + */ +unsigned long long get_classid_cgroup_id(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return get_cgroup_id_from_path(cgroup_workdir); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 92fc41daf4a4..e71da4ef031b 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -31,6 +31,7 @@ void cleanup_cgroup_environment(void); /* cgroupv1 related */ int set_classid(void); int join_classid(void); +unsigned long long get_classid_cgroup_id(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -- cgit v1.2.3-70-g09d2 From bf47300b186facc8ae66a0e2aa89073565f82bb3 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:33 +0000 Subject: selftests/bpf: Add a new cgroup helper get_cgroup_hierarchy_id() A new cgroup helper function, get_cgroup1_hierarchy_id(), has been introduced to obtain the ID of a cgroup1 hierarchy based on the provided cgroup name. This cgroup name can be obtained from the /proc/self/cgroup file. 
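For context, each line in /proc/self/cgroup has the form hierarchy-ID:controller-list:cgroup-path. With illustrative contents like the following (IDs and paths made up):

1:name=systemd:/user.slice/user-0.slice/session-1.scope
3:net_cls,net_prio:/cgroup-test-work-dir1234

get_cgroup1_hierarchy_id("net_cls") returns 3, matching "net_cls" within the comma-separated controller list of the second field, get_cgroup1_hierarchy_id("name=systemd") returns 1 via the whole-field match, and -1 is returned when no line matches.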
Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-6-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/cgroup_helpers.c | 52 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + 2 files changed, 53 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 63bfa72185be..5aa133bf3688 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -637,3 +637,55 @@ unsigned long long get_classid_cgroup_id(void) format_classid_path(cgroup_workdir); return get_cgroup_id_from_path(cgroup_workdir); } + +/** + * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. + * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be + * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-contollers like + * "net_cls,net_prio". + */ +int get_cgroup1_hierarchy_id(const char *subsys_name) +{ + char *c, *c2, *c3, *c4; + bool found = false; + char line[1024]; + FILE *file; + int i, id; + + if (!subsys_name) + return -1; + + file = fopen("/proc/self/cgroup", "r"); + if (!file) { + log_err("fopen /proc/self/cgroup"); + return -1; + } + + while (fgets(line, 1024, file)) { + i = 0; + for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { + if (i == 0) { + id = strtol(c, NULL, 10); + } else if (i == 1) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + + /* Multiple subsystems may share one single mount point */ + for (c3 = strtok_r(c, ",", &c4); c3; + c3 = strtok_r(NULL, ",", &c4)) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + } + } + i++; + } + if (found) + break; + } + fclose(file); + return found ? id : -1; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index e71da4ef031b..ee053641c026 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -20,6 +20,7 @@ int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); +int get_cgroup1_hierarchy_id(const char *subsys_name); int join_cgroup(const char *relative_path); int join_root_cgroup(void); -- cgit v1.2.3-70-g09d2 From 360769233cc9c921e90ae387d167ea3cd3cbb04c Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Sat, 11 Nov 2023 09:00:34 +0000 Subject: selftests/bpf: Add selftests for cgroup1 hierarchy Add selftests for cgroup1 hierarchy. 
The result as follows, $ tools/testing/selftests/bpf/test_progs --name=cgroup1_hierarchy #36/1 cgroup1_hierarchy/test_cgroup1_hierarchy:OK #36/2 cgroup1_hierarchy/test_root_cgid:OK #36/3 cgroup1_hierarchy/test_invalid_level:OK #36/4 cgroup1_hierarchy/test_invalid_cgid:OK #36/5 cgroup1_hierarchy/test_invalid_hid:OK #36/6 cgroup1_hierarchy/test_invalid_cgrp_name:OK #36/7 cgroup1_hierarchy/test_invalid_cgrp_name2:OK #36/8 cgroup1_hierarchy/test_sleepable_prog:OK #36 cgroup1_hierarchy:OK Summary: 1/8 PASSED, 0 SKIPPED, 0 FAILED Besides, I also did some stress test similar to the patch #2 in this series, as follows (with CONFIG_PROVE_RCU_LIST enabled): - Continuously mounting and unmounting named cgroups in some tasks, for example: cgrp_name=$1 while true do mount -t cgroup -o none,name=$cgrp_name none /$cgrp_name umount /$cgrp_name done - Continuously run this selftest concurrently, while true; do ./test_progs --name=cgroup1_hierarchy; done They can ran successfully without any RCU warnings in dmesg. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20231111090034.4248-7-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/cgroup1_hierarchy.c | 158 +++++++++++++++++++++ .../selftests/bpf/progs/test_cgroup1_hierarchy.c | 71 +++++++++ 2 files changed, 229 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c create mode 100644 tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c new file mode 100644 index 000000000000..74d6d7546f40 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao */ + +#include +#include +#include +#include "cgroup_helpers.h" +#include "test_cgroup1_hierarchy.skel.h" + +static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success")) + goto cleanup; + + err = bpf_link__destroy(fentry_link); + 
ASSERT_OK(err, "destroy_lsm"); + +cleanup: + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_fentry"); +} + +void test_cgroup1_hierarchy(void) +{ + struct test_cgroup1_hierarchy *skel; + __u64 current_cgid; + int hid, err; + + skel = test_cgroup1_hierarchy__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->bss->target_pid = getpid(); + + err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1"); + if (!ASSERT_OK(err, "fentry_set_target")) + goto destroy; + + err = test_cgroup1_hierarchy__load(skel); + if (!ASSERT_OK(err, "load")) + goto destroy; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + goto destroy; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + current_cgid = get_classid_cgroup_id(); + if (!ASSERT_GE(current_cgid, 0, "cgroup1 id")) + goto cleanup; + + hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(hid, 0, "cgroup1 id")) + goto cleanup; + skel->bss->target_hid = hid; + + if (test__start_subtest("test_cgroup1_hierarchy")) { + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_root_cgid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_invalid_level")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgid")) { + skel->bss->target_ancestor_cgid = 0; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_hid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + skel->bss->target_hid = -1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name2")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_sleepable_prog")) { + skel->bss->target_hid = hid; + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_sleepable(skel); + } + +cleanup: + cleanup_classid_environment(); +destroy: + test_cgroup1_hierarchy__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c new file mode 100644 index 000000000000..44628865fe1d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao */ + +#include "vmlinux.h" +#include +#include +#include + +__u32 target_ancestor_level; +__u64 target_ancestor_cgid; +int target_pid, target_hid; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static int bpf_link_create_verify(int cmd) +{ + struct cgroup *cgrp, *ancestor; + struct task_struct *task; + int ret = 0; + + if (cmd != BPF_LINK_CREATE) + return 0; + + task = bpf_get_current_task_btf(); + + /* Then it can run in parallel with others */ + if (task->pid != target_pid) + return 0; + + 
cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + /* Refuse it if its cgid or its ancestor's cgid is the target cgid */ + if (cgrp->kn->id == target_ancestor_cgid) + ret = -1; + + ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level); + if (!ancestor) + goto out; + + if (ancestor->kn->id == target_ancestor_cgid) + ret = -1; + bpf_cgroup_release(ancestor); + +out: + bpf_cgroup_release(cgrp); + return ret; +} + +SEC("lsm/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("lsm.s/bpf") +int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("fentry") +int BPF_PROG(fentry_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 5f99f312bd3bedb3b266b0d26376a8c500cdc97f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:00 -0800 Subject: bpf: add register bounds sanity checks and sanitization Add simple sanity checks that validate well-formed ranges (min <= max) across u64, s64, u32, and s32 ranges. Also for cases when the value is constant (either 64-bit or 32-bit), we validate that ranges and tnums are in agreement. These bounds checks are performed at the end of BPF_ALU/BPF_ALU64 operations, on conditional jumps, and for LDX instructions (where subreg zero/sign extension is probably the most important to check). This covers most of the interesting cases. Also, we validate the sanity of the return register when manually adjusting it for some special helpers. By default, sanity violation will trigger a warning in verifier log and resetting register bounds to "unbounded" ones. But to aid development and debugging, BPF_F_TEST_SANITY_STRICT flag is added, which will trigger hard failure of verification with -EFAULT on register bounds violations. This allows selftests to catch such issues. veristat will also gain a CLI option to enable this behavior. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231112010609.848406-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 1 + include/uapi/linux/bpf.h | 3 ++ kernel/bpf/syscall.c | 3 +- kernel/bpf/verifier.c | 117 ++++++++++++++++++++++++++++++++--------- tools/include/uapi/linux/bpf.h | 3 ++ 5 files changed, 101 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 24213a99cc79..402b6bc44a1b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -602,6 +602,7 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ + bool test_sanity_strict; /* fail verification on sanity violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7cf8bcf9f6a2..8a5855fcee69 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. 
Behavior is undefined */ +#define BPF_F_TEST_SANITY_STRICT (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0ed286b8a0f0..f266e03ba342 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2573,7 +2573,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_SLEEPABLE | BPF_F_TEST_RND_HI32 | BPF_F_XDP_HAS_FRAGS | - BPF_F_XDP_DEV_BOUND_ONLY)) + BPF_F_XDP_DEV_BOUND_ONLY | + BPF_F_TEST_SANITY_STRICT)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 65570eedfe88..e7edacf86e0f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2615,6 +2615,56 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); } +static int reg_bounds_sanity_check(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, const char *ctx) +{ + const char *msg; + + if (reg->umin_value > reg->umax_value || + reg->smin_value > reg->smax_value || + reg->u32_min_value > reg->u32_max_value || + reg->s32_min_value > reg->s32_max_value) { + msg = "range bounds violation"; + goto out; + } + + if (tnum_is_const(reg->var_off)) { + u64 uval = reg->var_off.value; + s64 sval = (s64)uval; + + if (reg->umin_value != uval || reg->umax_value != uval || + reg->smin_value != sval || reg->smax_value != sval) { + msg = "const tnum out of sync with range bounds"; + goto out; + } + } + + if (tnum_subreg_is_const(reg->var_off)) { + u32 uval32 = tnum_subreg(reg->var_off).value; + s32 sval32 = (s32)uval32; + + if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 || + reg->s32_min_value != sval32 || reg->s32_max_value != sval32) { + msg = "const subreg tnum out of sync with range bounds"; + goto out; + } + } + + return 0; +out: + verbose(env, "REG SANITY VIOLATION (%s): %s u64=[%#llx, %#llx] " + "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n", + ctx, msg, reg->umin_value, reg->umax_value, + reg->smin_value, reg->smax_value, + reg->u32_min_value, reg->u32_max_value, + reg->s32_min_value, reg->s32_max_value, + reg->var_off.value, reg->var_off.mask); + if (env->test_sanity_strict) + return -EFAULT; + __mark_reg_unbounded(reg); + return 0; +} + static bool __reg32_bound_s64(s32 a) { return a >= 0 && a <= S32_MAX; @@ -9982,14 +10032,15 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return 0; } -static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, - int func_id, - struct bpf_call_arg_meta *meta) +static int do_refine_retval_range(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, int ret_type, + int func_id, + struct bpf_call_arg_meta *meta) { struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; if (ret_type != RET_INTEGER) - return; + return 0; switch (func_id) { case BPF_FUNC_get_stack: @@ -10015,6 +10066,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, reg_bounds_sync(ret_reg); break; } + + return reg_bounds_sanity_check(env, ret_reg, "retval"); } static int @@ -10666,7 +10719,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].ref_obj_id = id; } - do_refine_retval_range(regs, fn->ret_type, func_id, &meta); + err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); + if (err) + return err; err = check_map_func_compatibility(env, 
meta.map_ptr, func_id); if (err) @@ -14166,13 +14221,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* check dest operand */ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + err = err ?: adjust_reg_min_max_vals(env, insn); if (err) return err; - - return adjust_reg_min_max_vals(env, insn); } - return 0; + return reg_bounds_sanity_check(env, ®s[insn->dst_reg], "alu"); } static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, @@ -14653,18 +14707,21 @@ again: * Technically we can do similar adjustments for pointers to the same object, * but we don't support that right now. */ -static void reg_set_min_max(struct bpf_reg_state *true_reg1, - struct bpf_reg_state *true_reg2, - struct bpf_reg_state *false_reg1, - struct bpf_reg_state *false_reg2, - u8 opcode, bool is_jmp32) +static int reg_set_min_max(struct bpf_verifier_env *env, + struct bpf_reg_state *true_reg1, + struct bpf_reg_state *true_reg2, + struct bpf_reg_state *false_reg1, + struct bpf_reg_state *false_reg2, + u8 opcode, bool is_jmp32) { + int err; + /* If either register is a pointer, we can't learn anything about its * variable offset from the compare (unless they were a pointer into * the same object, but we don't bother with that). */ if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE) - return; + return 0; /* fallthrough (FALSE) branch */ regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32); @@ -14675,6 +14732,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg1, regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32); reg_bounds_sync(true_reg1); reg_bounds_sync(true_reg2); + + err = reg_bounds_sanity_check(env, true_reg1, "true_reg1"); + err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2"); + err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1"); + err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2"); + return err; } static void mark_ptr_or_null_reg(struct bpf_func_state *state, @@ -14968,15 +15031,20 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, other_branch_regs = other_branch->frame[other_branch->curframe]->regs; if (BPF_SRC(insn->code) == BPF_X) { - reg_set_min_max(&other_branch_regs[insn->dst_reg], - &other_branch_regs[insn->src_reg], - dst_reg, src_reg, opcode, is_jmp32); + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + &other_branch_regs[insn->src_reg], + dst_reg, src_reg, opcode, is_jmp32); } else /* BPF_SRC(insn->code) == BPF_K */ { - reg_set_min_max(&other_branch_regs[insn->dst_reg], - src_reg /* fake one */, - dst_reg, src_reg /* same fake one */, - opcode, is_jmp32); + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + src_reg /* fake one */, + dst_reg, src_reg /* same fake one */, + opcode, is_jmp32); } + if (err) + return err; + if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id && !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { @@ -17479,10 +17547,8 @@ static int do_check(struct bpf_verifier_env *env) insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, false, BPF_MODE(insn->code) == BPF_MEMSX); - if (err) - return err; - - err = save_aux_ptr_type(env, src_reg_type, true); + err = err ?: save_aux_ptr_type(env, src_reg_type, true); + err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], "ldx"); if (err) return err; } else if (class == BPF_STX) { @@ -20769,6 +20835,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, 
__u3 if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; + env->test_sanity_strict = attr->prog_flags & BPF_F_TEST_SANITY_STRICT; env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7cf8bcf9f6a2..8a5855fcee69 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_SANITY_STRICT (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ -- cgit v1.2.3-70-g09d2 From 8863238993e23ccc6d5a9d4ff9f1c043f88f692e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:03 -0800 Subject: selftests/bpf: BPF register range bounds tester Add test to validate BPF verifier's register range bounds tracking logic. The main bulk is a lot of auto-generated tests based on a small set of seed values for lower and upper 32 bits of full 64-bit values. Currently we validate only range vs const comparisons, but the idea is to start validating range over range comparisons in subsequent patch set. When setting up initial register ranges we treat registers as one of u64/s64/u32/s32 numeric types, and then independently perform conditional comparisons based on a potentially different u64/s64/u32/s32 types. This tests lots of tricky cases of deriving bounds information across different numeric domains. Given there are lots of auto-generated cases, we guard them behind SLOW_TESTS=1 envvar requirement, and skip them altogether otherwise. With current full set of upper/lower seed value, all supported comparison operators and all the combinations of u64/s64/u32/s32 number domains, we get about 7.7 million tests, which run in about 35 minutes on my local qemu instance without parallelization. But we also split those tests by init/cond numeric types, which allows to rely on test_progs's parallelization of tests with `-j` option, getting run time down to about 5 minutes on 8 cores. It's still something that shouldn't be run during normal test_progs run. But we can run it a reasonable time, and so perhaps a nightly CI test run (once we have it) would be a good option for this. We also add a small set of tricky conditions that came up during development and triggered various bugs or corner cases in either selftest's reimplementation of range bounds logic or in verifier's logic itself. These are fast enough to be run as part of normal test_progs test run and are great for a quick sanity checking. Let's take a look at test output to understand what's going on: $ sudo ./test_progs -t reg_bounds_crafted #191/1 reg_bounds_crafted/(u64)[0; 0xffffffff] (u64)< 0:OK ... #191/115 reg_bounds_crafted/(u64)[0; 0x17fffffff] (s32)< 0:OK ... #191/137 reg_bounds_crafted/(u64)[0xffffffff; 0x100000000] (u64)== 0:OK Each test case is uniquely and fully described by this generated string. E.g.: "(u64)[0; 0x17fffffff] (s32)< 0". This means that we initialize a register (R6) in such a way that verifier knows that it can have a value in [(u64)0; (u64)0x17fffffff] range. Another register (R7) is also set up as u64, but this time a constant (zero in this case). They then are compared using 32-bit signed < operation. 
Resulting TRUE/FALSE branches are evaluated (including cases where it's known that one of the branches will never be taken, in which case we validate that verifier also determines this as a dead code). Test validates that verifier's final register state matches expected state based on selftest's own reg_state logic, implemented from scratch for cross-checking purposes. These test names can be conveniently used for further debugging, and if -vv verboseness is requested we can get a corresponding verifier log (with mark_precise logs filtered out as irrelevant and distracting). Example below is slightly redacted for brevity, omitting irrelevant register output in some places, marked with [...]. $ sudo ./test_progs -a 'reg_bounds_crafted/(u32)[0; U32_MAX] (s32)< -1' -vv ... VERIFIER LOG: ======================== func#0 @0 0: R1=ctx(off=0,imm=0) R10=fp0 0: (05) goto pc+2 3: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() 4: (bc) w6 = w0 ; R0_w=scalar() R6_w=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 5: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() 6: (bc) w7 = w0 ; R0_w=scalar() R7_w=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 7: (b4) w1 = 0 ; R1_w=0 8: (b4) w2 = -1 ; R2=4294967295 9: (ae) if w6 < w1 goto pc-9 9: R1=0 R6=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 10: (2e) if w6 > w2 goto pc-10 10: R2=4294967295 R6=scalar(smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 11: (b4) w1 = -1 ; R1_w=4294967295 12: (b4) w2 = -1 ; R2_w=4294967295 13: (ae) if w7 < w1 goto pc-13 ; R1_w=4294967295 R7=4294967295 14: (2e) if w7 > w2 goto pc-14 14: R2_w=4294967295 R7=4294967295 15: (bc) w0 = w6 ; [...] R6=scalar(id=1,smin=0,smax=umax=4294967295,var_off=(0x0; 0xffffffff)) 16: (bc) w0 = w7 ; [...] R7=4294967295 17: (ce) if w6 s< w7 goto pc+3 ; R6=scalar(id=1,smin=0,smax=umax=4294967295,smin32=-1,var_off=(0x0; 0xffffffff)) R7=4294967295 18: (bc) w0 = w6 ; [...] R6=scalar(id=1,smin=0,smax=umax=4294967295,smin32=-1,var_off=(0x0; 0xffffffff)) 19: (bc) w0 = w7 ; [...] R7=4294967295 20: (95) exit from 17 to 21: [...] 21: (bc) w0 = w6 ; [...] R6=scalar(id=1,smin=umin=umin32=2147483648,smax=umax=umax32=4294967294,smax32=-2,var_off=(0x80000000; 0x7fffffff)) 22: (bc) w0 = w7 ; [...] R7=4294967295 23: (95) exit from 13 to 1: [...] 1: [...] 1: (b7) r0 = 0 ; R0_w=0 2: (95) exit processed 24 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 1 ===================== Verifier log above is for `(u32)[0; U32_MAX] (s32)< -1` use cases, where u32 range is used for initialization, followed by signed < operator. Note how we use w6/w7 in this case for register initialization (it would be R6/R7 for 64-bit types) and then `if w6 s< w7` for comparison at instruction #17. It will be `if R6 < R7` for 64-bit unsigned comparison. Above example gives a good impression of the overall structure of a BPF programs generated for reg_bounds tests. In the future, this "framework" can be extended to test not just conditional jumps, but also arithmetic operations. Adding randomized testing is another possibility. Some implementation notes. We basically have our own generics-like operations on numbers, where all the numbers are stored in u64, but how they are interpreted is passed as runtime argument enum num_t. Further, `struct range` represents a bounds range, and those are collected together into a minimal `struct reg_state`, which collects range bounds across all four numberical domains: u64, s64, u32, s64. 
Based on these primitives and `enum op` representing possible conditional operation (<, <=, >, >=, ==, !=), there is a set of generic helpers to perform "range arithmetics", which is used to maintain struct reg_state. We simulate what verifier will do for reg bounds of R6 and R7 registers using these range and reg_state primitives. Simulated information is used to determine branch taken conclusion and expected exact register state across all four number domains. Implementation of "range arithmetics" is more generic than what verifier is currently performing: it allows range over range comparisons and adjustments. This is the intended end goal of this patch set overall and verifier logic is enhanced in subsequent patches in this series to handle range vs range operations, at which point selftests are extended to validate these conditions as well. For now it's range vs const cases only. Note that tests are split into multiple groups by their numeric types for initialization of ranges and for comparison operation. This allows to use test_progs's -j parallelization to speed up tests, as we now have 16 groups of parallel running tests. Overall reduction of running time that allows is pretty good, we go down from more than 30 minutes to slightly less than 5 minutes running time. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Acked-by: Shung-Hsi Yu Link: https://lore.kernel.org/r/20231112010609.848406-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 1838 ++++++++++++++++++++ 1 file changed, 1838 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/reg_bounds.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c new file mode 100644 index 000000000000..7a524b381ed3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -0,0 +1,1838 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include +#include +#include +#include + +/* ================================= + * SHORT AND CONSISTENT NUMBER TYPES + * ================================= + */ +#define U64_MAX ((u64)UINT64_MAX) +#define U32_MAX ((u32)UINT_MAX) +#define S64_MIN ((s64)INT64_MIN) +#define S64_MAX ((s64)INT64_MAX) +#define S32_MIN ((s32)INT_MIN) +#define S32_MAX ((s32)INT_MAX) + +typedef unsigned long long ___u64; +typedef unsigned int ___u32; +typedef long long ___s64; +typedef int ___s32; + +/* avoid conflicts with already defined types in kernel headers */ +#define u64 ___u64 +#define u32 ___u32 +#define s64 ___s64 +#define s32 ___s32 + +/* ================================== + * STRING BUF ABSTRACTION AND HELPERS + * ================================== + */ +struct strbuf { + size_t buf_sz; + int pos; + char buf[0]; +}; + +#define DEFINE_STRBUF(name, N) \ + struct { struct strbuf buf; char data[(N)]; } ___##name; \ + struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf) + +__printf(2, 3) +static inline void snappendf(struct strbuf *s, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + s->pos += vsnprintf(s->buf + s->pos, + s->pos < s->buf_sz ? 
s->buf_sz - s->pos : 0, + fmt, args); + va_end(args); +} + +/* ================================== + * GENERIC NUMBER TYPE AND OPERATIONS + * ================================== + */ +enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 }; + +static __always_inline u64 min_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x < (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x < (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x < (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x < (s32)y ? (s32)x : (s32)y; + default: printf("min_t!\n"); exit(1); + } +} + +static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x > (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x > (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x > (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y; + default: printf("max_t!\n"); exit(1); + } +} + +static const char *t_str(enum num_t t) +{ + switch (t) { + case U64: return "u64"; + case U32: return "u32"; + case S64: return "s64"; + case S32: return "s32"; + default: printf("t_str!\n"); exit(1); + } +} + +static enum num_t t_is_32(enum num_t t) +{ + switch (t) { + case U64: return false; + case U32: return true; + case S64: return false; + case S32: return true; + default: printf("t_is_32!\n"); exit(1); + } +} + +static enum num_t t_signed(enum num_t t) +{ + switch (t) { + case U64: return S64; + case U32: return S32; + case S64: return S64; + case S32: return S32; + default: printf("t_signed!\n"); exit(1); + } +} + +static enum num_t t_unsigned(enum num_t t) +{ + switch (t) { + case U64: return U64; + case U32: return U32; + case S64: return U64; + case S32: return U32; + default: printf("t_unsigned!\n"); exit(1); + } +} + +static bool num_is_small(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x <= 256; + case U32: return (u32)x <= 256; + case S64: return (s64)x >= -256 && (s64)x <= 256; + case S32: return (s32)x >= -256 && (s32)x <= 256; + default: printf("num_is_small!\n"); exit(1); + } +} + +static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x) +{ + bool is_small = num_is_small(t, x); + + if (is_small) { + switch (t) { + case U64: return snappendf(sb, "%llu", (u64)x); + case U32: return snappendf(sb, "%u", (u32)x); + case S64: return snappendf(sb, "%lld", (s64)x); + case S32: return snappendf(sb, "%d", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } else { + switch (t) { + case U64: + if (x == U64_MAX) + return snappendf(sb, "U64_MAX"); + else if (x >= U64_MAX - 256) + return snappendf(sb, "U64_MAX-%llu", U64_MAX - x); + else + return snappendf(sb, "%#llx", (u64)x); + case U32: + if ((u32)x == U32_MAX) + return snappendf(sb, "U32_MAX"); + else if ((u32)x >= U32_MAX - 256) + return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x); + else + return snappendf(sb, "%#x", (u32)x); + case S64: + if ((s64)x == S64_MAX) + return snappendf(sb, "S64_MAX"); + else if ((s64)x >= S64_MAX - 256) + return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x); + else if ((s64)x == S64_MIN) + return snappendf(sb, "S64_MIN"); + else if ((s64)x <= S64_MIN + 256) + return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN); + else + return snappendf(sb, "%#llx", (s64)x); + case S32: + if ((s32)x == S32_MAX) + return snappendf(sb, "S32_MAX"); + else if ((s32)x >= S32_MAX - 256) + return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x); + else if ((s32)x == S32_MIN) + return snappendf(sb, "S32_MIN"); + else if ((s32)x <= S32_MIN + 256) + return 
snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN); + else + return snappendf(sb, "%#x", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } +} + +/* =================================== + * GENERIC RANGE STRUCT AND OPERATIONS + * =================================== + */ +struct range { + u64 a, b; +}; + +static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x) +{ + if (x.a == x.b) + return snprintf_num(t, sb, x.a); + + snappendf(sb, "["); + snprintf_num(t, sb, x.a); + snappendf(sb, "; "); + snprintf_num(t, sb, x.b); + snappendf(sb, "]"); +} + +static void print_range(enum num_t t, struct range x, const char *sfx) +{ + DEFINE_STRBUF(sb, 128); + + snprintf_range(t, sb, x); + printf("%s%s", sb->buf, sfx); +} + +static const struct range unkn[] = { + [U64] = { 0, U64_MAX }, + [U32] = { 0, U32_MAX }, + [S64] = { (u64)S64_MIN, (u64)S64_MAX }, + [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX }, +}; + +static struct range unkn_subreg(enum num_t t) +{ + switch (t) { + case U64: return unkn[U32]; + case U32: return unkn[U32]; + case S64: return unkn[U32]; + case S32: return unkn[S32]; + default: printf("unkn_subreg!\n"); exit(1); + } +} + +static struct range range(enum num_t t, u64 a, u64 b) +{ + switch (t) { + case U64: return (struct range){ (u64)a, (u64)b }; + case U32: return (struct range){ (u32)a, (u32)b }; + case S64: return (struct range){ (s64)a, (s64)b }; + case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b }; + default: printf("range!\n"); exit(1); + } +} + +static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; } +static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; } +static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); } +static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; } + +static bool range_eq(struct range x, struct range y) +{ + return x.a == y.a && x.b == y.b; +} + +static struct range range_cast_to_s32(struct range x) +{ + u64 a = x.a, b = x.b; + + /* if upper 32 bits are constant, lower 32 bits should form a proper + * s32 range to be correct + */ + if (upper32(a) == upper32(b) && (s32)a <= (s32)b) + return range(S32, a, b); + + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. + * + * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating + * over full 64-bit numbers range will form a proper [-16, 16] + * ([0xffffff00; 0x00000010]) range in its lower 32 bits. 
+ */ + if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0) + return range(S32, a, b); + + /* otherwise we can't derive much meaningful information */ + return unkn[S32]; +} + +static struct range range_cast_u64(enum num_t to_t, struct range x) +{ + u64 a = (u64)x.a, b = (u64)x.b; + + switch (to_t) { + case U64: + return x; + case U32: + if (upper32(a) != upper32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + if (sign64(a) != sign64(b)) + return unkn[S64]; + return range(S64, a, b); + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_u64!\n"); exit(1); + } +} + +static struct range range_cast_s64(enum num_t to_t, struct range x) +{ + s64 a = (s64)x.a, b = (s64)x.b; + + switch (to_t) { + case U64: + /* equivalent to (s64)a <= (s64)b check */ + if (sign64(a) != sign64(b)) + return unkn[U64]; + return range(U64, a, b); + case U32: + if (upper32(a) != upper32(b) || sign32(a) != sign32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + return x; + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_s64!\n"); exit(1); + } +} + +static struct range range_cast_u32(enum num_t to_t, struct range x) +{ + u32 a = (u32)x.a, b = (u32)x.b; + + switch (to_t) { + case U64: + case S64: + /* u32 is always a valid zero-extended u64/s64 */ + return range(to_t, a, b); + case U32: + return x; + case S32: + return range_cast_to_s32(range(U32, a, b)); + default: printf("range_cast_u32!\n"); exit(1); + } +} + +static struct range range_cast_s32(enum num_t to_t, struct range x) +{ + s32 a = (s32)x.a, b = (s32)x.b; + + switch (to_t) { + case U64: + case U32: + case S64: + if (sign32(a) != sign32(b)) + return unkn[to_t]; + return range(to_t, a, b); + case S32: + return x; + default: printf("range_cast_s32!\n"); exit(1); + } +} + +/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving + * all possible information. 
Worst case, it will be unknown range within + * *to_t* domain, if nothing more specific can be guaranteed during the + * conversion + */ +static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from) +{ + switch (from_t) { + case U64: return range_cast_u64(to_t, from); + case U32: return range_cast_u32(to_t, from); + case S64: return range_cast_s64(to_t, from); + case S32: return range_cast_s32(to_t, from); + default: printf("range_cast!\n"); exit(1); + } +} + +static bool is_valid_num(enum num_t t, u64 x) +{ + switch (t) { + case U64: return true; + case U32: return upper32(x) == 0; + case S64: return true; + case S32: return upper32(x) == 0; + default: printf("is_valid_num!\n"); exit(1); + } +} + +static bool is_valid_range(enum num_t t, struct range x) +{ + if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b)) + return false; + + switch (t) { + case U64: return (u64)x.a <= (u64)x.b; + case U32: return (u32)x.a <= (u32)x.b; + case S64: return (s64)x.a <= (s64)x.b; + case S32: return (s32)x.a <= (s32)x.b; + default: printf("is_valid_range!\n"); exit(1); + } +} + +static struct range range_improve(enum num_t t, struct range old, struct range new) +{ + return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); +} + +static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) +{ + struct range y_cast; + + y_cast = range_cast(y_t, x_t, y); + + /* the case when new range knowledge, *y*, is a 32-bit subregister + * range, while previous range knowledge, *x*, is a full register + * 64-bit range, needs special treatment to take into account upper 32 + * bits of full register range + */ + if (t_is_32(y_t) && !t_is_32(x_t)) { + struct range x_swap; + + /* some combinations of upper 32 bits and sign bit can lead to + * invalid ranges, in such cases it's easier to detect them + * after cast/swap than try to enumerate all the conditions + * under which transformation and knowledge transfer is valid + */ + x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); + if (!is_valid_range(x_t, x_swap)) + return x; + return range_improve(x_t, x, x_swap); + } + + /* otherwise, plain range cast and intersection works */ + return range_improve(x_t, x, y_cast); +} + +/* ======================= + * GENERIC CONDITIONAL OPS + * ======================= + */ +enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE }; + +static enum op complement_op(enum op op) +{ + switch (op) { + case OP_LT: return OP_GE; + case OP_LE: return OP_GT; + case OP_GT: return OP_LE; + case OP_GE: return OP_LT; + case OP_EQ: return OP_NE; + case OP_NE: return OP_EQ; + default: printf("complement_op!\n"); exit(1); + } +} + +static const char *op_str(enum op op) +{ + switch (op) { + case OP_LT: return "<"; + case OP_LE: return "<="; + case OP_GT: return ">"; + case OP_GE: return ">="; + case OP_EQ: return "=="; + case OP_NE: return "!="; + default: printf("op_str!\n"); exit(1); + } +} + +/* Can register with range [x.a, x.b] *EVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op) +{ +#define range_canbe(T) do { \ + switch (op) { \ + case OP_LT: return (T)x.a < (T)y.b; \ + case OP_LE: return (T)x.a <= (T)y.b; \ + case OP_GT: return (T)x.b > (T)y.a; \ + case OP_GE: return (T)x.b >= (T)y.a; \ + case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \ + case OP_NE: return 
!((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \ + default: printf("range_canbe op %d\n", op); exit(1); \ + } \ +} while (0) + + switch (t) { + case U64: { range_canbe(u64); } + case U32: { range_canbe(u32); } + case S64: { range_canbe(s64); } + case S32: { range_canbe(s32); } + default: printf("range_canbe!\n"); exit(1); + } +#undef range_canbe +} + +/* Does register with range [x.a, x.b] *ALWAYS* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op) +{ + /* always op <=> ! canbe complement(op) */ + return !range_canbe_op(t, x, y, complement_op(op)); +} + +/* Does register with range [x.a, x.b] *NEVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op) +{ + return !range_canbe_op(t, x, y, op); +} + +/* similar to verifier's is_branch_taken(): + * 1 - always taken; + * 0 - never taken, + * -1 - unsure. + */ +static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op) +{ + if (range_always_op(t, x, y, op)) + return 1; + if (range_never_op(t, x, y, op)) + return 0; + return -1; +} + +/* What would be the new estimates for register x and y ranges assuming truthful + * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy. + * + * We assume "interesting" cases where ranges overlap. Cases where it's + * obvious that (x OP y) is either always true or false should be filtered with + * range_never and range_always checks. + */ +static void range_cond(enum num_t t, struct range x, struct range y, + enum op op, struct range *newx, struct range *newy) +{ + if (!range_canbe_op(t, x, y, op)) { + /* nothing to adjust, can't happen, return original values */ + *newx = x; + *newy = y; + return; + } + switch (op) { + case OP_LT: + *newx = range(t, x.a, min_t(t, x.b, y.b - 1)); + *newy = range(t, max_t(t, x.a + 1, y.a), y.b); + break; + case OP_LE: + *newx = range(t, x.a, min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), y.b); + break; + case OP_GT: + *newx = range(t, max_t(t, x.a, y.a + 1), x.b); + *newy = range(t, y.a, min_t(t, x.b - 1, y.b)); + break; + case OP_GE: + *newx = range(t, max_t(t, x.a, y.a), x.b); + *newy = range(t, y.a, min_t(t, x.b, y.b)); + break; + case OP_EQ: + *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + break; + case OP_NE: + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + break; + + /* below extended logic is not supported by verifier just yet */ + if (x.a == x.b && x.a == y.a) { + /* X is a constant matching left side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a + 1, y.b); + } else if (x.a == x.b && x.b == y.b) { + /* X is a constant matching rigth side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b - 1); + } else if (y.a == y.b && x.a == y.a) { + /* Y is a constant matching left side of X */ + *newx = range(t, x.a + 1, x.b); + *newy = range(t, y.a, y.b); + } else if (y.a == y.b && x.b == y.b) { + /* Y is a constant matching rigth side of X */ + *newx = range(t, x.a, x.b - 1); + *newy = range(t, y.a, y.b); + } else { + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + } + + break; + 
default: + break; + } +} + +/* ======================= + * REGISTER STATE HANDLING + * ======================= + */ +struct reg_state { + struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */ + bool valid; +}; + +static void print_reg_state(struct reg_state *r, const char *sfx) +{ + DEFINE_STRBUF(sb, 512); + enum num_t t; + int cnt = 0; + + if (!r->valid) { + printf("%s", sfx); + return; + } + + snappendf(sb, "scalar("); + for (t = first_t; t <= last_t; t++) { + snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t)); + snprintf_range(t, sb, r->r[t]); + } + snappendf(sb, ")"); + + printf("%s%s", sb->buf, sfx); +} + +static void print_refinement(enum num_t s_t, struct range src, + enum num_t d_t, struct range old, struct range new, + const char *ctx) +{ + printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t)); + print_range(s_t, src, ""); + printf(" (%s)DST_OLD=", t_str(d_t)); + print_range(d_t, old, ""); + printf(" (%s)DST_NEW=", t_str(d_t)); + print_range(d_t, new, "\n"); +} + +static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx) +{ + enum num_t d_t, s_t; + struct range old; + bool keep_going = false; + +again: + /* try to derive new knowledge from just learned range x of type t */ + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], t, x); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(t, x, d_t, old, r->r[d_t], ctx); + } + } + + /* now see if we can derive anything new from updated reg_state's ranges */ + for (s_t = first_t; s_t <= last_t; s_t++) { + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx); + } + } + } + + /* keep refining until we converge */ + if (keep_going) { + keep_going = false; + goto again; + } +} + +static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val) +{ + enum num_t tt; + + rs->valid = true; + for (tt = first_t; tt <= last_t; tt++) + rs->r[tt] = tt == t ? 
range(t, val, val) : unkn[tt]; + + reg_state_refine(rs, t, rs->r[t], "CONST"); +} + +static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op, + struct reg_state *newx, struct reg_state *newy, const char *ctx) +{ + char buf[32]; + enum num_t ts[2]; + struct reg_state xx = *x, yy = *y; + int i, t_cnt; + struct range z1, z2; + + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic, so we need to process + * both signed and unsigned domains at the same time + */ + ts[0] = t_unsigned(t); + ts[1] = t_signed(t); + t_cnt = 2; + } else { + ts[0] = t; + t_cnt = 1; + } + + for (i = 0; i < t_cnt; i++) { + t = ts[i]; + z1 = x->r[t]; + z2 = y->r[t]; + + range_cond(t, z1, z2, op, &z1, &z2); + + if (newx) { + snprintf(buf, sizeof(buf), "%s R1", ctx); + reg_state_refine(&xx, t, z1, buf); + } + if (newy) { + snprintf(buf, sizeof(buf), "%s R2", ctx); + reg_state_refine(&yy, t, z2, buf); + } + } + + if (newx) + *newx = xx; + if (newy) + *newy = yy; +} + +static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y, + enum op op) +{ + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic */ + enum num_t tu = t_unsigned(t); + enum num_t ts = t_signed(t); + int br_u, br_s; + + br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); + br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); + + if (br_u >= 0 && br_s >= 0 && br_u != br_s) + ASSERT_FALSE(true, "branch taken inconsistency!\n"); + if (br_u >= 0) + return br_u; + return br_s; + } + return range_branch_taken_op(t, x->r[t], y->r[t], op); +} + +/* ===================================== + * BPF PROGS GENERATION AND VERIFICATION + * ===================================== + */ +struct case_spec { + /* whether to init full register (r1) or sub-register (w1) */ + bool init_subregs; + /* whether to establish initial value range on full register (r1) or + * sub-register (w1) + */ + bool setup_subregs; + /* whether to establish initial value range using signed or unsigned + * comparisons (i.e., initialize umin/umax or smin/smax directly) + */ + bool setup_signed; + /* whether to perform comparison on full registers or sub-registers */ + bool compare_subregs; + /* whether to perform comparison using signed or unsigned operations */ + bool compare_signed; +}; + +/* Generate test BPF program based on provided test ranges, operation, and + * specifications about register bitness and signedness. 
+ */ +static int load_range_cmp_prog(struct range x, struct range y, enum op op, + int branch_taken, struct case_spec spec, + char *log_buf, size_t log_sz, + int *false_pos, int *true_pos) +{ +#define emit(insn) ({ \ + struct bpf_insn __insns[] = { insn }; \ + int __i; \ + for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \ + insns[cur_pos + __i] = __insns[__i]; \ + cur_pos += __i; \ +}) +#define JMP_TO(target) (target - cur_pos - 1) + int cur_pos = 0, exit_pos, fd, op_code; + struct bpf_insn insns[64]; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_level = 2, + .log_buf = log_buf, + .log_size = log_sz, + ); + + /* ; skip exit block below + * goto +2; + */ + emit(BPF_JMP_A(2)); + exit_pos = cur_pos; + /* ; exit block for all the preparatory conditionals + * out: + * r0 = 0; + * exit; + */ + emit(BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(BPF_EXIT_INSN()); + /* + * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value + * call bpf_get_current_pid_tgid; + * r6 = r0; | w6 = w0; + * call bpf_get_current_pid_tgid; + * r7 = r0; | w7 = w0; + */ + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0)); + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + /* ; setup initial r6/w6 possible value range ([x.a, x.b]) + * r1 = %[x.a] ll; | w1 = %[x.a]; + * r2 = %[x.b] ll; | w2 = %[x.b]; + * if r6 < r1 goto out; | if w6 < w1 goto out; + * if r6 > r2 goto out; | if w6 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, x.a)); + emit(BPF_LD_IMM64(BPF_REG_2, x.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; setup initial r7/w7 possible value range ([y.a, y.b]) + * r1 = %[y.a] ll; | w1 = %[y.a]; + * r2 = %[y.b] ll; | w2 = %[y.b]; + * if r7 < r1 goto out; | if w7 < w1 goto out; + * if r7 > r2 goto out; | if w7 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, y.a)); + emit(BPF_LD_IMM64(BPF_REG_2, y.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; range test instruction + * if r6 r7 goto +3; | if w6 w7 goto +3; + */ + switch (op) { + case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break; + case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break; + case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break; + case OP_GE: op_code = spec.compare_signed ? 
BPF_JSGE : BPF_JGE; break; + case OP_EQ: op_code = BPF_JEQ; break; + case OP_NE: op_code = BPF_JNE; break; + default: + printf("unrecognized op %d\n", op); + return -ENOTSUP; + } + /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * ; this is used for debugging, as verifier doesn't always print + * ; registers states as of condition jump instruction (e.g., when + * ; precision marking happens) + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + */ + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (spec.compare_subregs) + emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + else + emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *false_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 1) /* false branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + else + emit(BPF_EXIT_INSN()); + /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *true_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 0) /* true branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + emit(BPF_EXIT_INSN()); /* last instruction has to be exit */ + + fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test", + "GPL", insns, cur_pos, &opts); + if (fd < 0) + return fd; + + close(fd); + return 0; +#undef emit +#undef JMP_TO +} + +#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0) + +/* Parse register state from verifier log. + * `s` should point to the start of "Rx = ..." substring in the verifier log. + */ +static int parse_reg_state(const char *s, struct reg_state *reg) +{ + /* There are two generic forms for SCALAR register: + * - known constant: R6_rwD=P%lld + * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated + * list of optional range specifiers: + * - umin=%llu, if missing, assumed 0; + * - umax=%llu, if missing, assumed U64_MAX; + * - smin=%lld, if missing, assumed S64_MIN; + * - smax=%lld, if missing, assummed S64_MAX; + * - umin32=%d, if missing, assumed 0; + * - umax32=%d, if missing, assumed U32_MAX; + * - smin32=%d, if missing, assumed S32_MIN; + * - smax32=%d, if missing, assummed S32_MAX; + * - var_off=(%#llx; %#llx), tnum part, we don't care about it. + * + * If some of the values are equal, they will be grouped (but min/max + * are not mixed together, and similarly negative values are not + * grouped with non-negative ones). E.g.: + * + * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000) + * + * _rwD part is optional (and any of the letters can be missing). + * P (precision mark) is optional as well. + * + * Anything inside scalar() is optional, including id, of course. 
+ */ + struct { + const char *pfx; + const char *fmt; + u64 *dst, def; + bool is_32, is_set; + } *f, fields[8] = { + {"smin=", "%lld", ®->r[S64].a, S64_MIN}, + {"smax=", "%lld", ®->r[S64].b, S64_MAX}, + {"umin=", "%llu", ®->r[U64].a, 0}, + {"umax=", "%llu", ®->r[U64].b, U64_MAX}, + {"smin32=", "%lld", ®->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", "%lld", ®->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", "%llu", ®->r[U32].a, 0, true}, + {"umax32=", "%llu", ®->r[U32].b, U32_MAX, true}, + }; + const char *p, *fmt; + int i; + + p = strchr(s, '='); + if (!p) + return -EINVAL; + p++; + if (*p == 'P') + p++; + + if (!str_has_pfx(p, "scalar(")) { + long long sval; + enum num_t t; + + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + + reg->valid = true; + for (t = first_t; t <= last_t; t++) { + reg->r[t] = range(t, sval, sval); + } + return 0; + } + + p += sizeof("scalar"); + while (p) { + int midxs[ARRAY_SIZE(fields)], mcnt = 0; + u64 val; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!str_has_pfx(p, f->pfx)) + continue; + midxs[mcnt++] = i; + p += strlen(f->pfx); + } + + if (mcnt) { + /* populate all matched fields */ + fmt = fields[midxs[0]].fmt; + if (sscanf(p, fmt, &val) != 1) + return -EINVAL; + + for (i = 0; i < mcnt; i++) { + f = &fields[midxs[i]]; + f->is_set = true; + *f->dst = f->is_32 ? (u64)(u32)val : val; + } + } else if (str_has_pfx(p, "var_off")) { + /* skip "var_off=(0x0; 0x3f)" part completely */ + p = strchr(p, ')'); + if (!p) + return -EINVAL; + p++; + } + + p = strpbrk(p, ",)"); + if (*p == ')') + break; + if (p) + p++; + } + + reg->valid = true; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!f->is_set) + *f->dst = f->def; + } + + return 0; +} + + +/* Parse all register states (TRUE/FALSE branches and DST/SRC registers) + * out of the verifier log for a corresponding test case BPF program. + */ +static int parse_range_cmp_log(const char *log_buf, struct case_spec spec, + int false_pos, int true_pos, + struct reg_state *false1_reg, struct reg_state *false2_reg, + struct reg_state *true1_reg, struct reg_state *true2_reg) +{ + struct { + int insn_idx; + int reg_idx; + const char *reg_upper; + struct reg_state *state; + } specs[] = { + {false_pos, 6, "R6=", false1_reg}, + {false_pos + 1, 7, "R7=", false2_reg}, + {true_pos, 6, "R6=", true1_reg}, + {true_pos + 1, 7, "R7=", true2_reg}, + }; + char buf[32]; + const char *p = log_buf, *q; + int i, err; + + for (i = 0; i < 4; i++) { + sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx, + spec.compare_subregs ? "bc" : "bf", + spec.compare_subregs ? "w0" : "r0", + spec.compare_subregs ? 
"w" : "r", specs[i].reg_idx); + + q = strstr(p, buf); + if (!q) { + *specs[i].state = (struct reg_state){.valid = false}; + continue; + } + p = strstr(q, specs[i].reg_upper); + if (!p) + return -EINVAL; + err = parse_reg_state(p, specs[i].state); + if (err) + return -EINVAL; + } + return 0; +} + +/* Validate ranges match, and print details if they don't */ +static bool assert_range_eq(enum num_t t, struct range x, struct range y, + const char *ctx1, const char *ctx2) +{ + DEFINE_STRBUF(sb, 512); + + if (range_eq(x, y)) + return true; + + snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2); + snprintf_range(t, sb, x); + snappendf(sb, " != "); + snprintf_range(t, sb, y); + + printf("%s\n", sb->buf); + + return false; +} + +/* Validate that register states match, and print details if they don't */ +static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx) +{ + bool ok = true; + enum num_t t; + + if (r->valid != e->valid) { + printf("MISMATCH %s: actual %s != expected %s\n", ctx, + r->valid ? "" : "", + e->valid ? "" : ""); + return false; + } + + if (!r->valid) + return true; + + for (t = first_t; t <= last_t; t++) { + if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t))) + ok = false; + } + + return ok; +} + +/* Printf verifier log, filtering out irrelevant noise */ +static void print_verifier_log(const char *buf) +{ + const char *p; + + while (buf[0]) { + p = strchrnul(buf, '\n'); + + /* filter out irrelevant precision backtracking logs */ + if (str_has_pfx(buf, "mark_precise: ")) + goto skip_line; + + printf("%.*s\n", (int)(p - buf), buf); + +skip_line: + buf = *p == '\0' ? p : p + 1; + } +} + +/* Simulate provided test case purely with our own range-based logic. + * This is done to set up expectations for verifier's branch_taken logic and + * verifier's register states in the verifier log. + */ +static void sim_case(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op, + struct reg_state *fr1, struct reg_state *fr2, + struct reg_state *tr1, struct reg_state *tr2, + int *branch_taken) +{ + const u64 A = x.a; + const u64 B = x.b; + const u64 C = y.a; + const u64 D = y.b; + struct reg_state rc; + enum op rev_op = complement_op(op); + enum num_t t; + + fr1->valid = fr2->valid = true; + tr1->valid = tr2->valid = true; + for (t = first_t; t <= last_t; t++) { + /* if we are initializing using 32-bit subregisters, + * full registers get upper 32 bits zeroed automatically + */ + struct range z = t_is_32(init_t) ? 
unkn_subreg(t) : unkn[t]; + + fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z; + } + + /* step 1: r1 >= A, r2 >= C */ + reg_state_set_const(&rc, init_t, A); + reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A"); + reg_state_set_const(&rc, init_t, C); + reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 2: r1 <= B, r2 <= D */ + reg_state_set_const(&rc, init_t, B); + reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B"); + reg_state_set_const(&rc, init_t, D); + reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 3: r1 r2 */ + *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op); + fr1->valid = fr2->valid = false; + tr1->valid = tr2->valid = false; + if (*branch_taken != 1) { /* FALSE is possible */ + fr1->valid = fr2->valid = true; + reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE"); + } + if (*branch_taken != 0) { /* TRUE is possible */ + tr1->valid = tr2->valid = true; + reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE"); + } + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n"); + printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n"); + printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n"); + printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n"); + } +} + +/* =============================== + * HIGH-LEVEL TEST CASE VALIDATION + * =============================== + */ +static u32 upper_seeds[] = { + 0, + 1, + U32_MAX, + U32_MAX - 1, + S32_MAX, + (u32)S32_MIN, +}; + +static u32 lower_seeds[] = { + 0, + 1, + 2, (u32)-2, + 255, (u32)-255, + UINT_MAX, + UINT_MAX - 1, + INT_MAX, + (u32)INT_MIN, +}; + +struct ctx { + int val_cnt, subval_cnt, range_cnt, subrange_cnt; + u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + u32 usubvals[ARRAY_SIZE(lower_seeds)]; + s32 ssubvals[ARRAY_SIZE(lower_seeds)]; + struct range *uranges, *sranges; + struct range *usubranges, *ssubranges; + int max_failure_cnt, cur_failure_cnt; + int total_case_cnt, case_cnt; + __u64 start_ns; + char progress_ctx[32]; +}; + +static void cleanup_ctx(struct ctx *ctx) +{ + free(ctx->uranges); + free(ctx->sranges); + free(ctx->usubranges); + free(ctx->ssubranges); +} + +struct subtest_case { + enum num_t init_t; + enum num_t cond_t; + struct range x; + struct range y; + enum op op; +}; + +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t) +{ + snappendf(sb, "(%s)", t_str(t->init_t)); + snprintf_range(t->init_t, sb, t->x); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), op_str(t->op)); + snprintf_range(t->init_t, sb, t->y); +} + +/* Generate and validate test case based on specific combination of setup + * register ranges (including their expected num_t domain), and conditional + * operation to perform (including num_t domain in which it has to be + * performed) + */ +static int verify_case_op(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op) +{ + char log_buf[256 * 1024]; + size_t 
log_sz = sizeof(log_buf); + int err, false_pos = 0, true_pos = 0, branch_taken; + struct reg_state fr1, fr2, tr1, tr2; + struct reg_state fe1, fe2, te1, te2; + bool failed = false; + struct case_spec spec = { + .init_subregs = (init_t == U32 || init_t == S32), + .setup_subregs = (init_t == U32 || init_t == S32), + .setup_signed = (init_t == S64 || init_t == S32), + .compare_subregs = (cond_t == U32 || cond_t == S32), + .compare_signed = (cond_t == S64 || cond_t == S32), + }; + + log_buf[0] = '\0'; + + sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken); + + err = load_range_cmp_prog(x, y, op, branch_taken, spec, + log_buf, log_sz, &false_pos, &true_pos); + if (err) { + ASSERT_OK(err, "load_range_cmp_prog"); + failed = true; + } + + err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos, + &fr1, &fr2, &tr1, &tr2); + if (err) { + ASSERT_OK(err, "parse_range_cmp_log"); + failed = true; + } + + if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") || + !assert_reg_state_eq(&fr2, &fe2, "false_reg2") || + !assert_reg_state_eq(&tr1, &te1, "true_reg1") || + !assert_reg_state_eq(&tr2, &te2, "true_reg2")) { + failed = true; + } + + if (failed || env.verbosity >= VERBOSE_NORMAL) { + if (failed || env.verbosity >= VERBOSE_VERY) { + printf("VERIFIER LOG:\n========================\n"); + print_verifier_log(log_buf); + printf("=====================\n"); + } + printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n"); + printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n"); + printf("ACTUAL FALSE2: "); print_reg_state(&fr2, "\n"); + printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n"); + printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n"); + printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n"); + printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n"); + printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n"); + + return failed ? 
-EINVAL : 0; + } + + return 0; +} + +/* Given setup ranges and number types, go over all supported operations, + * generating individual subtest for each allowed combination + */ +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + DEFINE_STRBUF(sb, 256); + int err; + struct subtest_case sub = { + .init_t = init_t, + .cond_t = cond_t, + .x = x, + .y = y, + }; + + for (sub.op = first_op; sub.op <= last_op; sub.op++) { + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub); + if (!test__start_subtest(sb->buf)) + continue; + + if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ + printf("TEST CASE: %s\n", sb->buf); + + err = verify_case_op(init_t, cond_t, x, y, sub.op); + if (err || env.verbosity >= VERBOSE_NORMAL) + ASSERT_OK(err, sb->buf); + if (err) { + ctx->cur_failure_cnt++; + if (ctx->cur_failure_cnt > ctx->max_failure_cnt) + return err; + return 0; /* keep testing other cases */ + } + ctx->case_cnt++; + if ((ctx->case_cnt % 10000) == 0) { + double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt; + u64 elapsed_ns = get_time_ns() - ctx->start_ns; + double remain_ns = elapsed_ns / progress * (1 - progress); + + fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), " + "elapsed %llu mins (%.2lf hrs), " + "ETA %.0lf mins (%.2lf hrs)\n", + ctx->progress_ctx, + ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress, + elapsed_ns / 1000000000 / 60, + elapsed_ns / 1000000000.0 / 3600, + remain_ns / 1000000000.0 / 60, + remain_ns / 1000000000.0 / 3600); + } + } + + return 0; +} + +/* ================================ + * GENERATED CASES FROM SEED VALUES + * ================================ + */ +static int u64_cmp(const void *p1, const void *p2) +{ + u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int u32_cmp(const void *p1, const void *p2) +{ + u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s64_cmp(const void *p1, const void *p2) +{ + s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s32_cmp(const void *p1, const void *p2) +{ + s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2; + + return x1 != x2 ? (x1 < x2 ? 
-1 : 1) : 0; +} + +/* Generate valid unique constants from seeds, both signed and unsigned */ +static void gen_vals(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) { + for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) { + ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j]; + } + } + + /* sort and compact uvals (i.e., it's `sort | uniq`) */ + qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->uvals[j] == ctx->uvals[i]) + continue; + j++; + ctx->uvals[j] = ctx->uvals[i]; + } + ctx->val_cnt = j + 1; + + /* we have exactly the same number of s64 values, they are just in + * a different order than u64s, so just sort them differently + */ + for (i = 0; i < ctx->val_cnt; i++) + ctx->svals[i] = ctx->uvals[i]; + qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->val_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U64, sb1, ctx->uvals[i]); + snprintf_num(S64, sb2, ctx->svals[i]); + printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf); + } + } + + /* 32-bit values are generated separately */ + cnt = 0; + for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) { + ctx->usubvals[cnt++] = lower_seeds[i]; + } + + /* sort and compact usubvals (i.e., it's `sort | uniq`) */ + qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->usubvals[j] == ctx->usubvals[i]) + continue; + j++; + ctx->usubvals[j] = ctx->usubvals[i]; + } + ctx->subval_cnt = j + 1; + + for (i = 0; i < ctx->subval_cnt; i++) + ctx->ssubvals[i] = ctx->usubvals[i]; + qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->subval_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U32, sb1, ctx->usubvals[i]); + snprintf_num(S32, sb2, ctx->ssubvals[i]); + printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf); + } + } +} + +/* Generate valid ranges from upper/lower seeds */ +static int gen_ranges(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j])); + snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j])); + printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->range_cnt = cnt; + + ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges)); + if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc")) + return -EINVAL; + ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges)); + if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]); + ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]); + cnt++; + } + } + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j])); + snprintf_range(S32, 
sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j])); + printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->subrange_cnt = cnt; + + ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges)); + if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc")) + return -EINVAL; + ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges)); + if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]); + ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]); + cnt++; + } + } + + return 0; +} + +static int parse_env_vars(struct ctx *ctx) +{ + const char *s; + + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + + if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { + errno = 0; + ctx->max_failure_cnt = strtol(s, NULL, 10); + if (errno || ctx->max_failure_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT"); + return -EINVAL; + } + } + + return 0; +} + +static int prepare_gen_tests(struct ctx *ctx) +{ + int err; + + err = parse_env_vars(ctx); + if (err) + return err; + + gen_vals(ctx); + err = gen_ranges(ctx); + if (err) { + ASSERT_OK(err, "gen_ranges"); + return err; + } + + return 0; +} + +/* Go over generated constants and ranges and validate various supported + * combinations of them + */ +static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u64 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U64 ? ctx.uranges : ctx.sranges; + vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.val_cnt; i++) { + for (j = 0; j < ctx.range_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u64|s64)( x ) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u64|s64)( x ) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u32 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges; + vals = init_t == U32 ? 
ctx.usubvals : (const u32 *)ctx.ssubvals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.subval_cnt; i++) { + for (j = 0; j < ctx.subrange_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u32|s32)( x ) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u32|s32)( x ) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* Go over thousands of test cases generated from initial seed values. + * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If + * envvar is not set, this test is skipped during test_progs testing. + * + * We split this up into smaller subsets based on initialization and + * conditiona numeric domains to get an easy parallelization with test_progs' + * -j argument. + */ + +/* RANGE x CONST, U64 initial range */ +void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); } +void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); } +void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); } +void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); } +/* RANGE x CONST, S64 initial range */ +void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); } +void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); } +void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); } +void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); } +/* RANGE x CONST, U32 initial range */ +void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); } +void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); } +void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); } +void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); } +/* RANGE x CONST, S32 initial range */ +void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); } +void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); } +void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } +void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } + +/* A set of hard-coded "interesting" cases to validate as part of normal + * test_progs test runs + */ +static struct subtest_case crafted_cases[] = { + {U64, U64, {0, 0xffffffff}, {0, 0}}, + {U64, U64, {0, 0x80000000}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, + {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0, 0xffffffffULL}, {1, 1}}, + {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 
0x7fffffff}}, + + {U64, U32, {0, 0x100000000}, {0, 0}}, + {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + + {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, + /* these are tricky cases where lower 32 bits allow to tighten 64 + * bit boundaries based on tightened lower 32 bit boundaries + */ + {U64, S32, {0, 0x0ffffffffULL}, {0, 0}}, + {U64, S32, {0, 0x100000000ULL}, {0, 0}}, + {U64, S32, {0, 0x100000001ULL}, {0, 0}}, + {U64, S32, {0, 0x180000000ULL}, {0, 0}}, + {U64, S32, {0, 0x17fffffffULL}, {0, 0}}, + {U64, S32, {0, 0x180000001ULL}, {0, 0}}, + + /* verifier knows about [-1, 0] range for s32 for this case already */ + {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + /* but didn't know about these cases initially */ + {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */ + {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */ + + /* longer convergence case: learning from u64 -> s64 -> u64 -> u32, + * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX]) + */ + {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + + {U32, U32, {1, U32_MAX}, {0, 0}}, + + {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, +}; + +/* Go over crafted hard-coded cases. This is fast, so we do it as part of + * normal test_progs run. + */ +void test_reg_bounds_crafted(void) +{ + struct ctx ctx; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) { + struct subtest_case *c = &crafted_cases[i]; + + verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y); + verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x); + } + + cleanup_ctx(&ctx); +} -- cgit v1.2.3-70-g09d2 From 774f94c5e74d86d554c4fd1e97c517a1a7ee7fe0 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:04 -0800 Subject: selftests/bpf: adjust OP_EQ/OP_NE handling to use subranges for branch taken Similar to kernel-side BPF verifier logic enhancements, use 32-bit subrange knowledge for is_branch_taken() logic in reg_bounds selftests. 
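A quick illustration (example values are made up; the helpers are the ones already defined in reg_bounds.c) of why a known 32-bit subrange can decide a branch that the 64-bit ranges cannot:

    /* Both registers may still have unknown full 64-bit ranges, but if
     * their known u32 subranges never intersect, the lower 32 bits can
     * never be equal, hence the full 64-bit values can't be equal either.
     */
    struct range x32 = range(U32, 0x00, 0xff);    /* subregister known in [0, 255] */
    struct range y32 = range(U32, 0x100, 0x100);  /* constant 256 */

    range_branch_taken_op(U32, x32, y32, OP_EQ);  /* -> 0: EQ branch never taken  */
    range_branch_taken_op(U32, x32, y32, OP_NE);  /* -> 1: NE branch always taken */

This is exactly the extra knowledge the hunk below feeds into reg_state_branch_taken_op() when the 64-bit domains are indecisive.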
Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231112010609.848406-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 30 +++++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 7a524b381ed3..10f3b6898274 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -748,16 +748,38 @@ static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct r /* OP_EQ and OP_NE are sign-agnostic */ enum num_t tu = t_unsigned(t); enum num_t ts = t_signed(t); - int br_u, br_s; + int br_u, br_s, br; br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); if (br_u >= 0 && br_s >= 0 && br_u != br_s) ASSERT_FALSE(true, "branch taken inconsistency!\n"); - if (br_u >= 0) - return br_u; - return br_s; + + /* if 64-bit ranges are indecisive, use 32-bit subranges to + * eliminate always/never taken branches, if possible + */ + if (br_u == -1 && (t == U64 || t == S64)) { + br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op); + /* we can only reject for OP_EQ, never take branch + * based on lower 32 bits + */ + if (op == OP_EQ && br == 0) + return 0; + /* for OP_NEQ we can be conclusive only if lower 32 bits + * differ and thus inequality branch is always taken + */ + if (op == OP_NE && br == 1) + return 1; + + br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op); + if (op == OP_EQ && br == 0) + return 0; + if (op == OP_NE && br == 1) + return 1; + } + + return br_u >= 0 ? br_u : br_s; } return range_branch_taken_op(t, x->r[t], y->r[t], op); } -- cgit v1.2.3-70-g09d2 From 2b0d204e368b306d4db894749947ed591b667ec5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:05 -0800 Subject: selftests/bpf: add range x range test to reg_bounds Now that verifier supports range vs range bounds adjustments, validate that by checking each generated range against every other generated range, across all supported operators (everything by JSET). We also add few cases that were problematic during development either for verifier or for selftest's range tracking implementation. Note that we utilize the same trick with splitting everything into multiple independent parallelizable tests, but init_t and cond_t. This brings down verification time in parallel mode from more than 8 hours down to less that 1.5 hours. 106 million cases were successfully validate for range vs range logic, in addition to about 7 million range vs const cases, added in earlier patch. 
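For reference, a hypothetical invocation that exercises just these generated range-vs-range subsets in parallel (-t and -j are test_progs' usual filter and parallelism switches, not something added by this patch):

    SLOW_TESTS=1 ./test_progs -j -t reg_bounds_gen_ranges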
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 86 ++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 10f3b6898274..5320fe5d9433 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -1760,6 +1760,60 @@ cleanup: cleanup_ctx(&ctx); } +static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + const struct range *ranges; + int i, j, rcnt; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + switch (init_t) + { + case U64: + ranges = ctx.uranges; + rcnt = ctx.range_cnt; + break; + case U32: + ranges = ctx.usubranges; + rcnt = ctx.subrange_cnt; + break; + case S64: + ranges = ctx.sranges; + rcnt = ctx.range_cnt; + break; + case S32: + ranges = ctx.ssubranges; + rcnt = ctx.subrange_cnt; + break; + default: + printf("validate_gen_range_vs_range!\n"); + exit(1); + } + + ctx.total_case_cnt = (MAX_OP - MIN_OP + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x RANGE, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < rcnt; i++) { + for (j = i; j < rcnt; j++) { + /* ( x ) */ + if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j])) + goto cleanup; + if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + /* Go over thousands of test cases generated from initial seed values. * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If * envvar is not set, this test is skipped during test_progs testing. 
@@ -1790,6 +1844,27 @@ void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } +/* RANGE x RANGE, U64 initial range */ +void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); } +void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); } +void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); } +void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); } +/* RANGE x RANGE, S64 initial range */ +void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); } +void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); } +void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); } +void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); } +/* RANGE x RANGE, U32 initial range */ +void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); } +void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); } +void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); } +void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); } +/* RANGE x RANGE, S32 initial range */ +void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); } +void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); } +void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } +void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } + /* A set of hard-coded "interesting" cases to validate as part of normal * test_progs test runs */ @@ -1803,6 +1878,12 @@ static struct subtest_case crafted_cases[] = { {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + /* single point overlap, interesting BPF_EQ and BPF_NE interactions */ + {U64, U64, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 1}, {1, 0x80000000}}, + {U64, U32, {0, 1}, {1, 0x80000000}}, + {U64, S32, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, @@ -1837,6 +1918,11 @@ static struct subtest_case crafted_cases[] = { {U32, U32, {1, U32_MAX}, {0, 0}}, {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + + {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, }; /* Go over crafted hard-coded cases. This is fast, so we do it as part of -- cgit v1.2.3-70-g09d2 From dab16659c50e8c9c7c5d9584beacec28c769dcca Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:06 -0800 Subject: selftests/bpf: add randomized reg_bounds tests Add random cases generation to reg_bounds.c and run them without SLOW_TESTS=1 to increase a chance of BPF CI catching latent issues. 
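A hypothetical replay of a randomized failure, using the environment variables introduced in the diff below (the seed and case count shown here are made up):

    REG_BOUNDS_RAND_SEED=2049021310 REG_BOUNDS_RAND_CASE_CNT=100 \
        ./test_progs -t reg_bounds_rand

The seed actually used is printed in the progress context ("[RANDOM SEED %u] ..."), so a failing run can be reproduced deterministically by feeding that value back in.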
Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 166 ++++++++++++++++++++- 1 file changed, 159 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 5320fe5d9433..f3f724062b35 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -83,6 +83,17 @@ static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) } } +static __always_inline u64 cast_t(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x; + case U32: return (u32)x; + case S64: return (s64)x; + case S32: return (u32)(s32)x; + default: printf("cast_t!\n"); exit(1); + } +} + static const char *t_str(enum num_t t) { switch (t) { @@ -1312,8 +1323,10 @@ struct ctx { struct range *usubranges, *ssubranges; int max_failure_cnt, cur_failure_cnt; int total_case_cnt, case_cnt; + int rand_case_cnt; + unsigned rand_seed; __u64 start_ns; - char progress_ctx[32]; + char progress_ctx[64]; }; static void cleanup_ctx(struct ctx *ctx) @@ -1644,11 +1657,6 @@ static int parse_env_vars(struct ctx *ctx) { const char *s; - if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { - test__skip(); - return -ENOTSUP; - } - if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { errno = 0; ctx->max_failure_cnt = strtol(s, NULL, 10); @@ -1658,13 +1666,37 @@ static int parse_env_vars(struct ctx *ctx) } } + if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) { + errno = 0; + ctx->rand_case_cnt = strtol(s, NULL, 10); + if (errno || ctx->rand_case_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_SEED"))) { + errno = 0; + ctx->rand_seed = strtoul(s, NULL, 10); + if (errno) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED"); + return -EINVAL; + } + } + return 0; } static int prepare_gen_tests(struct ctx *ctx) { + const char *s; int err; + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + err = parse_env_vars(ctx); if (err) return err; @@ -1794,7 +1826,7 @@ static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) exit(1); } - ctx.total_case_cnt = (MAX_OP - MIN_OP + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2); ctx.start_ns = get_time_ns(); snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), "RANGE x RANGE, %s -> %s", @@ -1865,6 +1897,126 @@ void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } +#define DEFAULT_RAND_CASE_CNT 25 + +#define RAND_21BIT_MASK ((1 << 22) - 1) + +static u64 rand_u64() +{ + /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it + * seems to be 1<<31, so we need to call it thrice to get full u64; + * we'll use rougly equal split: 22 + 21 + 21 bits + */ + return ((u64)random() << 42) | + (((u64)random() & RAND_21BIT_MASK) << 21) | + (random() & RAND_21BIT_MASK); +} + +static u64 rand_const(enum num_t t) +{ + return cast_t(t, rand_u64()); +} + +static struct range rand_range(enum num_t t) +{ + u64 x = rand_const(t), y = rand_const(t); + + return range(t, min_t(t, 
x, y), max_t(t, x, y)); +} + +static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range) +{ + struct ctx ctx; + struct range range1, range2; + int err, i; + u64 t; + + memset(&ctx, 0, sizeof(ctx)); + + err = parse_env_vars(&ctx); + if (err) { + ASSERT_OK(err, "parse_env_vars"); + return; + } + + if (ctx.rand_case_cnt == 0) + ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT; + if (ctx.rand_seed == 0) + ctx.rand_seed = (unsigned)get_time_ns(); + + srandom(ctx.rand_seed); + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "[RANDOM SEED %u] RANGE x %s, %s -> %s", + ctx.rand_seed, const_range ? "CONST" : "RANGE", + t_str(init_t), t_str(cond_t)); + fprintf(env.stdout, "%s\n", ctx.progress_ctx); + + for (i = 0; i < ctx.rand_case_cnt; i++) { + range1 = rand_range(init_t); + if (const_range) { + t = rand_const(init_t); + range2 = range(init_t, t, t); + } else { + range2 = rand_range(init_t); + } + + /* x */ + if (verify_case(&ctx, init_t, cond_t, range1, range2)) + goto cleanup; + /* x */ + if (verify_case(&ctx, init_t, cond_t, range2, range1)) + goto cleanup; + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* [RANDOM] RANGE x CONST, U64 initial range */ +void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); } +void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); } +void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); } +void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S64 initial range */ +void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); } +void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); } +void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); } +void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, U32 initial range */ +void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); } +void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); } +void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); } +void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S32 initial range */ +void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); } +void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); } +void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); } +void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); } + +/* [RANDOM] RANGE x RANGE, U64 initial range */ +void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); } +/* [RANDOM] 
RANGE x RANGE, S64 initial range */ +void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, U32 initial range */ +void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S32 initial range */ +void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); } + /* A set of hard-coded "interesting" cases to validate as part of normal * test_progs test runs */ -- cgit v1.2.3-70-g09d2 From 8c5677f8b31e92b57be7d5d0fbb1ac66eedf4f91 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:07 -0800 Subject: selftests/bpf: set BPF_F_TEST_SANITY_STRICT by default Make sure to set BPF_F_TEST_SANITY_STRICT program flag by default across most verifier tests (and a bunch of others that set custom prog flags). There are currently two tests that do fail validation, if enforced strictly: verifier_bounds/crossing_64_bit_signed_boundary_2 and verifier_bounds/crossing_32_bit_signed_boundary_2. To accommodate them, we teach test_loader a flag negation: __flag(!<flag>) will *clear* the specified flag, allowing easy opt-out. We apply __flag(!BPF_F_TEST_SANITY_STRICT) to these two tests. Also sprinkle BPF_F_TEST_SANITY_STRICT everywhere where we already set test-only BPF_F_TEST_RND_HI32 flag, for completeness. 
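As a rough sketch (not taken from the patch itself), the negation syntax is meant to be used directly in a program's __flag() annotation, mirroring how the two verifier_bounds programs below opt out; the test name and body here are hypothetical, while SEC/__description/__success/__retval/__flag/__clobber_all are the bpf_misc.h helpers the selftests already use:

SEC("xdp")
__description("hypothetical test that is known to trip strict sanity checks")
__success __retval(0)
__flag(!BPF_F_TEST_SANITY_STRICT) /* clear the default-on flag for this one program */
__naked void hypothetical_sanity_opt_out(void)
{
	asm volatile ("r0 = 0; exit;" ::: __clobber_all);
}

All other programs keep the strict flag, since test_loader now sets it by default.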
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-12-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/bpf_verif_scale.c | 2 +- .../testing/selftests/bpf/progs/verifier_bounds.c | 2 ++ tools/testing/selftests/bpf/test_loader.c | 35 ++++++++++++++++------ tools/testing/selftests/bpf/test_sock_addr.c | 1 + tools/testing/selftests/bpf/test_verifier.c | 2 +- tools/testing/selftests/bpf/testing_helpers.c | 4 +-- 6 files changed, 33 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 731c343897d8..3f2d70831873 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index c5588a14fe2e..0c1460936373 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,6 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 37ffa57f28a1..57e27b1a73a6 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name) return parse_int(str, val, name); } +static void update_flags(int *flags, int flag, bool clear) +{ + if (clear) + *flags &= ~flag; + else + *flags |= flag; +} + /* Uses btf_decl_tag attributes to describe the expected test * behavior, see bpf_misc.h for detailed description of each attribute * and attribute combinations. 
@@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); + spec->prog_flags = BPF_F_TEST_SANITY_STRICT; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const char *s, *val, *msg; const struct btf_type *t; - int tmp; + bool clear; + int flags; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + + clear = val[0] == '!'; + if (clear) + val++; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { - spec->prog_flags |= BPF_F_TEST_RND_HI32; + update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear); } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { - spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear); } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { - spec->prog_flags |= BPF_F_SLEEPABLE; + update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { - spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); + } else if (strcmp(val, "BPF_F_TEST_SANITY_STRICT") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_SANITY_STRICT, clear); } else /* assume numeric value */ { - err = parse_int(val, &tmp, "test prog flags"); + err = parse_int(val, &flags, "test prog flags"); if (err) goto cleanup; - spec->prog_flags |= tmp; + update_flags(&spec->prog_flags, flags, clear); } } } diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 2c89674fc62c..878c077e0fa7 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -680,6 +680,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_SANITY_STRICT); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 98107e0452d3..4992022f3137 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 8d994884c7b4..9786a94a666c 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ 
b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, -- cgit v1.2.3-70-g09d2 From a5c57f81eb2b5d6de4f46e47fd85be50d179bfd8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:08 -0800 Subject: veristat: add ability to set BPF_F_TEST_SANITY_STRICT flag with -r flag Add a new flag -r (--test-sanity), similar to -t (--test-states), to add extra BPF program flags when loading BPF programs. This allows to use veristat to easily catch sanity violations in production BPF programs. reg_bounds tests are also enforcing BPF_F_TEST_SANITY_STRICT flag now. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-13-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 1 + tools/testing/selftests/bpf/veristat.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index f3f724062b35..fe0cb906644b 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -838,6 +838,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op, .log_level = 2, .log_buf = log_buf, .log_size = log_sz, + .prog_flags = BPF_F_TEST_SANITY_STRICT, ); /* ; skip exit block below diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 443a29fc6a62..609fd9753af0 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -145,6 +145,7 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; + bool strict_range_sanity; enum resfmt out_fmt; bool show_version; bool comparison_mode; @@ -214,8 +215,6 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, - { "test-states", 't', NULL, 0, - "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, @@ -224,6 +223,10 @@ static const struct argp_option opts[] = { { "compare", 'C', NULL, 0, "Comparison mode" }, { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." 
}, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "test-sanity", 'r', NULL, 0, + "Force strict BPF verifier register sanity behavior (BPF_F_TEST_SANITY_STRICT program flag)" }, {}, }; @@ -295,6 +298,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'r': + env.strict_range_sanity = true; + break; case 'n': errno = 0; env.top_n = strtol(arg, NULL, 10); @@ -302,7 +308,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "invalid top N specifier: %s\n", arg); argp_usage(state); } - break; case 'C': env.comparison_mode = true; break; @@ -1023,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + if (env.strict_range_sanity) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_SANITY_STRICT); err = bpf_object__load(obj); env.progs_processed++; -- cgit v1.2.3-70-g09d2 From 882e3d873c2d8a2aebbc6c192aa1a2990b9d5b27 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 11 Nov 2023 17:06:09 -0800 Subject: selftests/bpf: add iter test requiring range x range logic Add a simple verifier test that requires deriving reg bounds for one register from another register that's not a constant. This is a realistic example of iterating elements of an array with fixed maximum number of elements, but smaller actual number of elements. This small example was an original motivation for doing this whole patch set in the first place, yes. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231112010609.848406-14-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/iters.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index c20c4e38b71c..b2181f850d3e 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1411,4 +1411,26 @@ __naked int checkpoint_states_deletion(void) ); } +struct { + int data[32]; + int n; +} loop_data; + +SEC("raw_tp") +__success +int iter_arr_with_actual_elem_count(const void *ctx) +{ + int i, n = loop_data.n, sum = 0; + + if (n > ARRAY_SIZE(loop_data.data)) + return 0; + + bpf_for(i, 0, n) { + /* no rechecking of i against ARRAY_SIZE(loop_data.n) */ + sum += loop_data.data[i]; + } + + return sum; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 9ffa01cab0699476be12ed4917c7d282cedc5ddf Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:39 -0300 Subject: selftests: tc-testing: drop '-N' argument from nsPlugin This argument would bypass the net namespace creation and run the test in the root namespace, even if nsPlugin was specified. Drop it as it's the same as commenting out the nsPlugin from a test and adds additional complexity to the plugin code. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 49 ++++++---------------- 1 file changed, 13 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index b62429b0fcdb..2297b4568ca9 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -28,10 +28,7 @@ def prepare_suite(obj, test): shadow['DEV2'] = original['DEV2'] obj.args.NAMES = shadow - if obj.args.namespace: - obj._ns_create() - else: - obj._ports_create() + obj._ns_create() # Make sure the netns is visible in the fs while True: @@ -75,10 +72,7 @@ class SubPlugin(TdcPlugin): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) - if self.args.namespace: - self._ns_destroy() - else: - self._ports_destroy() + self._ns_destroy() def post_suite(self, index): if self.args.verbose: @@ -93,24 +87,11 @@ class SubPlugin(TdcPlugin): subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - def add_args(self, parser): - super().add_args(parser) - self.argparser_group = self.argparser.add_argument_group( - 'netns', - 'options for nsPlugin(run commands in net namespace)') - self.argparser_group.add_argument( - '-N', '--no-namespace', action='store_false', default=True, - dest='namespace', help='Don\'t run commands in namespace') - return self.argparser - def adjust_command(self, stage, command): super().adjust_command(stage, command) cmdform = 'list' cmdlist = list() - if not self.args.namespace: - return command - if self.args.verbose: print('{}.adjust_command'.format(self.sub_class)) @@ -144,8 +125,6 @@ class SubPlugin(TdcPlugin): cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1')) cmds.append(self._replace_keywords('link set $DEV0 up')) cmds.append(self._replace_keywords('link add $DUMMY type dummy')) - if not self.args.namespace: - cmds.append(self._replace_keywords('link set $DEV1 up')) return cmds @@ -161,18 +140,17 @@ class SubPlugin(TdcPlugin): def _ns_create_cmds(self): cmds = [] - if self.args.namespace: - ns = self.args.NAMES['NS'] + ns = self.args.NAMES['NS'] - cmds.append(self._replace_keywords('netns add {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('netns add {}'.format(ns))) + cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) + cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) - if self.args.device: - cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) + if self.args.device: + cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) return cmds @@ -192,9 +170,8 @@ class SubPlugin(TdcPlugin): Destroy the network namespace for testing (and any associated network devices as well) 
''' - if self.args.namespace: - self._exec_cmd('post', self._ns_destroy_cmd()) - self._ports_destroy() + self._exec_cmd('post', self._ns_destroy_cmd()) + self._ports_destroy() @cached_property def _proc(self): -- cgit v1.2.3-70-g09d2 From fa63d353ddfb8a2d7688220a45b84e1507d211cf Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:40 -0300 Subject: selftests: tc-testing: rework namespaces and devices setup As mentioned in the TC Workshop 0x17, our recent changes to tdc broke downstream CI systems like tuxsuite. The issue is the classic problem with rcu/workqueue objects where you can miss them if not enough wall time has passed. The latter is subjective to the system and kernel config, in my machine could be nanoseconds while in another could be microseconds or more. In order to make the suite deterministic, poll for the existence of the objects in a reasonable manner. Talking netlink directly is the the best solution in order to avoid paying the cost of multiple 'fork()' calls, so introduce a netlink based setup routine using pyroute2. We leave the iproute2 one as a fallback when pyroute2 is not available. Also rework the iproute2 side to mimic the netlink routine where it creates DEV0 as the peer of DEV1 and moves DEV1 into the net namespace. This way when the namespace is deleted DEV0 is also deleted automatically, leaving no margin for resource leaks. Another bonus of this change is that our setup time sped up by a factor of 2 when using netlink. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 69 +++++++++++++++------- 1 file changed, 49 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 2297b4568ca9..62974bd3a4a5 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -9,6 +9,14 @@ from TdcPlugin import TdcPlugin from tdc_config import * +try: + from pyroute2 import netns + from pyroute2 import IPRoute + netlink = True +except ImportError: + netlink = False + print("!!! 
Consider installing pyroute2 !!!") + def prepare_suite(obj, test): original = obj.args.NAMES @@ -28,7 +36,10 @@ def prepare_suite(obj, test): shadow['DEV2'] = original['DEV2'] obj.args.NAMES = shadow - obj._ns_create() + if netlink == True: + obj._nl_ns_create() + else: + obj._ns_create() # Make sure the netns is visible in the fs while True: @@ -67,7 +78,6 @@ class SubPlugin(TdcPlugin): if test_skip: return - def post_case(self): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) @@ -119,23 +129,41 @@ class SubPlugin(TdcPlugin): print('adjust_command: return command [{}]'.format(command)) return command - def _ports_create_cmds(self): - cmds = [] + def _nl_ns_create(self): + ns = self.args.NAMES["NS"]; + dev0 = self.args.NAMES["DEV0"]; + dev1 = self.args.NAMES["DEV1"]; + dummy = self.args.NAMES["DUMMY"]; - cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1')) - cmds.append(self._replace_keywords('link set $DEV0 up')) - cmds.append(self._replace_keywords('link add $DUMMY type dummy')) - - return cmds - - def _ports_create(self): - self._exec_cmd_batched('pre', self._ports_create_cmds()) - - def _ports_destroy_cmd(self): - return self._replace_keywords('link del $DEV0') - - def _ports_destroy(self): - self._exec_cmd('post', self._ports_destroy_cmd()) + if self.args.verbose: + print('{}._nl_ns_create'.format(self.sub_class)) + + netns.create(ns) + netns.pushns(newns=ns) + with IPRoute() as ip: + ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) + ip.link('add', ifname=dummy, kind='dummy') + while True: + try: + dev1_idx = ip.link_lookup(ifname=dev1)[0] + dummy_idx = ip.link_lookup(ifname=dummy)[0] + ip.link('set', index=dev1_idx, state='up') + ip.link('set', index=dummy_idx, state='up') + break + except: + time.sleep(0.1) + continue + netns.popns() + + with IPRoute() as ip: + while True: + try: + dev0_idx = ip.link_lookup(ifname=dev0)[0] + ip.link('set', index=dev0_idx, state='up') + break + except: + time.sleep(0.1) + continue def _ns_create_cmds(self): cmds = [] @@ -143,10 +171,13 @@ class SubPlugin(TdcPlugin): ns = self.args.NAMES['NS'] cmds.append(self._replace_keywords('netns add {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0')) cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns))) cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns))) if self.args.device: cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) @@ -159,7 +190,6 @@ class SubPlugin(TdcPlugin): Create the network namespace in which the tests will be run and set up the required network devices for it. 
''' - self._ports_create() self._exec_cmd_batched('pre', self._ns_create_cmds()) def _ns_destroy_cmd(self): @@ -171,7 +201,6 @@ class SubPlugin(TdcPlugin): devices as well) ''' self._exec_cmd('post', self._ns_destroy_cmd()) - self._ports_destroy() @cached_property def _proc(self): -- cgit v1.2.3-70-g09d2 From bb9623c337f5d13b15d700127281c9607d1f8ec2 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:41 -0300 Subject: selftests: tc-testing: preload all modules in kselftests While running tdc tests in parallel it can race over the module loading done by tc and fail the run with random errors. So avoid this by preloading all modules before running tdc in kselftests. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.sh | 65 ++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index eb357bd7923c..ae08b7a47c42 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -1,7 +1,68 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -modprobe netdevsim -modprobe sch_teql +# If a module is required and was not compiled +# the test that requires it will fail anyways +try_modprobe() { + modprobe -q -R "$1" + if [ $? -ne 0 ]; then + echo "Module $1 not found... skipping." + else + modprobe "$1" + fi +} + +try_modprobe netdevsim +try_modprobe act_bpf +try_modprobe act_connmark +try_modprobe act_csum +try_modprobe act_ct +try_modprobe act_ctinfo +try_modprobe act_gact +try_modprobe act_gate +try_modprobe act_ipt +try_modprobe act_mirred +try_modprobe act_mpls +try_modprobe act_nat +try_modprobe act_pedit +try_modprobe act_police +try_modprobe act_sample +try_modprobe act_simple +try_modprobe act_skbedit +try_modprobe act_skbmod +try_modprobe act_tunnel_key +try_modprobe act_vlan +try_modprobe cls_basic +try_modprobe cls_bpf +try_modprobe cls_cgroup +try_modprobe cls_flow +try_modprobe cls_flower +try_modprobe cls_fw +try_modprobe cls_matchall +try_modprobe cls_route +try_modprobe cls_u32 +try_modprobe em_canid +try_modprobe em_cmp +try_modprobe em_ipset +try_modprobe em_ipt +try_modprobe em_meta +try_modprobe em_nbyte +try_modprobe em_text +try_modprobe em_u32 +try_modprobe sch_cake +try_modprobe sch_cbs +try_modprobe sch_choke +try_modprobe sch_codel +try_modprobe sch_drr +try_modprobe sch_etf +try_modprobe sch_ets +try_modprobe sch_fq +try_modprobe sch_fq_codel +try_modprobe sch_fq_pie +try_modprobe sch_gred +try_modprobe sch_hfsc +try_modprobe sch_hhf +try_modprobe sch_htb +try_modprobe sch_teql ./tdc.py -c actions --nobuildebpf ./tdc.py -c qdisc -- cgit v1.2.3-70-g09d2 From 04fd47bf70f95533c2b8387c19c7e11c085945d2 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 13:04:42 -0300 Subject: selftests: tc-testing: use parallel tdc in kselftests Leverage parallel tests in kselftests using all the available cpus. We tested this in tuxsuite and locally extensively and it seems it's ready for prime time. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- tools/testing/selftests/tc-testing/tdc.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index ae08b7a47c42..4dbe50bde5a0 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -64,5 +64,5 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -c actions --nobuildebpf -./tdc.py -c qdisc +./tdc.py -J`nproc` -c actions --nobuildebpf +./tdc.py -J`nproc` -c qdisc -- cgit v1.2.3-70-g09d2 From 3bdd9fd29cb0f136b307559a19c107210ad5c314 Mon Sep 17 00:00:00 2001 From: Lucas Karpinski Date: Tue, 14 Nov 2023 10:11:31 -0500 Subject: selftests/net: synchronize udpgro tests' tx and rx connection The sockets used by udpgso_bench_tx aren't always ready when udpgso_bench_tx transmits packets. This issue is more prevalent in -rt kernels, but can occur in both. Replace the hacky sleep calls with a function that checks whether the ports in the namespace are ready for use. Suggested-by: Paolo Abeni Signed-off-by: Lucas Karpinski Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/net_helper.sh | 22 ++++++++++++++++++++++ tools/testing/selftests/net/udpgro.sh | 13 ++++++------- tools/testing/selftests/net/udpgro_bench.sh | 5 +++-- tools/testing/selftests/net/udpgro_frglist.sh | 5 +++-- 4 files changed, 34 insertions(+), 11 deletions(-) create mode 100755 tools/testing/selftests/net/net_helper.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh new file mode 100755 index 000000000000..4fe0befa13fb --- /dev/null +++ b/tools/testing/selftests/net/net_helper.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Helper functions + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local port_hex + local i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \ + grep -q "${port_hex}"; then + break + fi + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 0c743752669a..af5dc57c8ce9 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro functional tests. +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -51,8 +53,7 @@ run_one() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? wait $(jobs -p) @@ -97,7 +98,7 @@ run_one_nat() { echo "ok" || \ echo "failed"& - sleep 0.1 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? kill -INT $pid @@ -118,11 +119,9 @@ run_one_2sock() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 - sleep 0.1 - # first UDP GSO socket should be closed at this point + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? 
wait $(jobs -p) diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 894972877e8b..cb664679b434 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -40,8 +42,7 @@ run_one() { ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 0a6359bed0b9..dd47fa96f6b3 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -45,8 +47,7 @@ run_one() { echo ${rx_args} ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } -- cgit v1.2.3-70-g09d2 From ff8867af01daa7ea770bebf5f91199b7434b74e5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 09:14:04 -0800 Subject: bpf: rename BPF_F_TEST_SANITY_STRICT to BPF_F_TEST_REG_INVARIANTS Rename verifier internal flag BPF_F_TEST_SANITY_STRICT to more neutral BPF_F_TEST_REG_INVARIANTS. This is a follow up to [0]. A few selftests and veristat need to be adjusted in the same patch as well. 
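For illustration only (not part of this patch), the renamed flag is consumed exactly like BPF_F_TEST_RND_HI32 in the selftests: it is OR-ed into a program's flags before load. A minimal sketch, assuming libbpf's bpf_program__flags()/bpf_program__set_flags() helpers (the wrapper name is made up):

static void force_reg_invariants(struct bpf_program *prog)
{
	__u32 flags = bpf_program__flags(prog);

	/* keep whatever flags the program already carries and add the
	 * renamed verifier-internal flag on top
	 */
	bpf_program__set_flags(prog, flags | BPF_F_TEST_REG_INVARIANTS);
}
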
[0] https://patchwork.kernel.org/project/netdevbpf/patch/20231112010609.848406-5-andrii@kernel.org/ Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231117171404.225508-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- include/uapi/linux/bpf.h | 2 +- kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 6 +++--- tools/include/uapi/linux/bpf.h | 2 +- tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c | 2 +- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 2 +- tools/testing/selftests/bpf/progs/verifier_bounds.c | 4 ++-- tools/testing/selftests/bpf/test_loader.c | 6 +++--- tools/testing/selftests/bpf/test_sock_addr.c | 3 +-- tools/testing/selftests/bpf/test_verifier.c | 2 +- tools/testing/selftests/bpf/testing_helpers.c | 4 ++-- tools/testing/selftests/bpf/veristat.c | 12 ++++++------ 13 files changed, 24 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 402b6bc44a1b..52a4012b8255 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -602,7 +602,7 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ - bool test_sanity_strict; /* fail verification on sanity violations */ + bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8a5855fcee69..7a5498242eaa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1201,7 +1201,7 @@ enum bpf_perf_event_type { #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) /* The verifier internal test flag. Behavior is undefined */ -#define BPF_F_TEST_SANITY_STRICT (1U << 7) +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f266e03ba342..5e43ddd1b83f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2574,7 +2574,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_RND_HI32 | BPF_F_XDP_HAS_FRAGS | BPF_F_XDP_DEV_BOUND_ONLY | - BPF_F_TEST_SANITY_STRICT)) + BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 59505881e7a7..7c3461b89513 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2608,14 +2608,14 @@ static int reg_bounds_sanity_check(struct bpf_verifier_env *env, return 0; out: - verbose(env, "REG SANITY VIOLATION (%s): %s u64=[%#llx, %#llx] " + verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] " "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n", ctx, msg, reg->umin_value, reg->umax_value, reg->smin_value, reg->smax_value, reg->u32_min_value, reg->u32_max_value, reg->s32_min_value, reg->s32_max_value, reg->var_off.value, reg->var_off.mask); - if (env->test_sanity_strict) + if (env->test_reg_invariants) return -EFAULT; __mark_reg_unbounded(reg); return 0; @@ -20791,7 +20791,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; - env->test_sanity_strict = attr->prog_flags & BPF_F_TEST_SANITY_STRICT; + env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8a5855fcee69..7a5498242eaa 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1201,7 +1201,7 @@ enum bpf_perf_event_type { #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) /* The verifier internal test flag. Behavior is undefined */ -#define BPF_F_TEST_SANITY_STRICT (1U << 7) +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 3f2d70831873..e770912fc1d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index fe0cb906644b..7a8b0bf0a7f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -838,7 +838,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op, .log_level = 2, .log_buf = log_buf, .log_size = log_sz, - .prog_flags = BPF_F_TEST_SANITY_STRICT, + .prog_flags = BPF_F_TEST_REG_INVARIANTS, ); /* ; skip exit block below diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 0c1460936373..ec430b71730b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,7 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1047,7 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_SANITY_STRICT) /* known sanity violation */ +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 57e27b1a73a6..a350ecdfba4a 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -179,7 +179,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); - spec->prog_flags = BPF_F_TEST_SANITY_STRICT; /* by default be strict */ + spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -280,8 +280,8 @@ static int parse_test_spec(struct test_loader *tester, update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); - } else if (strcmp(val, "BPF_F_TEST_SANITY_STRICT") == 0) { - update_flags(&spec->prog_flags, BPF_F_TEST_SANITY_STRICT, clear); + } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear); } else /* assume numeric value */ { err = parse_int(val, &flags, "test prog flags"); if (err) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 878c077e0fa7..b0068a9d2cfe 100644 --- 
a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -679,8 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); - bpf_program__set_flags(prog, BPF_F_TEST_SANITY_STRICT); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 4992022f3137..f36e41435be7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 9786a94a666c..d2458c1b1671 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT; + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_SANITY_STRICT, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 609fd9753af0..1d418d66e375 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -145,7 +145,7 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; - bool strict_range_sanity; + bool force_reg_invariants; enum resfmt out_fmt; bool show_version; bool comparison_mode; @@ -225,8 +225,8 @@ static const struct argp_option opts[] = { { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." 
}, { "test-states", 't', NULL, 0, "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, - { "test-sanity", 'r', NULL, 0, - "Force strict BPF verifier register sanity behavior (BPF_F_TEST_SANITY_STRICT program flag)" }, + { "test-reg-invariants", 'r', NULL, 0, + "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, {}, }; @@ -299,7 +299,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.force_checkpoints = true; break; case 'r': - env.strict_range_sanity = true; + env.force_reg_invariants = true; break; case 'n': errno = 0; @@ -1028,8 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); - if (env.strict_range_sanity) - bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_SANITY_STRICT); + if (env.force_reg_invariants) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); env.progs_processed++; -- cgit v1.2.3-70-g09d2 From af51d6bd0b130b06fc58b35fee8f93b0a5c77f21 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Nov 2023 13:17:23 +0100 Subject: selftests: mlxsw: Add PCI reset test Test that PCI reset works correctly by verifying that only the expected reset methods are supported and that after issuing the reset the ifindex of the port changes. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/pci_reset.sh | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh (limited to 'tools') diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh new file mode 100755 index 000000000000..fe0343b95e6c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that PCI reset works correctly by verifying that only the expected reset +# methods are supported and that after issuing the reset the ifindex of the +# port changes. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + pci_reset_test +" +NUM_NETIFS=1 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +pci_reset_test() +{ + RET=0 + + local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1) + local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2) + + if [ $bus != "pci" ]; then + check_err 1 "devlink device is not a PCI device" + log_test "pci reset" + return + fi + + if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then + check_err 1 "reset is not supported" + log_test "pci reset" + return + fi + + [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]] + check_err $? "only \"bus\" reset method should be supported" + + local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + echo 1 > /sys/bus/pci/devices/$bdf/reset + check_err $? "reset failed" + + # Wait for udev to rename newly created netdev. + udevadm settle + + local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + [[ $ifindex_pre != $ifindex_post ]] + check_err $? 
"reset not performed" + + log_test "pci reset" +} + +swp1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS -- cgit v1.2.3-70-g09d2 From 0c95c9fdb696f35c7864785ba84cb9a50152daff Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 19:46:20 -0800 Subject: bpf: emit map name in register state if applicable and available In complicated real-world applications, whenever debugging some verification error through verifier log, it often would be very useful to see map name for PTR_TO_MAP_VALUE register. Usually this needs to be inferred from key/value sizes and maybe trying to guess C code location, but it's not always clear. Given verifier has the name, and it's never too long, let's just emit it for ptr_to_map_key, ptr_to_map_value, and const_ptr_to_map registers. We reshuffle the order a bit, so that map name, key size, and value size appear before offset and immediate values, which seems like a more logical order. Current output: R1_w=map_ptr(map=array_map,ks=4,vs=8,off=0,imm=0) But we'll get rid of useless off=0 and imm=0 parts in the next patch. Acked-by: Eduard Zingerman Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231118034623.3320920-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 24 ++++++++++++++++------ tools/testing/selftests/bpf/prog_tests/spin_lock.c | 10 ++++----- 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 97a1641e848e..c209ab1ec2b5 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -553,6 +553,17 @@ static void print_scalar_ranges(struct bpf_verifier_env *env, } } +static bool type_is_map_ptr(enum bpf_reg_type t) { + switch (base_type(t)) { + case CONST_PTR_TO_MAP: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + return true; + default: + return false; + } +} + static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { enum bpf_reg_type t; @@ -584,16 +595,17 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s verbose_a("ref_obj_id=%d", reg->ref_obj_id); if (type_is_non_owning_ref(reg->type)) verbose_a("%s", "non_own_ref"); + if (type_is_map_ptr(t)) { + if (reg->map_ptr->name[0]) + verbose_a("map=%s", reg->map_ptr->name); + verbose_a("ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); + } if (t != SCALAR_VALUE) verbose_a("off=%d", reg->off); if (type_is_pkt_pointer(t)) verbose_a("r=%d", reg->range); - else if (base_type(t) == CONST_PTR_TO_MAP || - base_type(t) == PTR_TO_MAP_KEY || - base_type(t) == PTR_TO_MAP_VALUE) - verbose_a("ks=%d,vs=%d", - reg->map_ptr->key_size, - reg->map_ptr->value_size); if (tnum_is_const(reg->var_off)) { /* Typically an immediate SCALAR_VALUE, but * could be a pointer whose offset is too big diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index f29c08d93beb..ace65224286f 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -17,18 +17,18 @@ static struct { "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4,off=0,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", 
"[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8,off=0,imm=0)" + " R1_w=map_value(id=2,ks=4,vs=8,off=0,imm=0)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, -- cgit v1.2.3-70-g09d2 From 1db747d75b1dbe17bf4283ed87bd3b7a92010f34 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 19:46:21 -0800 Subject: bpf: omit default off=0 and imm=0 in register state log Simplify BPF verifier log further by omitting default (and frequently irrelevant) off=0 and imm=0 parts for non-SCALAR_VALUE registers. As can be seen from fixed tests, this is often a visual noise for PTR_TO_CTX register and even for PTR_TO_PACKET registers. Omitting default values follows the rest of register state logic: we omit default values to keep verifier log succinct and to highlight interesting state that deviates from default one. E.g., we do the same for var_off, when it's unknown, which gives no additional information. Acked-by: Eduard Zingerman Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231118034623.3320920-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 10 +++--- tools/testing/selftests/bpf/prog_tests/align.c | 42 +++++++++++----------- tools/testing/selftests/bpf/prog_tests/log_buf.c | 4 +-- tools/testing/selftests/bpf/prog_tests/spin_lock.c | 14 ++++---- .../selftests/bpf/progs/exceptions_assert.c | 10 +++--- 5 files changed, 39 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index c209ab1ec2b5..20b4f81087da 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -602,16 +602,14 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s reg->map_ptr->key_size, reg->map_ptr->value_size); } - if (t != SCALAR_VALUE) + if (t != SCALAR_VALUE && reg->off) verbose_a("off=%d", reg->off); if (type_is_pkt_pointer(t)) verbose_a("r=%d", reg->range); if (tnum_is_const(reg->var_off)) { - /* Typically an immediate SCALAR_VALUE, but - * could be a pointer whose offset is too big - * for reg->off - */ - verbose_a("imm=%llx", reg->var_off.value); + /* a pointer register with fixed offset */ + if (reg->var_off.value) + verbose_a("imm=%llx", reg->var_off.value); } else { print_scalar_ranges(env, reg, &sep); if (!tnum_is_unknown(reg->var_off)) { diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 465c1c3a3d3c..4ebd0da898f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "2"}, {1, "R3_w", "4"}, @@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, 
"R10", "fp0"}, {0, "R3_w", "1"}, {1, "R3_w", "2"}, @@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "4"}, {1, "R3_w", "8"}, @@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "7"}, {1, "R3_w", "7"}, @@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R0_w", "pkt(off=8,r=8)"}, {6, "R3_w", "var_off=(0x0; 0xff)"}, {7, "R3_w", "var_off=(0x0; 0x1fe)"}, {8, "R3_w", "var_off=(0x0; 0x3fc)"}, {9, "R3_w", "var_off=(0x0; 0x7f8)"}, {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {12, "R3_w", "pkt_end()"}, {17, "R4_w", "var_off=(0x0; 0xff)"}, {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, {19, "R4_w", "var_off=(0x0; 0xff0)"}, @@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, - {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, - {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, - {9, "R2", "pkt(off=0,r=18,imm=0)"}, - {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {2, "R5_w", "pkt(r=0)"}, + {4, "R5_w", "pkt(off=14,r=0)"}, + {5, "R4_w", "pkt(off=14,r=0)"}, + {9, "R2", "pkt(r=18)"}, + {10, "R5", "pkt(off=14,r=18)"}, {10, "R4_w", "var_off=(0x0; 0xff)"}, {13, "R4_w", "var_off=(0x0; 0xffff)"}, {14, "R4_w", "var_off=(0x0; 0xffff)"}, @@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. @@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8,"}, + {26, "R5_w", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. @@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {8, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because @@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. 
*/ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ {10, "R6_w", "var_off=(0x2; 0x7c)"}, @@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test) /* Check the next line as well in case the previous line * did not have a corresponding bpf insn. Example: * func#0 @0 - * 0: R1=ctx(off=0,imm=0) R10=fp0 + * 0: R1=ctx() R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 * * Sometimes it's actually two lines below, e.g. when * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": - * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) + * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index fe9a23e65ef4..0f7ea4d7d9f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -78,7 +78,7 @@ static void obj_load_log_buf(void) ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), "libbpf_log_not_empty"); ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); - ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), + ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"), "good_log_verbose"); ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), "bad_log_not_empty"); @@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void) opts.log_level = 2; fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", good_prog_insns, good_prog_insn_cnt, &opts); - ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2"); + ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2"); ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index ace65224286f..18d451be57c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " - "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " + "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(map=.data.A,ks=4,vs=4,off=0,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)" - " 
R1_w=map_value(id=1,map=array_map,ks=4,vs=8,off=0,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,ks=4,vs=8,off=0,imm=0)" - " R1_w=map_value(id=2,ks=4,vs=8,off=0,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8)" + " R1_w=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index e1e5c54a6a11..26f7d67432cc 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -59,7 +59,7 @@ check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R0=0 R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0") +__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0") int check_assert_generic(struct __sk_buff *ctx) { u8 *data_end = (void *)(long)ctx->data_end; -- cgit v1.2.3-70-g09d2 From 0f8dbdbc641b45a5fa31d497f9fc83ffe1174fa3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 17 Nov 2023 19:46:22 -0800 Subject: bpf: smarter verifier log number printing logic Instead of always printing numbers as either decimals (and in some cases, like for "imm=%llx", in hexadecimals), decide the form based on actual values. For numbers in a reasonably small range (currently, [0, U16_MAX] for unsigned values, and [S16_MIN, S16_MAX] for signed ones), emit them as decimals. In all other cases, even for signed values, emit them in hexadecimals. For large values hex form is often times way more useful: it's easier to see an exact difference between 0xffffffff80000000 and 0xffffffff7fffffff, than between 18446744071562067966 and 18446744071562067967, as one particular example. 
Small values representing small pointer offsets or application constants, on the other hand, are way more useful to be represented in decimal notation. Adjust reg_bounds register state parsing logic to take into account this change. Acked-by: Eduard Zingerman Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231118034623.3320920-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 79 +++++++++++++++++++--- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 53 +++++++++------ .../selftests/bpf/progs/exceptions_assert.c | 32 ++++----- 3 files changed, 118 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 20b4f81087da..87105aa482ed 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -509,10 +509,52 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool is_unum_decimal(u64 num) +{ + return num <= UNUM_MAX_DECIMAL; +} + +static bool is_snum_decimal(s64 num) +{ + return num >= SNUM_MIN_DECIMAL && num <= SNUM_MAX_DECIMAL; +} + +static void verbose_unum(struct bpf_verifier_env *env, u64 num) +{ + if (is_unum_decimal(num)) + verbose(env, "%llu", num); + else + verbose(env, "%#llx", num); +} + +static void verbose_snum(struct bpf_verifier_env *env, s64 num) +{ + if (is_snum_decimal(num)) + verbose(env, "%lld", num); + else + verbose(env, "%#llx", num); +} + static void print_scalar_ranges(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, const char **sep) { + /* For signed ranges, we want to unify 64-bit and 32-bit values in the + * output as much as possible, but there is a bit of a complication. + * If we choose to print values as decimals, this is natural to do, + * because negative 64-bit and 32-bit values >= -S32_MIN have the same + * representation due to sign extension. But if we choose to print + * them in hex format (see is_snum_decimal()), then sign extension is + * misleading. + * E.g., smin=-2 and smin32=-2 are exactly the same in decimal, but in + * hex they will be smin=0xfffffffffffffffe and smin32=0xfffffffe, two + * very different numbers. + * So we avoid sign extension if we choose to print values in hex. + */ struct { const char *name; u64 val; @@ -522,8 +564,14 @@ static void print_scalar_ranges(struct bpf_verifier_env *env, {"smax", reg->smax_value, reg->smax_value == S64_MAX}, {"umin", reg->umin_value, reg->umin_value == 0}, {"umax", reg->umax_value, reg->umax_value == U64_MAX}, - {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN}, - {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX}, + {"smin32", + is_snum_decimal((s64)reg->s32_min_value) + ? (s64)reg->s32_min_value + : (u32)reg->s32_min_value, reg->s32_min_value == S32_MIN}, + {"smax32", + is_snum_decimal((s64)reg->s32_max_value) + ? (s64)reg->s32_max_value + : (u32)reg->s32_max_value, reg->s32_max_value == S32_MAX}, {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; @@ -549,7 +597,10 @@ static void print_scalar_ranges(struct bpf_verifier_env *env, verbose(env, "%s=", m2->name); } - verbose(env, m1->name[0] == 's' ? 
"%lld" : "%llu", m1->val); + if (m1->name[0] == 's') + verbose_snum(env, m1->val); + else + verbose_unum(env, m1->val); } } @@ -576,14 +627,14 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); - verbose(env, "%lld", reg->var_off.value + reg->off); + verbose_snum(env, reg->var_off.value + reg->off); return; } /* * _a stands for append, was shortened to avoid multiline statements below. * This macro is used to output a comma separated list of attributes. */ -#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; }) +#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; }) verbose(env, "%s", reg_type_str(env, t)); if (base_type(t) == PTR_TO_BTF_ID) @@ -602,14 +653,20 @@ static void print_reg_state(struct bpf_verifier_env *env, const struct bpf_reg_s reg->map_ptr->key_size, reg->map_ptr->value_size); } - if (t != SCALAR_VALUE && reg->off) - verbose_a("off=%d", reg->off); - if (type_is_pkt_pointer(t)) - verbose_a("r=%d", reg->range); + if (t != SCALAR_VALUE && reg->off) { + verbose_a("off="); + verbose_snum(env, reg->off); + } + if (type_is_pkt_pointer(t)) { + verbose_a("r="); + verbose_unum(env, reg->range); + } if (tnum_is_const(reg->var_off)) { /* a pointer register with fixed offset */ - if (reg->var_off.value) - verbose_a("imm=%llx", reg->var_off.value); + if (reg->var_off.value) { + verbose_a("imm="); + verbose_snum(env, reg->var_off.value); + } } else { print_scalar_ranges(env, reg, &sep); if (!tnum_is_unknown(reg->var_off)) { diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 7a8b0bf0a7f8..fd4ab23e6f54 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -13,10 +13,13 @@ */ #define U64_MAX ((u64)UINT64_MAX) #define U32_MAX ((u32)UINT_MAX) +#define U16_MAX ((u32)UINT_MAX) #define S64_MIN ((s64)INT64_MIN) #define S64_MAX ((s64)INT64_MAX) #define S32_MIN ((s32)INT_MIN) #define S32_MAX ((s32)INT_MAX) +#define S16_MIN ((s16)0x80000000) +#define S16_MAX ((s16)0x7fffffff) typedef unsigned long long ___u64; typedef unsigned int ___u32; @@ -138,13 +141,17 @@ static enum num_t t_unsigned(enum num_t t) } } +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + static bool num_is_small(enum num_t t, u64 x) { switch (t) { - case U64: return (u64)x <= 256; - case U32: return (u32)x <= 256; - case S64: return (s64)x >= -256 && (s64)x <= 256; - case S32: return (s32)x >= -256 && (s32)x <= 256; + case U64: return (u64)x <= UNUM_MAX_DECIMAL; + case U32: return (u32)x <= UNUM_MAX_DECIMAL; + case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL; + case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL; default: printf("num_is_small!\n"); exit(1); } } @@ -1023,20 +1030,19 @@ static int parse_reg_state(const char *s, struct reg_state *reg) */ struct { const char *pfx; - const char *fmt; u64 *dst, def; bool is_32, is_set; } *f, fields[8] = { - {"smin=", "%lld", ®->r[S64].a, S64_MIN}, - {"smax=", "%lld", ®->r[S64].b, S64_MAX}, - {"umin=", "%llu", ®->r[U64].a, 0}, - {"umax=", "%llu", ®->r[U64].b, U64_MAX}, - {"smin32=", "%lld", ®->r[S32].a, (u32)S32_MIN, true}, - {"smax32=", "%lld", ®->r[S32].b, (u32)S32_MAX, true}, - {"umin32=", "%llu", 
®->r[U32].a, 0, true}, - {"umax32=", "%llu", ®->r[U32].b, U32_MAX, true}, + {"smin=", ®->r[S64].a, S64_MIN}, + {"smax=", ®->r[S64].b, S64_MAX}, + {"umin=", ®->r[U64].a, 0}, + {"umax=", ®->r[U64].b, U64_MAX}, + {"smin32=", ®->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", ®->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", ®->r[U32].a, 0, true}, + {"umax32=", ®->r[U32].b, U32_MAX, true}, }; - const char *p, *fmt; + const char *p; int i; p = strchr(s, '='); @@ -1050,8 +1056,13 @@ static int parse_reg_state(const char *s, struct reg_state *reg) long long sval; enum num_t t; - if (sscanf(p, "%lld", &sval) != 1) - return -EINVAL; + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &sval) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + } reg->valid = true; for (t = first_t; t <= last_t; t++) { @@ -1075,9 +1086,13 @@ static int parse_reg_state(const char *s, struct reg_state *reg) if (mcnt) { /* populate all matched fields */ - fmt = fields[midxs[0]].fmt; - if (sscanf(p, fmt, &val) != 1) - return -EINVAL; + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &val) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &val) != 1) + return -EINVAL; + } for (i = 0; i < mcnt; i++) { f = &fields[midxs[i]]; diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 26f7d67432cc..49efaed143fc 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,48 +18,48 @@ return *(u64 *)num; \ } -__msg(": R0_w=-2147483648 R10=fp0") +__msg(": R0_w=0xffffffff80000000 R10=fp0") check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=2147483647 R10=fp0") +__msg(": R0_w=0x7fffffff R10=fp0") check_assert(s64, eq, int_max, INT_MAX); __msg(": R0_w=0 R10=fp0") check_assert(s64, eq, zero, 0); -__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0") +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0") check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0") +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0") check_assert(s64, eq, llong_max, LLONG_MAX); -__msg(": R0_w=scalar(smax=2147483646) R10=fp0") +__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0") check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, neg, INT_MIN); -__msg(": R0_w=scalar(smax=2147483647) R10=fp0") +__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0") check_assert(s64, le, pos, INT_MAX); __msg(": R0_w=scalar(smax=0) R10=fp0") check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, le, neg, INT_MIN); -__msg(": 
R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=-2147483647) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0") check_assert(s64, gt, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=-2147483648) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0") check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx() R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; -- cgit v1.2.3-70-g09d2 From 54293e4d6a629befb0b0d93aa416a658678f7f01 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 14 Nov 2023 11:18:56 -0300 Subject: selftests/tc-testing: add hashtable tests for u32 Add tests to specifically check for the refcount interactions of hashtables created by u32. These tables should not be deleted when referenced and the flush order should respect a tree like composition. 
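For readers who do not want to parse the JSON below, the first new test ("bd32") exercises roughly this sequence, shown here as plain tc commands using the same $TC/$DEV1 placeholders that tdc substitutes at run time:

    $TC qdisc add dev $DEV1 parent root handle 10: drr
    $TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1
    # hashtable 1: is now referenced from the root 800: table
    $TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:
    # deleting the still-referenced hashtable must fail (tc exits with code 2)
    $TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32

The second test ("4585") builds a small tree of linked hashtables and flushes the whole parent, expecting every table and filter to be gone afterwards.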
Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231114141856.974326-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../selftests/tc-testing/tc-tests/filters/u32.json | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index ddc7c355be0a..24bd0c2a3014 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -272,5 +272,62 @@ "teardown": [ "$TC qdisc del dev $DEV1 parent root drr" ] + }, + { + "id": "bd32", + "name": "Try to delete hashtable referenced by another u32 filter", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] + }, + { + "id": "4585", + "name": "Delete small tree of u32 hashtables and filters", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] -- cgit v1.2.3-70-g09d2 From 57b97ecb40caeb116c22451bbdaaa9a1d12c0b43 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 20 Nov 2023 10:04:52 -0800 Subject: selftests/bpf: reduce verboseness of reg_bounds selftest logs Reduce verboseness of test_progs' output in reg_bounds set of tests with two changes. First, instead of each different operator (<, <=, >, ...) being it's own subtest, combine all different ops for the same (x, y, init_t, cond_t) values into single subtest. 
Instead of getting 6 subtests, we get one generic one, e.g.: #192/53 reg_bounds_crafted/(s64)[0xffffffffffffffff; 0] (s64) 0xffffffff00000000:OK Second, for random generated test cases, treat all of them as a single test to eliminate very verbose output with random values in them. So now we'll just get one line per each combination of (init_t, cond_t), instead of 6 x 25 = 150 subtests before this change: #225 reg_bounds_rand_consts_s32_s32:OK Given we reduce verboseness so much, it makes sense to do a bit more random testing, so we also bump default number of random tests to 100, up from 25. This doesn't increase runtime significantly, especially in parallelized mode. With all the above changes we still make sure that we have all the information necessary for reproducing test case if it happens to fail. That includes reporting random seed and specific operator that is failing. Those will only be printed to console if related test/subtest fails, so it doesn't have any added verboseness implications. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231120180452.145849-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 32 ++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index fd4ab23e6f54..0c9abd279e18 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -1361,11 +1361,11 @@ struct subtest_case { enum op op; }; -static void subtest_case_str(struct strbuf *sb, struct subtest_case *t) +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op) { snappendf(sb, "(%s)", t_str(t->init_t)); snprintf_range(t->init_t, sb, t->x); - snappendf(sb, " (%s)%s ", t_str(t->cond_t), op_str(t->op)); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? 
op_str(t->op) : ""); snprintf_range(t->init_t, sb, t->y); } @@ -1440,8 +1440,8 @@ static int verify_case_op(enum num_t init_t, enum num_t cond_t, /* Given setup ranges and number types, go over all supported operations, * generating individual subtest for each allowed combination */ -static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, - struct range x, struct range y) +static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, bool is_subtest) { DEFINE_STRBUF(sb, 256); int err; @@ -1452,11 +1452,14 @@ static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, .y = y, }; + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, false /* ignore op */); + if (is_subtest && !test__start_subtest(sb->buf)) + return 0; + for (sub.op = first_op; sub.op <= last_op; sub.op++) { sb->pos = 0; /* reset position in strbuf */ - subtest_case_str(sb, &sub); - if (!test__start_subtest(sb->buf)) - continue; + subtest_case_str(sb, &sub, true /* print op */); if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ printf("TEST CASE: %s\n", sb->buf); @@ -1491,6 +1494,12 @@ static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, return 0; } +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */); +} + /* ================================ * GENERATED CASES FROM SEED VALUES * ================================ @@ -1913,7 +1922,7 @@ void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } -#define DEFAULT_RAND_CASE_CNT 25 +#define DEFAULT_RAND_CASE_CNT 100 #define RAND_21BIT_MASK ((1 << 22) - 1) @@ -1968,7 +1977,6 @@ static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool cons "[RANDOM SEED %u] RANGE x %s, %s -> %s", ctx.rand_seed, const_range ? "CONST" : "RANGE", t_str(init_t), t_str(cond_t)); - fprintf(env.stdout, "%s\n", ctx.progress_ctx); for (i = 0; i < ctx.rand_case_cnt; i++) { range1 = rand_range(init_t); @@ -1980,14 +1988,16 @@ static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool cons } /* x */ - if (verify_case(&ctx, init_t, cond_t, range1, range2)) + if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */)) goto cleanup; /* x */ - if (verify_case(&ctx, init_t, cond_t, range2, range1)) + if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */)) goto cleanup; } cleanup: + /* make sure we report random seed for reproducing */ + ASSERT_TRUE(true, ctx.progress_ctx); cleanup_ctx(&ctx); } -- cgit v1.2.3-70-g09d2 From a0bc96c0cd6e61fcaebff34432791a4b5118fc68 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 16 Nov 2023 15:34:43 -0500 Subject: selftests: net: verify fq per-band packet limit Commit 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") introduces multiple traffic bands, and per-band maximum packet count. Per-band limits ensures that packets in one class cannot fill the entire qdisc and so cause DoS to the traffic in the other classes. Verify this behavior: 1. set the limit to 10 per band 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped 3. 
send 20 pkts on band A: verify that 0 are queued, 20 dropped 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped Packets must remain queued for a period to trigger this behavior. Use SO_TXTIME to store packets for 100 msec. The test reuses existing upstream test infra. The script is a fork of cmsg_time.sh. The scripts call cmsg_sender. The test extends cmsg_sender with two arguments: * '-P' SO_PRIORITY There is a subtle difference between IPv4 and IPv6 stack behavior: PF_INET/IP_TOS sets IP header bits and sk_priority PF_INET6/IPV6_TCLASS sets IP header bits BUT NOT sk_priority * '-n' num pkts Send multiple packets in quick succession. I first attempted a for loop in the script, but this is too slow in virtualized environments, causing flakiness as the 100ms timeout is reached and packets are dequeued. Also do not wait for timestamps to be queued unless timestamps are requested. Signed-off-by: Willem de Bruijn Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231116203449.2627525-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/cmsg_sender.c | 50 ++++++++++++++-------- tools/testing/selftests/net/fq_band_pktlimit.sh | 57 +++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 17 deletions(-) create mode 100755 tools/testing/selftests/net/fq_band_pktlimit.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5b2aca4c5f10..9274edfb76ff 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -91,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh +TEST_PROGS += fq_band_pktlimit.sh TEST_FILES := settings diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 24b21b15ed3f..8d7575389f58 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -45,11 +45,13 @@ struct options { const char *host; const char *service; unsigned int size; + unsigned int num_pkt; struct { unsigned int mark; unsigned int dontfrag; unsigned int tclass; unsigned int hlimit; + unsigned int priority; } sockopt; struct { unsigned int family; @@ -72,6 +74,7 @@ struct options { } v6; } opt = { .size = 13, + .num_pkt = 1, .sock = { .family = AF_UNSPEC, .type = SOCK_DGRAM, @@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[]) cs_usage(argv[0]); } break; - + case 'P': + opt.sockopt.priority = atoi(optarg); + break; case 'm': opt.mark.ena = true; opt.mark.val = atoi(optarg); @@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[]) case 'M': opt.sockopt.mark = atoi(optarg); break; + case 'n': + opt.num_pkt = atoi(optarg); + break; case 'd': opt.txtime.ena = true; opt.txtime.delay = atoi(optarg); @@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + if (opt.sockopt.priority && + setsockopt(fd, SOL_SOCKET, SO_PRIORITY, + 
&opt.sockopt.priority, sizeof(opt.sockopt.priority))) + error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); } int main(int argc, char *argv[]) @@ -421,6 +433,7 @@ int main(int argc, char *argv[]) char *buf; int err; int fd; + int i; cs_parse_args(argc, argv); @@ -480,24 +493,27 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); - err = sendmsg(fd, &msg, 0); - if (err < 0) { - if (!opt.silent_send) - fprintf(stderr, "send failed: %s\n", strerror(errno)); - err = ERN_SEND; - goto err_out; - } else if (err != (int)opt.size) { - fprintf(stderr, "short send\n"); - err = ERN_SEND_SHORT; - goto err_out; - } else { - err = ERN_SUCCESS; + for (i = 0; i < opt.num_pkt; i++) { + err = sendmsg(fd, &msg, 0); + if (err < 0) { + if (!opt.silent_send) + fprintf(stderr, "send failed: %s\n", strerror(errno)); + err = ERN_SEND; + goto err_out; + } else if (err != (int)opt.size) { + fprintf(stderr, "short send\n"); + err = ERN_SEND_SHORT; + goto err_out; + } } + err = ERN_SUCCESS; - /* Make sure all timestamps have time to loop back */ - usleep(opt.txtime.delay); + if (opt.ts.ena) { + /* Make sure all timestamps have time to loop back */ + usleep(opt.txtime.delay); - cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + } err_out: close(fd); diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh new file mode 100755 index 000000000000..24b77bdf41ff --- /dev/null +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that FQ has a packet limit per band: +# +# 1. set the limit to 10 per band +# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped +# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped +# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped +# +# Send packets with a 100ms delay to ensure that previously sent +# packets are still queued when later ones are sent. +# Use SO_TXTIME for this. + +die() { + echo "$1" + exit 1 +} + +# run inside private netns +if [[ $# -eq 0 ]]; then + ./in_netns.sh "$0" __subprocess + exit +fi + +ip link add type dummy +ip link set dev dummy0 up +ip -6 addr add fdaa::1/128 dev dummy0 +ip -6 route add fdaa::/64 dev dummy0 +tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10 + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000 +OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Initial stats will report zero sent, as all packets are still +# queued in FQ. Sleep for the delay period (100ms) and see that +# twenty are now sent. 
+sleep 0.1 +OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Log the output after the test +echo "${OUT1}" +echo "${OUT2}" +echo "${OUT3}" +echo "${OUT4}" + +# Test the output for expected values +echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1" +echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2" +echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3" +echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4" -- cgit v1.2.3-70-g09d2 From 025de7b6a6dd44e78d0d45b4f3b4f104ed54b0fd Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:03 -0300 Subject: selftests: tc-testing: cap parallel tdc to 4 cores We have observed a lot of lock contention and test instability when running with >8 cores. Enough to actually make the tests run slower than with fewer cores. Cap the maximum cores of parallel tdc to 4 which showed in testing to be a reasonable number for efficiency and stability in different kernel config scenarios. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a6718192aff3..f764b43f112b 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -1017,6 +1017,7 @@ def main(): parser = pm.call_add_args(parser) (args, remaining) = parser.parse_known_args() args.NAMES = NAMES + args.mp = min(args.mp, 4) pm.set_args(args) check_default_settings(args, remaining, pm) if args.verbose > 2: -- cgit v1.2.3-70-g09d2 From 50a5988a7a540fb1ad4e620e1bbf11cc646e3dc7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:04 -0300 Subject: selftests: tc-testing: move back to per test ns setup Surprisingly in kernel configs with most of the debug knobs turned on, pre-allocating the test resources makes tdc run much slower overall than when allocating resources on a per test basis. As these knobs are used in kselftests in downstream CIs, let's go back to the old way of doing things to avoid kselftests timeouts. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202311161129.3b45ed53-oliver.sang@intel.com Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../selftests/tc-testing/plugin-lib/nsPlugin.py | 68 ++++++++-------------- 1 file changed, 25 insertions(+), 43 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 62974bd3a4a5..2b8cbfdf1083 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -17,44 +17,6 @@ except ImportError: netlink = False print("!!! 
Consider installing pyroute2 !!!") -def prepare_suite(obj, test): - original = obj.args.NAMES - - if 'skip' in test and test['skip'] == 'yes': - return - - if 'nsPlugin' not in test['plugins']: - return - - shadow = {} - shadow['IP'] = original['IP'] - shadow['TC'] = original['TC'] - shadow['NS'] = '{}-{}'.format(original['NS'], test['random']) - shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id']) - shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id']) - shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id']) - shadow['DEV2'] = original['DEV2'] - obj.args.NAMES = shadow - - if netlink == True: - obj._nl_ns_create() - else: - obj._ns_create() - - # Make sure the netns is visible in the fs - while True: - obj._proc_check() - try: - ns = obj.args.NAMES['NS'] - f = open('/run/netns/{}'.format(ns)) - f.close() - break - except: - time.sleep(0.1) - continue - - obj.args.NAMES = original - class SubPlugin(TdcPlugin): def __init__(self): self.sub_class = 'ns/SubPlugin' @@ -65,19 +27,39 @@ class SubPlugin(TdcPlugin): super().pre_suite(testcount, testlist) - print("Setting up namespaces and devices...") + def prepare_test(self, test): + if 'skip' in test and test['skip'] == 'yes': + return - with Pool(self.args.mp) as p: - it = zip(cycle([self]), testlist) - p.starmap(prepare_suite, it) + if 'nsPlugin' not in test['plugins']: + return - def pre_case(self, caseinfo, test_skip): + if netlink == True: + self._nl_ns_create() + else: + self._ns_create() + + # Make sure the netns is visible in the fs + while True: + self._proc_check() + try: + ns = self.args.NAMES['NS'] + f = open('/run/netns/{}'.format(ns)) + f.close() + break + except: + time.sleep(0.1) + continue + + def pre_case(self, test, test_skip): if self.args.verbose: print('{}.pre_case'.format(self.sub_class)) if test_skip: return + self.prepare_test(test) + def post_case(self): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) -- cgit v1.2.3-70-g09d2 From 3d5026fc5adbc796a0547fcef19d997786e0bb31 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:05 -0300 Subject: selftests: tc-testing: use netns delete from pyroute2 When pyroute2 is available, use the native netns delete routine instead of calling iproute2 to do it. As forks are expensive with some kernel configs, minimize its usage to avoid kselftests timeouts. 
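The mechanical change is tiny; a minimal sketch of the two paths, with ns_name standing in for the per-test namespace name:

    # native pyroute2 call, handled in-process, no fork
    from pyroute2 import netns
    netns.remove(ns_name)

    # old path, roughly: fork + exec of iproute2 for every namespace
    # subprocess.run('ip netns delete {}'.format(ns_name), shell=True)

As before, when pyroute2 cannot be imported the plugin keeps falling back to the iproute2 command line.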
Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 2b8cbfdf1083..920dcbedc395 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -64,7 +64,10 @@ class SubPlugin(TdcPlugin): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) - self._ns_destroy() + if netlink == True: + self._nl_ns_destroy() + else: + self._ns_destroy() def post_suite(self, index): if self.args.verbose: @@ -174,6 +177,10 @@ class SubPlugin(TdcPlugin): ''' self._exec_cmd_batched('pre', self._ns_create_cmds()) + def _nl_ns_destroy(self): + ns = self.args.NAMES['NS'] + netns.remove(ns) + def _ns_destroy_cmd(self): return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS'])) -- cgit v1.2.3-70-g09d2 From 3f2d94a4ff489ebbc6b66cc33f9775cb33c00533 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:06 -0300 Subject: selftests: tc-testing: leverage -all in suite ns teardown Instead of listing lingering ns pinned files and delete them one by one, leverage '-all' from iproute2 to do it in a single process fork. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 920dcbedc395..7b674befceec 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -74,13 +74,12 @@ class SubPlugin(TdcPlugin): print('{}.post_suite'.format(self.sub_class)) # Make sure we don't leak resources - for f in os.listdir('/run/netns/'): - cmd = self._replace_keywords("$IP netns del {}".format(f)) + cmd = "$IP -a netns del" - if self.args.verbose > 3: - print('_exec_cmd: command "{}"'.format(cmd)) + if self.args.verbose > 3: + print('_exec_cmd: command "{}"'.format(cmd)) - subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) def adjust_command(self, stage, command): super().adjust_command(stage, command) -- cgit v1.2.3-70-g09d2 From 4b480cfb1066a8394017697ff4a58a970641e9b7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:07 -0300 Subject: selftests: tc-testing: timeout on unbounded loops In the spirit of failing early, timeout on unbounded loops that take longer than 20 ticks to complete. Such loops are to ensure that objects created are already visible so tests can proceed without any issues. If a test setup takes more than 20 ticks to see an object, there's definetely something wrong. 
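The pattern applied below is an ordinary bounded retry; on its own it is simply the following, where check_object_visible() is a stand-in for the real visibility check (opening /run/netns/<ns>, looking up the veth index, and so on):

    import time

    ticks = 20
    while True:
        if ticks == 0:
            raise TimeoutError
        try:
            check_object_visible()
            break
        except Exception:
            time.sleep(0.1)
            ticks -= 1
            continue

so a setup step now gives up after roughly two seconds instead of spinning forever.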
Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-6-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 7b674befceec..65c8f3f983b9 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -40,7 +40,10 @@ class SubPlugin(TdcPlugin): self._ns_create() # Make sure the netns is visible in the fs + ticks = 20 while True: + if ticks == 0: + raise TimeoutError self._proc_check() try: ns = self.args.NAMES['NS'] @@ -49,6 +52,7 @@ class SubPlugin(TdcPlugin): break except: time.sleep(0.1) + ticks -= 1 continue def pre_case(self, test, test_skip): @@ -127,7 +131,10 @@ class SubPlugin(TdcPlugin): with IPRoute() as ip: ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) ip.link('add', ifname=dummy, kind='dummy') + ticks = 20 while True: + if ticks == 0: + raise TimeoutError try: dev1_idx = ip.link_lookup(ifname=dev1)[0] dummy_idx = ip.link_lookup(ifname=dummy)[0] @@ -136,17 +143,22 @@ class SubPlugin(TdcPlugin): break except: time.sleep(0.1) + ticks -= 1 continue netns.popns() with IPRoute() as ip: + ticks = 20 while True: + if ticks == 0: + raise TimeoutError try: dev0_idx = ip.link_lookup(ifname=dev0)[0] ip.link('set', index=dev0_idx, state='up') break except: time.sleep(0.1) + ticks -= 1 continue def _ns_create_cmds(self): -- cgit v1.2.3-70-g09d2 From 4968afa0143dbff8a84b5ce3c302dd7d0bdcfa48 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 17 Nov 2023 14:12:08 -0300 Subject: selftests: tc-testing: report number of workers in use Report the number of workers in use to process the test batches. Since the number is now subject to a limit, avoid users getting confused. Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231117171208.2066136-7-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index f764b43f112b..669ec89ebfe1 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -616,7 +616,7 @@ def test_runner_mp(pm, args, alltests): batches.insert(0, serial) print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial))) - print("Using {} batches".format(len(batches))) + print("Using {} batches and {} workers".format(len(batches), args.mp)) # We can't pickle these objects so workaround them global mp_pm -- cgit v1.2.3-70-g09d2 From b0e2a0395312f4e53504ae84eeb5902e5518d1d7 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:35:39 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in bpf_tcp_ca bpf_tcp_ca uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. 
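The conversions are mechanical. Taking settcpca() from the hunk below as an example, the old check

    if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", errno))
        return -1;

becomes

    if (!ASSERT_NEQ(err, -1, "setsockopt"))
        return -1;

ASSERT_NEQ() already prints the compared values on failure, so the hand-rolled format string goes away. Note the inverted sense: CHECK() evaluates true on failure, while the ASSERT_*() macros evaluate true on success, hence the added negation.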
Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB6563F180C0F2BB4F6CFA5130E8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 48 ++++++++++------------ 1 file changed, 22 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 4aabeaa525d4..a88e6e07e4f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -20,15 +20,14 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; -static int stop, duration; +static int stop; static int settcpca(int fd, const char *tcp_ca) { int err; err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); - if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", - errno)) + if (!ASSERT_NEQ(err, -1, "setsockopt")) return -1; return 0; @@ -65,8 +64,7 @@ static void *server(void *arg) bytes += nr_sent; } - CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", - bytes, total_bytes, nr_sent, errno); + ASSERT_EQ(bytes, total_bytes, "send"); done: if (fd >= 0) @@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) WRITE_ONCE(stop, 0); lfd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + if (!ASSERT_NEQ(lfd, -1, "socket")) return; + fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + if (!ASSERT_NEQ(fd, -1, "socket")) { close(lfd); return; } @@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) sa6.sin6_family = AF_INET6; sa6.sin6_addr = in6addr_loopback; err = bind(lfd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "bind")) goto done; + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); - if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "getsockname")) goto done; + err = listen(lfd, 1); - if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "listen")) goto done; if (sk_stg_map) { err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, &expected_stg, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) goto done; } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "connect")) goto done; if (sk_stg_map) { @@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, &tmp_stg); - if (CHECK(!err || errno != ENOENT, - "bpf_map_lookup_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) goto done; } err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "pthread_create")) goto done; /* recv total_bytes */ @@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) bytes += nr_recv; } - CHECK(bytes != total_bytes, "recv", "%zd 
!= %u nr_recv:%zd errno:%d\n", - bytes, total_bytes, nr_recv, errno); + ASSERT_EQ(bytes, total_bytes, "recv"); WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); - CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", - PTR_ERR(thread_ret)); + ASSERT_OK(IS_ERR(thread_ret), "thread_ret"); + done: close(lfd); close(fd); @@ -174,7 +172,7 @@ static void test_cubic(void) struct bpf_link *link; cubic_skel = bpf_cubic__open_and_load(); - if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load")) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); @@ -197,7 +195,7 @@ static void test_dctcp(void) struct bpf_link *link; dctcp_skel = bpf_dctcp__open_and_load(); - if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); @@ -207,9 +205,7 @@ static void test_dctcp(void) } do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); - CHECK(dctcp_skel->bss->stg_result != expected_stg, - "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", - dctcp_skel->bss->stg_result, expected_stg); + ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); -- cgit v1.2.3-70-g09d2 From 3ec1114a97457398077e45b231d502d1cc30439d Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:37:43 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in bind_perm bind_perm uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB656314F467E075A106CA02BFE8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- tools/testing/selftests/bpf/prog_tests/bind_perm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index a1766a298bb7..f7cd129cb82b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -9,8 +9,6 @@ #include "cap_helpers.h" #include "bind_perm.skel.h" -static int duration; - static int create_netns(void) { if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) @@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno) int fd = -1; fd = socket(family, SOCK_STREAM, 0); - if (CHECK(fd < 0, "fd", "errno %d", errno)) + if (!ASSERT_GE(fd, 0, "socket")) goto close_socket; if (family == AF_INET) { @@ -60,7 +58,7 @@ void test_bind_perm(void) return; cgroup_fd = test__join_cgroup("/bind_perm"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; skel = bind_perm__open_and_load(); -- cgit v1.2.3-70-g09d2 From f125d09b99fc0ee43f865810390f10b8f23a2c98 Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:39:25 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in bpf_obj_id bpf_obj_id uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. 
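One detail worth calling out in this file: a compound CHECK() condition has no single ASSERT_* equivalent, so it is split into independent assertions. For example, the "fd lookup of a nonexistent id" check

    err = bpf_prog_get_fd_by_id(0);
    CHECK(err >= 0 || errno != ENOENT,
          "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);

turns into

    err = bpf_prog_get_fd_by_id(0);
    ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id");
    ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id");

which also yields a more precise failure message, since each half reports its own expected and actual values.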
Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB65639AA3A10B4BBAA79952C7E8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- .../testing/selftests/bpf/prog_tests/bpf_obj_id.c | 204 ++++++++------------- 1 file changed, 73 insertions(+), 131 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 675b90b15280..f09d6ac2ef09 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void) */ __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128]; - __u32 i, next_id, info_len, nr_id_found, duration = 0; + __u32 i, next_id, info_len, nr_id_found; struct timespec real_time_ts, boot_time_ts; int err = 0; __u64 array_value; @@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void) time_t now, load_time; err = bpf_prog_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id"); err = bpf_map_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); err = bpf_link_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); /* Check bpf_map_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); @@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. 
*/ - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_prog_test_load")) continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - if (CHECK_FAIL(map_fds[i] < 0)) + if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map")) goto done; + err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto done; - prog = bpf_object__find_program_by_name(objs[i], - "test_obj_id"); - if (CHECK_FAIL(!prog)) + prog = bpf_object__find_program_by_name(objs[i], "test_obj_id"); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto done; + links[i] = bpf_program__attach(prog); err = libbpf_get_error(links[i]); - if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) { + if (!ASSERT_OK(err, "bpf_program__attach")) { links[i] = NULL; goto done; } @@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void) bzero(&map_infos[i], info_len); err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i], &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info) || - strcmp((char *)map_infos[i].name, expected_map_name), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags, - map_infos[i].name, expected_map_name)) + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") || + !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") || + !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") || + !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") || + !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") || + !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") || + !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") || + !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name")) goto done; /* Check getting prog info */ @@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void) prog_infos[i].xlated_prog_len = sizeof(xlated_insns); prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); - if (CHECK(err || - prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT || - info_len != sizeof(struct bpf_prog_info) || - (env.jit_enabled && !prog_infos[i].jited_prog_len) || - (env.jit_enabled && - !memcmp(jited_insns, zeros, sizeof(zeros))) || - !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)) || - load_time < now - 60 || load_time > now + 60 || - prog_infos[i].created_by_uid != my_uid || - prog_infos[i].nr_map_ids != 1 || - *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || - strcmp((char *)prog_infos[i].name, expected_prog_name), - "get-prog-info(fd)", - "err %d errno %d i %d type 
%d(%d) info_len %u(%zu) " - "jit_enabled %d jited_prog_len %u xlated_prog_len %u " - "jited_prog %d xlated_prog %d load_time %lu(%lu) " - "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) " - "name %s(%s)\n", - err, errno, i, - prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, - info_len, sizeof(struct bpf_prog_info), - env.jit_enabled, - prog_infos[i].jited_prog_len, - prog_infos[i].xlated_prog_len, - !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)), - load_time, now, - prog_infos[i].created_by_uid, my_uid, - prog_infos[i].nr_map_ids, 1, - *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, - prog_infos[i].name, expected_prog_name)) + + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") || + !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") || + !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") || + !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))), + "jited_insns") || + !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") || + !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") || + !ASSERT_GE(load_time, (now - 60), "load_time") || + !ASSERT_LE(load_time, (now + 60), "load_time") || + !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") || + !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") || + !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") || + !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name")) goto done; /* Check getting link info */ @@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void) link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]), &link_infos[i], &info_len); - if (CHECK(err || - link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || - link_infos[i].prog_id != prog_infos[i].id || - link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) || - strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter") || - info_len != sizeof(struct bpf_link_info), - "get-link-info(fd)", - "err %d errno %d info_len %u(%zu) type %d(%d) id %d " - "prog_id %d (%d) tp_name %s(%s)\n", - err, errno, - info_len, sizeof(struct bpf_link_info), - link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, - link_infos[i].id, - link_infos[i].prog_id, prog_infos[i].id, - (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter")) + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") || + !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") || + !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") || + !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") || + !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name")) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void) while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; __u32 saved_map_id; - int prog_fd; + int prog_fd, cmp_res; info_len = sizeof(prog_info); @@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void) if (prog_fd < 0 && errno == ENOENT) /* The bpf_prog is in the dead row */ continue; - if (CHECK(prog_fd < 0, "get-prog-fd(next_id)", - "prog_fd %d next_id %d errno %d\n", - prog_fd, next_id, errno)) + if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -218,9 
+180,8 @@ void serial_test_bpf_obj_id(void) */ prog_info.nr_map_ids = 1; err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); - if (CHECK(!err || errno != EFAULT, - "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", - err, errno, EFAULT)) + if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd")) break; bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); @@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void) err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; - CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)(long)prog_info.map_ids != saved_map_id, - "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n", - err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)(long)prog_info.map_ids, saved_map_id); + cmp_res = memcmp(&prog_info, &prog_infos[i], info_len); + + ASSERT_OK(err, "bpf_prog_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id"); close(prog_fd); } - CHECK(nr_id_found != nr_iters, - "check total prog id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found"); /* Check bpf_map_get_next_id() */ nr_id_found = 0; next_id = 0; while (!bpf_map_get_next_id(next_id, &next_id)) { struct bpf_map_info map_info = {}; - int map_fd; + int map_fd, cmp_res; info_len = sizeof(map_info); @@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void) if (map_fd < 0 && errno == ENOENT) /* The bpf_map is in the dead row */ continue; - if (CHECK(map_fd < 0, "get-map-fd(next_id)", - "map_fd %d next_id %u errno %d\n", - map_fd, next_id, errno)) + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) goto done; err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); - CHECK(err || info_len != sizeof(struct bpf_map_info) || - memcmp(&map_info, &map_infos[i], info_len) || - array_value != array_magic_value, - "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n", - err, errno, info_len, sizeof(struct bpf_map_info), - memcmp(&map_info, &map_infos[i], info_len), - array_value, array_magic_value); + cmp_res = memcmp(&map_info, &map_infos[i], info_len); + ASSERT_OK(err, "bpf_map_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(array_value, array_magic_value, "array_value"); close(map_fd); } - CHECK(nr_id_found != nr_iters, - "check total map id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found"); /* Check bpf_link_get_next_id() */ nr_id_found = 0; @@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void) if (link_fd < 0 && errno == ENOENT) /* The bpf_link is in the dead row */ continue; - if (CHECK(link_fd < 0, "get-link-fd(next_id)", - "link_fd %d next_id %u errno %d\n", - link_fd, next_id, errno)) + if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id")) break; 
for (i = 0; i < nr_iters; i++) @@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void) err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len); cmp_res = memcmp(&link_info, &link_infos[i], offsetof(struct bpf_link_info, raw_tracepoint)); - CHECK(err || info_len != sizeof(link_info) || cmp_res, - "check get-link-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d\n", - err, errno, info_len, sizeof(struct bpf_link_info), - cmp_res); + ASSERT_OK(err, "bpf_link_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(link_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); close(link_fd); } - CHECK(nr_id_found != nr_iters, - "check total link id found by get_next_id", - "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found"); done: for (i = 0; i < nr_iters; i++) { -- cgit v1.2.3-70-g09d2 From 3ece0e85f679c23d2a5128993846c58a2f5f890e Mon Sep 17 00:00:00 2001 From: Yuran Pereira Date: Tue, 21 Nov 2023 05:40:41 +0530 Subject: selftests/bpf: Replaces the usage of CHECK calls for ASSERTs in vmlinux vmlinux.c uses the `CHECK` calls even though the use of ASSERT_ series of macros is preferred in the bpf selftests. This patch replaces all `CHECK` calls for equivalent `ASSERT_` macro calls. Signed-off-by: Yuran Pereira Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/GV1PR10MB6563ED1023A2A3AEF30BDA5DE8BBA@GV1PR10MB6563.EURPRD10.PROD.OUTLOOK.COM --- tools/testing/selftests/bpf/prog_tests/vmlinux.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 72310cfc6474..6fb2217d940b 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -16,27 +16,27 @@ static void nsleep() void test_vmlinux(void) { - int duration = 0, err; + int err; struct test_vmlinux* skel; struct test_vmlinux__bss *bss; skel = test_vmlinux__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load")) return; bss = skel->bss; err = test_vmlinux__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_vmlinux__attach")) goto cleanup; /* trigger everything */ nsleep(); - CHECK(!bss->tp_called, "tp", "not called\n"); - CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); - CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); - CHECK(!bss->kprobe_called, "kprobe", "not called\n"); - CHECK(!bss->fentry_called, "fentry", "not called\n"); + ASSERT_TRUE(bss->tp_called, "tp"); + ASSERT_TRUE(bss->raw_tp_called, "raw_tp"); + ASSERT_TRUE(bss->tp_btf_called, "tp_btf"); + ASSERT_TRUE(bss->kprobe_called, "kprobe"); + ASSERT_TRUE(bss->fentry_called, "fentry"); cleanup: test_vmlinux__destroy(skel); -- cgit v1.2.3-70-g09d2 From b8d78cb2e24d92352878a9f6525aec002c891528 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 23 Nov 2023 02:04:39 +0200 Subject: libbpf: Start v1.4 development cycle Bump libbpf.map to v1.4.0 to start a new libbpf version cycle. 
Signed-off-by: Eduard Zingerman Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231123000439.12025-1-eddyz87@gmail.com --- tools/lib/bpf/libbpf.map | 3 +++ tools/lib/bpf/libbpf_version.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b52dc28dc8af..91c5aef7dae7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -409,3 +409,6 @@ LIBBPF_1.3.0 { ring__size; ring_buffer__ring; } LIBBPF_1.2.0; + +LIBBPF_1.4.0 { +} LIBBPF_1.3.0; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 290411ddb39e..e783a47da815 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 3 +#define LIBBPF_MINOR_VERSION 4 #endif /* __LIBBPF_VERSION_H */ -- cgit v1.2.3-70-g09d2 From f061c9f7d058ffc32de66f2efb3e1c368e305423 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 03:48:31 -0800 Subject: Documentation: Document each netlink family This is a simple script that parses the Netlink YAML spec files (Documentation/netlink/specs/), and generates RST files to be rendered in the Network -> Netlink Specification documentation page. Create a python script that is invoked during 'make htmldocs', reads the YAML specs input file and generate the correspondent RST file. Create a new Documentation/networking/netlink_spec index page, and reference each Netlink RST file that was processed above in this main index.rst file. In case of any exception during the parsing, dump the error and skip the file. Do not regenerate the RST files if the input files (YAML) were not changed in-between invocations. Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao ---- Changelog: V3: * Do not regenerate the RST files if the input files were not changed. In order to do it, a few things changed: - Rely on Makefile more to find what changed, and trigger individual file processing - The script parses file by file now (instead of batches) - Create a new option to generate the index file V2: * Moved the logic from a sphinx extension to a external script * Adjust some formatting as suggested by Donald Hunter and Jakub * Auto generating all the rsts instead of having stubs * Handling error gracefully Reviewed-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- Documentation/Makefile | 16 +- Documentation/networking/index.rst | 1 + Documentation/networking/netlink_spec/.gitignore | 1 + Documentation/networking/netlink_spec/readme.txt | 4 + tools/net/ynl/ynl-gen-rst.py | 388 +++++++++++++++++++++++ 5 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 Documentation/networking/netlink_spec/.gitignore create mode 100644 Documentation/networking/netlink_spec/readme.txt create mode 100755 tools/net/ynl/ynl-gen-rst.py (limited to 'tools') diff --git a/Documentation/Makefile b/Documentation/Makefile index 2f35793acd2a..5c156fbb6cdf 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -97,7 +97,21 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4) cp $(if $(patsubst /%,,$(DOCS_CSS)),$(abspath $(srctree)/$(DOCS_CSS)),$(DOCS_CSS)) $(BUILDDIR)/$3/_static/; \ fi -htmldocs: +YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst +YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec +YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs +YNL_TOOL:=$(srctree)/tools/net/ynl/ynl-gen-rst.py + +YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml)) +YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP)) + +$(YNL_INDEX): $(YNL_RST_FILES) + @$(YNL_TOOL) -o $@ -x + +$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml + @$(YNL_TOOL) -i $< -o $@ + +htmldocs: $(YNL_INDEX) @$(srctree)/scripts/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 683eb42309cc..cb435c141794 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -55,6 +55,7 @@ Contents: filter generic-hdlc generic_netlink + netlink_spec/index gen_stats gtp ila diff --git a/Documentation/networking/netlink_spec/.gitignore b/Documentation/networking/netlink_spec/.gitignore new file mode 100644 index 000000000000..30d85567b592 --- /dev/null +++ b/Documentation/networking/netlink_spec/.gitignore @@ -0,0 +1 @@ +*.rst diff --git a/Documentation/networking/netlink_spec/readme.txt b/Documentation/networking/netlink_spec/readme.txt new file mode 100644 index 000000000000..6763f99d216c --- /dev/null +++ b/Documentation/networking/netlink_spec/readme.txt @@ -0,0 +1,4 @@ +SPDX-License-Identifier: GPL-2.0 + +This file is populated during the build of the documentation (htmldocs) by the +tools/net/ynl/ynl-gen-rst.py script. diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py new file mode 100755 index 000000000000..b6292109e236 --- /dev/null +++ b/tools/net/ynl/ynl-gen-rst.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# -*- coding: utf-8; mode: python -*- + +""" + Script to auto generate the documentation for Netlink specifications. + + :copyright: Copyright (C) 2023 Breno Leitao + :license: GPL Version 2, June 1991 see linux/COPYING for details. + + This script performs extensive parsing to the Linux kernel's netlink YAML + spec files, in an effort to avoid needing to heavily mark up the original + YAML file. 
+ + This code is split in three big parts: + 1) RST formatters: Use to convert a string to a RST output + 2) Parser helpers: Functions to parse the YAML data structure + 3) Main function and small helpers +""" + +from typing import Any, Dict, List +import os.path +import sys +import argparse +import logging +import yaml + + +SPACE_PER_LEVEL = 4 + + +# RST Formatters +# ============== +def headroom(level: int) -> str: + """Return space to format""" + return " " * (level * SPACE_PER_LEVEL) + + +def bold(text: str) -> str: + """Format bold text""" + return f"**{text}**" + + +def inline(text: str) -> str: + """Format inline text""" + return f"``{text}``" + + +def sanitize(text: str) -> str: + """Remove newlines and multiple spaces""" + # This is useful for some fields that are spread across multiple lines + return str(text).replace("\n", "").strip() + + +def rst_fields(key: str, value: str, level: int = 0) -> str: + """Return a RST formatted field""" + return headroom(level) + f":{key}: {value}" + + +def rst_definition(key: str, value: Any, level: int = 0) -> str: + """Format a single rst definition""" + return headroom(level) + key + "\n" + headroom(level + 1) + str(value) + + +def rst_paragraph(paragraph: str, level: int = 0) -> str: + """Return a formatted paragraph""" + return headroom(level) + paragraph + + +def rst_bullet(item: str, level: int = 0) -> str: + """Return a formatted a bullet""" + return headroom(level) + f" - {item}" + + +def rst_subsection(title: str) -> str: + """Add a sub-section to the document""" + return f"{title}\n" + "-" * len(title) + + +def rst_subsubsection(title: str) -> str: + """Add a sub-sub-section to the document""" + return f"{title}\n" + "~" * len(title) + + +def rst_section(title: str) -> str: + """Add a section to the document""" + return f"\n{title}\n" + "=" * len(title) + + +def rst_subtitle(title: str) -> str: + """Add a subtitle to the document""" + return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n" + + +def rst_title(title: str) -> str: + """Add a title to the document""" + return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n" + + +def rst_list_inline(list_: List[str], level: int = 0) -> str: + """Format a list using inlines""" + return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]" + + +def rst_header() -> str: + """The headers for all the auto generated RST files""" + lines = [] + + lines.append(rst_paragraph(".. SPDX-License-Identifier: GPL-2.0")) + lines.append(rst_paragraph(".. NOTE: This document was auto-generated.\n\n")) + + return "\n".join(lines) + + +def rst_toctree(maxdepth: int = 2) -> str: + """Generate a toctree RST primitive""" + lines = [] + + lines.append(".. 
toctree::") + lines.append(f" :maxdepth: {maxdepth}\n\n") + + return "\n".join(lines) + + +# Parsers +# ======= + + +def parse_mcast_group(mcast_group: List[Dict[str, Any]]) -> str: + """Parse 'multicast' group list and return a formatted string""" + lines = [] + for group in mcast_group: + lines.append(rst_bullet(group["name"])) + + return "\n".join(lines) + + +def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str: + """Parse 'do' section and return a formatted string""" + lines = [] + for key in do_dict.keys(): + lines.append(rst_paragraph(bold(key), level + 1)) + lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + + return "\n".join(lines) + + +def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: + """Parse 'attributes' section""" + if "attributes" not in attrs: + return "" + lines = [rst_fields("attributes", rst_list_inline(attrs["attributes"]), level + 1)] + + return "\n".join(lines) + + +def parse_operations(operations: List[Dict[str, Any]]) -> str: + """Parse operations block""" + preprocessed = ["name", "doc", "title", "do", "dump"] + lines = [] + + for operation in operations: + lines.append(rst_section(operation["name"])) + lines.append(rst_paragraph(sanitize(operation["doc"])) + "\n") + + for key in operation.keys(): + if key in preprocessed: + # Skip the special fields + continue + lines.append(rst_fields(key, operation[key], 0)) + + if "do" in operation: + lines.append(rst_paragraph(":do:", 0)) + lines.append(parse_do(operation["do"], 0)) + if "dump" in operation: + lines.append(rst_paragraph(":dump:", 0)) + lines.append(parse_do(operation["dump"], 0)) + + # New line after fields + lines.append("\n") + + return "\n".join(lines) + + +def parse_entries(entries: List[Dict[str, Any]], level: int) -> str: + """Parse a list of entries""" + lines = [] + for entry in entries: + if isinstance(entry, dict): + # entries could be a list or a dictionary + lines.append( + rst_fields(entry.get("name", ""), sanitize(entry.get("doc", "")), level) + ) + elif isinstance(entry, list): + lines.append(rst_list_inline(entry, level)) + else: + lines.append(rst_bullet(inline(sanitize(entry)), level)) + + lines.append("\n") + return "\n".join(lines) + + +def parse_definitions(defs: Dict[str, Any]) -> str: + """Parse definitions section""" + preprocessed = ["name", "entries", "members"] + ignored = ["render-max"] # This is not printed + lines = [] + + for definition in defs: + lines.append(rst_section(definition["name"])) + for k in definition.keys(): + if k in preprocessed + ignored: + continue + lines.append(rst_fields(k, sanitize(definition[k]), 0)) + + # Field list needs to finish with a new line + lines.append("\n") + if "entries" in definition: + lines.append(rst_paragraph(":entries:", 0)) + lines.append(parse_entries(definition["entries"], 1)) + if "members" in definition: + lines.append(rst_paragraph(":members:", 0)) + lines.append(parse_entries(definition["members"], 1)) + + return "\n".join(lines) + + +def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: + """Parse attribute from attribute-set""" + preprocessed = ["name", "type"] + ignored = ["checks"] + lines = [] + + for entry in entries: + lines.append(rst_section(entry["name"])) + for attr in entry["attributes"]: + type_ = attr.get("type") + attr_line = bold(attr["name"]) + if type_: + # Add the attribute type in the same line + attr_line += f" ({inline(type_)})" + + lines.append(rst_subsubsection(attr_line)) + + for k in attr.keys(): + if k in preprocessed + ignored: + continue + 
lines.append(rst_fields(k, sanitize(attr[k]), 2)) + lines.append("\n") + + return "\n".join(lines) + + +def parse_yaml(obj: Dict[str, Any]) -> str: + """Format the whole YAML into a RST string""" + lines = [] + + # Main header + + lines.append(rst_header()) + + title = f"Family ``{obj['name']}`` netlink specification" + lines.append(rst_title(title)) + lines.append(rst_paragraph(".. contents::\n")) + + if "doc" in obj: + lines.append(rst_subtitle("Summary")) + lines.append(rst_paragraph(obj["doc"], 0)) + + # Operations + if "operations" in obj: + lines.append(rst_subtitle("Operations")) + lines.append(parse_operations(obj["operations"]["list"])) + + # Multicast groups + if "mcast-groups" in obj: + lines.append(rst_subtitle("Multicast groups")) + lines.append(parse_mcast_group(obj["mcast-groups"]["list"])) + + # Definitions + if "definitions" in obj: + lines.append(rst_subtitle("Definitions")) + lines.append(parse_definitions(obj["definitions"])) + + # Attributes set + if "attribute-sets" in obj: + lines.append(rst_subtitle("Attribute sets")) + lines.append(parse_attr_sets(obj["attribute-sets"])) + + return "\n".join(lines) + + +# Main functions +# ============== + + +def parse_arguments() -> argparse.Namespace: + """Parse arguments from user""" + parser = argparse.ArgumentParser(description="Netlink RST generator") + + parser.add_argument("-v", "--verbose", action="store_true") + parser.add_argument("-o", "--output", help="Output file name") + + # Index and input are mutually exclusive + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-x", "--index", action="store_true", help="Generate the index page" + ) + group.add_argument("-i", "--input", help="YAML file name") + + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + if args.input and not os.path.isfile(args.input): + logging.warning("%s is not a valid file.", args.input) + sys.exit(-1) + + if not args.output: + logging.error("No output file specified.") + sys.exit(-1) + + if os.path.isfile(args.output): + logging.debug("%s already exists. 
Overwriting it.", args.output) + + return args + + +def parse_yaml_file(filename: str) -> str: + """Transform the YAML specified by filename into a rst-formmated string""" + with open(filename, "r", encoding="utf-8") as spec_file: + yaml_data = yaml.safe_load(spec_file) + content = parse_yaml(yaml_data) + + return content + + +def write_to_rstfile(content: str, filename: str) -> None: + """Write the generated content into an RST file""" + logging.debug("Saving RST file to %s", filename) + + with open(filename, "w", encoding="utf-8") as rst_file: + rst_file.write(content) + + +def generate_main_index_rst(output: str) -> None: + """Generate the `networking_spec/index` content and write to the file""" + lines = [] + + lines.append(rst_header()) + lines.append(rst_title("Netlink Specification")) + lines.append(rst_toctree(1)) + + index_dir = os.path.dirname(output) + logging.debug("Looking for .rst files in %s", index_dir) + for filename in os.listdir(index_dir): + if not filename.endswith(".rst") or filename == "index.rst": + continue + lines.append(f" {filename.replace('.rst', '')}\n") + + logging.debug("Writing an index file at %s", output) + write_to_rstfile("".join(lines), output) + + +def main() -> None: + """Main function that reads the YAML files and generates the RST files""" + + args = parse_arguments() + + if args.input: + logging.debug("Parsing %s", args.input) + try: + content = parse_yaml_file(os.path.join(args.input)) + except Exception as exception: + logging.warning("Failed to parse %s.", args.input) + logging.warning(exception) + sys.exit(-1) + + write_to_rstfile(content, args.output) + + if args.index: + # Generate the index RST file + generate_main_index_rst(args.output) + + +if __name__ == "__main__": + main() -- cgit v1.2.3-70-g09d2 From 2afae08c9dcb8ac648414277cec70c2fe6a34d9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Nov 2023 19:59:36 -0800 Subject: bpf: Validate global subprogs lazily Slightly change BPF verifier logic around eagerness and order of global subprog validation. Instead of going over every global subprog eagerly and validating it before main (entry) BPF program is verified, turn it around. Validate main program first, mark subprogs that were called from main program for later verification, but otherwise assume it is valid. Afterwards, go over marked global subprogs and validate those, potentially marking some more global functions as being called. Continue this process until all (transitively) callable global subprogs are validated. It's a BFS traversal at its heart and will always converge. This is an important change because it allows to feature-gate some subprograms that might not be verifiable on some older kernel, depending on supported set of features. E.g., at some point, global functions were allowed to accept a pointer to memory, which size is identified by user-provided type. Unfortunately, older kernels don't support this feature. With BPF CO-RE approach, the natural way would be to still compile BPF object file once and guard calls to this global subprog with some CO-RE check or using .rodata variables. That's what people do to guard usage of new helpers or kfuncs, and any other new BPF-side feature that might be missing on old kernels. That's currently impossible to do with global subprogs, unfortunately, because they are eagerly and unconditionally validated. 
This patch set aims to change this, so that in the future when global funcs gain new features, those can be guarded using BPF CO-RE techniques in the same fashion as any other new kernel feature. Two selftests had to be adjusted in sync with these changes. test_global_func12 relied on eager global subprog validation failing before main program failure is detected (unknown return value). Fix by making sure that main program is always valid. verifier_subprog_precision's parent_stack_slot_precise subtest relied on verifier checkpointing heuristic to do a checkpoint at instruction #5, but that's no longer true because we don't have enough jumps validated before reaching insn #5 due to global subprogs being validated later. Other than that, no changes, as one would expect. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231124035937.403208-3-andrii@kernel.org --- include/linux/bpf.h | 2 + kernel/bpf/verifier.c | 48 +++++++++++++++++++--- .../selftests/bpf/progs/test_global_func12.c | 4 +- .../bpf/progs/verifier_subprog_precision.c | 4 +- 4 files changed, 48 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 258ba232e302..eb447b0a9423 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1347,6 +1347,8 @@ static inline bool bpf_prog_has_trampoline(const struct bpf_prog *prog) struct bpf_func_info_aux { u16 linkage; bool unreliable; + bool called : 1; + bool verified : 1; }; enum bpf_jit_poke_reason { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2939ebf2638..8e7b6072e3f4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -434,6 +434,11 @@ static const char *subprog_name(const struct bpf_verifier_env *env, int subprog) return btf_type_name(env->prog->aux->btf, info->type_id); } +static struct bpf_func_info_aux *subprog_aux(const struct bpf_verifier_env *env, int subprog) +{ + return &env->prog->aux->func_info_aux[subprog]; +} + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK); @@ -9290,6 +9295,8 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, verbose(env, "Func#%d ('%s') is global and assumed valid.\n", subprog, sub_name); + /* mark global subprog for verifying after main prog */ + subprog_aux(env, subprog)->called = true; clear_caller_saved_regs(env, caller->regs); /* All global functions return a 64-bit SCALAR_VALUE */ @@ -19873,8 +19880,11 @@ out: return ret; } -/* Verify all global functions in a BPF program one by one based on their BTF. - * All global functions must pass verification. Otherwise the whole program is rejected. +/* Lazily verify all global functions based on their BTF, if they are called + * from main BPF program or any of subprograms transitively. + * BPF global subprogs called from dead code are not validated. + * All callable global functions must pass verification. + * Otherwise the whole program is rejected. 
* Consider: * int bar(int); * int foo(int f) @@ -19893,14 +19903,26 @@ out: static int do_check_subprogs(struct bpf_verifier_env *env) { struct bpf_prog_aux *aux = env->prog->aux; - int i, ret; + struct bpf_func_info_aux *sub_aux; + int i, ret, new_cnt; if (!aux->func_info) return 0; + /* exception callback is presumed to be always called */ + if (env->exception_callback_subprog) + subprog_aux(env, env->exception_callback_subprog)->called = true; + +again: + new_cnt = 0; for (i = 1; i < env->subprog_cnt; i++) { - if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) + if (!subprog_is_global(env, i)) + continue; + + sub_aux = subprog_aux(env, i); + if (!sub_aux->called || sub_aux->verified) continue; + env->insn_idx = env->subprog_info[i].start; WARN_ON_ONCE(env->insn_idx == 0); ret = do_check_common(env, i, env->exception_callback_subprog == i); @@ -19910,7 +19932,21 @@ static int do_check_subprogs(struct bpf_verifier_env *env) verbose(env, "Func#%d ('%s') is safe for any args that match its prototype\n", i, subprog_name(env, i)); } + + /* We verified new global subprog, it might have called some + * more global subprogs that we haven't verified yet, so we + * need to do another pass over subprogs to verify those. + */ + sub_aux->verified = true; + new_cnt++; } + + /* We can't loop forever as we verify at least one global subprog on + * each pass. + */ + if (new_cnt) + goto again; + return 0; } @@ -20556,8 +20592,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; - ret = do_check_subprogs(env); - ret = ret ?: do_check_main(env); + ret = do_check_main(env); + ret = ret ?: do_check_subprogs(env); if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux)) ret = bpf_prog_offload_finalize(env); diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c index 7f159d83c6f6..6e03d42519a6 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func12.c +++ b/tools/testing/selftests/bpf/progs/test_global_func12.c @@ -19,5 +19,7 @@ int global_func12(struct __sk_buff *skb) { const struct S s = {.x = skb->len }; - return foo(&s); + foo(&s); + + return 1; } diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index f61d623b1ce8..b5efcaeaa1ae 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -370,12 +370,10 @@ __naked int parent_stack_slot_precise(void) SEC("?raw_tp") __success __log_level(2) __msg("9: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 9 first_idx 6") +__msg("mark_precise: frame0: last_idx 9 first_idx 0") __msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)") -__msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 5 first_idx 0") __msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6") __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6") -- cgit v1.2.3-70-g09d2 From e8a339b5235e294f29153149ea7cf26a9a87dbea Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Nov 2023 19:59:37 -0800 Subject: selftests/bpf: Add lazy global subprog validation tests 
Add a few test that validate BPF verifier's lazy approach to validating global subprogs. We check that global subprogs that are called transitively through another global subprog is validated. We also check that invalid global subprog is not validated, if it's not called from the main program. And we also check that main program is always validated first, before any of the subprogs. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231124035937.403208-4-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_global_subprogs.c | 92 ++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_global_subprogs.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 5cfa7a6316b6..8d746642cbd7 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -25,6 +25,7 @@ #include "verifier_direct_stack_access_wraparound.skel.h" #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" +#include "verifier_global_subprogs.skel.h" #include "verifier_gotol.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" @@ -134,6 +135,7 @@ void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_acces void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } +void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } void test_verifier_gotol(void) { RUN(verifier_gotol); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c new file mode 100644 index 000000000000..a0a5efd1caa1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include +#include +#include "bpf_misc.h" + +int arr[1]; +int unkn_idx; + +__noinline long global_bad(void) +{ + return arr[unkn_idx]; /* BOOM */ +} + +__noinline long global_good(void) +{ + return arr[0]; +} + +__noinline long global_calls_bad(void) +{ + return global_good() + global_bad() /* does BOOM indirectly */; +} + +__noinline long global_calls_good_only(void) +{ + return global_good(); +} + +SEC("?raw_tp") +__success __log_level(2) +/* main prog is validated completely first */ +__msg("('global_calls_good_only') is global and assumed valid.") +__msg("1: (95) exit") +/* eventually global_good() is transitively validated as well */ +__msg("Validating global_good() func") +__msg("('global_good') is safe for any args that match its prototype") +int chained_global_func_calls_success(void) +{ + return global_calls_good_only(); +} + +SEC("?raw_tp") +__failure __log_level(2) +/* main prog validated successfully first */ +__msg("1: (95) exit") +/* eventually we validate global_bad() and fail */ +__msg("Validating global_bad() func") +__msg("math between map_value pointer and register") /* BOOM */ +int chained_global_func_calls_bad(void) +{ + return global_calls_bad(); +} + +/* do out of bounds access forcing verifier to fail verification if this + * global func is called + */ +__noinline int global_unsupp(const int *mem) +{ + if (!mem) + return 0; + return mem[100]; /* BOOM */ +} + +const volatile bool skip_unsupp_global = true; + +SEC("?raw_tp") +__success +int guarded_unsupp_global_called(void) +{ + if (!skip_unsupp_global) + return global_unsupp(NULL); + return 0; +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("Func#1 ('global_unsupp') is global and assumed valid.") +__msg("Validating global_unsupp() func#1...") +__msg("value is outside of the allowed memory range") +int unguarded_unsupp_global_called(void) +{ + int x = 0; + + return global_unsupp(&x); +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 8e3707975e04e432f132955652fc77e9ff82f31d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 09:33:23 -0800 Subject: tools: ynl-gen: always append ULL/LL to range types 32bit builds generate the following warning when we use a u32-max in range validation: warning: decimal constant 4294967295 is between LONG_MAX and ULONG_MAX. For C99 that means long long, C90 compilers are very likely to produce unsigned long (and a warning) here The range values are u64, slap ULL/LL on all of them just to avoid such noise. There's currently no code using full range validation, but it will matter in the upcoming page-pool introspection. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- tools/net/ynl/ynl-gen-c.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 3bd6b928c14f..fe5ca7fbe011 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2070,12 +2070,13 @@ def print_kernel_policy_ranges(family, cw): first = False sign = '' if attr.type[0] == 'u' else '_signed' + suffix = 'ULL' if attr.type[0] == 'u' else 'LL' cw.block_start(line=f'static const struct netlink_range_validation{sign} {c_lower(attr.enum_name)}_range =') members = [] if 'min' in attr.checks: - members.append(('min', attr.get_limit('min'))) + members.append(('min', str(attr.get_limit('min')) + suffix)) if 'max' in attr.checks: - members.append(('max', attr.get_limit('max'))) + members.append(('max', str(attr.get_limit('max')) + suffix)) cw.write_struct_init(members) cw.block_end(line=';') cw.nl() -- cgit v1.2.3-70-g09d2 From 19ed9b3d7a77c6ac3927fe05f4eacfa056b43a48 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 19:08:44 -0800 Subject: tools: ynl-get: use family c-name If a new family is ever added with a dash in the name the C codegen will break. Make sure we use the "safe" form of the name consistently. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/ynl-gen-c.py | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index fe5ca7fbe011..b3438e96fde6 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -67,9 +67,9 @@ class Type(SpecAttr): if 'nested-attributes' in attr: self.nested_attrs = attr['nested-attributes'] if self.nested_attrs == family.name: - self.nested_render_name = f"{family.name}" + self.nested_render_name = c_lower(f"{family.name}") else: - self.nested_render_name = f"{family.name}_{c_lower(self.nested_attrs)}" + self.nested_render_name = c_lower(f"{family.name}_{self.nested_attrs}") if self.nested_attrs in self.family.consts: self.nested_struct_type = 'struct ' + self.nested_render_name + '_' @@ -335,7 +335,7 @@ class TypeScalar(Type): maybe_enum = not self.is_bitfield and 'enum' in self.attr if maybe_enum and self.family.consts[self.attr['enum']].enum_name: - self.type_name = f"enum {self.family.name}_{c_lower(self.attr['enum'])}" + self.type_name = c_lower(f"enum {self.family.name}_{self.attr['enum']}") elif self.is_auto_scalar: self.type_name = '__' + self.type[0] + '64' else: @@ -685,9 +685,9 @@ class Struct: self.nested = type_list is None if family.name == c_lower(space_name): - self.render_name = f"{family.name}" + self.render_name = c_lower(family.name) else: - self.render_name = f"{family.name}_{c_lower(space_name)}" + self.render_name = c_lower(family.name + '-' + space_name) self.struct_name = 'struct ' + self.render_name if self.nested and space_name in family.consts: self.struct_name += '_' @@ -841,7 +841,7 @@ class Operation(SpecOperation): def __init__(self, family, yaml, req_value, rsp_value): super().__init__(family, yaml, req_value, rsp_value) - self.render_name = family.name + '_' + c_lower(self.name) + self.render_name = c_lower(family.name + '_' + self.name) self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \ ('dump' in yaml and 'request' in yaml['dump']) @@ -1431,7 +1431,7 @@ def op_prefix(ri, direction, deref=False): suffix += '_rsp' suffix += '_dump' if deref else '_list' - return 
f"{ri.family['name']}{suffix}" + return f"{ri.family.c_name}{suffix}" def type_name(ri, direction, deref=False): @@ -1497,11 +1497,11 @@ def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None): def put_op_name_fwd(family, cw): - cw.write_func_prot('const char *', f'{family.name}_op_str', ['int op'], suffix=';') + cw.write_func_prot('const char *', f'{family.c_name}_op_str', ['int op'], suffix=';') def put_op_name(family, cw): - map_name = f'{family.name}_op_strmap' + map_name = f'{family.c_name}_op_strmap' cw.block_start(line=f"static const char * const {map_name}[] =") for op_name, op in family.msgs.items(): if op.rsp_value: @@ -1518,7 +1518,7 @@ def put_op_name(family, cw): cw.block_end(line=';') cw.nl() - _put_enum_to_str_helper(cw, family.name + '_op', map_name, 'op') + _put_enum_to_str_helper(cw, family.c_name + '_op', map_name, 'op') def put_enum_to_str_fwd(family, cw, enum): @@ -1840,7 +1840,7 @@ def _print_type(ri, direction, struct): if ri.op_mode == 'dump': suffix += '_dump' - ri.cw.block_start(line=f"struct {ri.family['name']}{suffix}") + ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}") meta_started = False for _, attr in struct.member_list(): @@ -2106,7 +2106,7 @@ def print_kernel_op_table_fwd(family, cw, terminate): cnt = len(family.ops) qual = 'static const' if not exported else 'const' - line = f"{qual} struct {struct_type} {family.name}_nl_ops[{cnt}]" + line = f"{qual} struct {struct_type} {family.c_name}_nl_ops[{cnt}]" if terminate: cw.p(f"extern {line};") else: @@ -2249,7 +2249,7 @@ def print_kernel_mcgrp_src(family, cw): if not family.mcgrps['list']: return - cw.block_start('static const struct genl_multicast_group ' + family.name + '_nl_mcgrps[] =') + cw.block_start('static const struct genl_multicast_group ' + family.c_name + '_nl_mcgrps[] =') for grp in family.mcgrps['list']: name = grp['name'] grp_id = c_upper(f"{family.name}-nlgrp-{name}") @@ -2262,7 +2262,7 @@ def print_kernel_family_struct_hdr(family, cw): if not kernel_can_gen_family_struct(family): return - cw.p(f"extern struct genl_family {family.name}_nl_family;") + cw.p(f"extern struct genl_family {family.c_name}_nl_family;") cw.nl() @@ -2277,14 +2277,14 @@ def print_kernel_family_struct_src(family, cw): cw.p('.parallel_ops\t= true,') cw.p('.module\t\t= THIS_MODULE,') if family.kernel_policy == 'per-op': - cw.p(f'.ops\t\t= {family.name}_nl_ops,') - cw.p(f'.n_ops\t\t= ARRAY_SIZE({family.name}_nl_ops),') + cw.p(f'.ops\t\t= {family.c_name}_nl_ops,') + cw.p(f'.n_ops\t\t= ARRAY_SIZE({family.c_name}_nl_ops),') elif family.kernel_policy == 'split': - cw.p(f'.split_ops\t= {family.name}_nl_ops,') - cw.p(f'.n_split_ops\t= ARRAY_SIZE({family.name}_nl_ops),') + cw.p(f'.split_ops\t= {family.c_name}_nl_ops,') + cw.p(f'.n_split_ops\t= ARRAY_SIZE({family.c_name}_nl_ops),') if family.mcgrps['list']: - cw.p(f'.mcgrps\t\t= {family.name}_nl_mcgrps,') - cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.name}_nl_mcgrps),') + cw.p(f'.mcgrps\t\t= {family.c_name}_nl_mcgrps,') + cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),') cw.block_end(';') @@ -2294,7 +2294,7 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): if obj[enum_name]: start_line = 'enum ' + c_lower(obj[enum_name]) elif ckey and ckey in obj: - start_line = 'enum ' + family.name + '_' + c_lower(obj[ckey]) + start_line = 'enum ' + family.c_name + '_' + c_lower(obj[ckey]) cw.block_start(line=start_line) @@ -2478,7 +2478,7 @@ def render_user_family(family, cw, prototype): cw.nl() cw.block_start(f'{symbol} = ') - 
cw.p(f'.name\t\t= "{family.name}",') + cw.p(f'.name\t\t= "{family.c_name}",') if family.ntfs: cw.p(f".ntf_info\t= {family['name']}_ntf_info,") cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),") @@ -2560,7 +2560,7 @@ def main(): render_uapi(parsed, cw) return - hdr_prot = f"_LINUX_{parsed.name.upper()}_GEN_H" + hdr_prot = f"_LINUX_{parsed.c_name.upper()}_GEN_H" if args.header: cw.p('#ifndef ' + hdr_prot) cw.p('#define ' + hdr_prot) -- cgit v1.2.3-70-g09d2 From 30c90200153430915a4c4aaaca7437d2592e6ed2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 22 Nov 2023 19:10:50 -0800 Subject: tools: ynl-gen: use enum name from the spec The enum name used for id-to-str table does not handle the enum-name override in the spec correctly. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- tools/net/ynl/ynl-gen-c.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index b3438e96fde6..cbbda276f6d1 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -755,11 +755,17 @@ class EnumSet(SpecEnumSet): if 'enum-name' in yaml: if yaml['enum-name']: self.enum_name = 'enum ' + c_lower(yaml['enum-name']) + self.user_type = self.enum_name else: self.enum_name = None else: self.enum_name = 'enum ' + self.render_name + if self.enum_name: + self.user_type = self.enum_name + else: + self.user_type = 'int' + self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-") super().__init__(family, yaml) @@ -1483,8 +1489,8 @@ def put_typol(cw, struct): def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None): args = [f'int {arg_name}'] - if enum and not ('enum-name' in enum and not enum['enum-name']): - args = [f'enum {render_name} {arg_name}'] + if enum: + args = [enum.user_type + ' ' + arg_name] cw.write_func_prot('const char *', f'{render_name}_str', args) cw.block_start() if enum and enum.type == 'flags': @@ -1522,9 +1528,7 @@ def put_op_name(family, cw): def put_enum_to_str_fwd(family, cw, enum): - args = [f'enum {enum.render_name} value'] - if 'enum-name' in enum and not enum['enum-name']: - args = ['int value'] + args = [enum.user_type + ' value'] cw.write_func_prot('const char *', f'{enum.render_name}_str', args, suffix=';') -- cgit v1.2.3-70-g09d2 From b16904fd9f01b580db357ef2b1cc9e86d89576c2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 26 Nov 2023 21:03:42 -0800 Subject: bpf: Fix a few selftest failures due to llvm18 change With latest upstream llvm18, the following test cases failed: $ ./test_progs -j #13/2 bpf_cookie/multi_kprobe_link_api:FAIL #13/3 bpf_cookie/multi_kprobe_attach_api:FAIL #13 bpf_cookie:FAIL #77 fentry_fexit:FAIL #78/1 fentry_test/fentry:FAIL #78 fentry_test:FAIL #82/1 fexit_test/fexit:FAIL #82 fexit_test:FAIL #112/1 kprobe_multi_test/skel_api:FAIL #112/2 kprobe_multi_test/link_api_addrs:FAIL [...] #112 kprobe_multi_test:FAIL #356/17 test_global_funcs/global_func17:FAIL #356 test_global_funcs:FAIL Further analysis shows llvm upstream patch [1] is responsible for the above failures. For example, for function bpf_fentry_test7() in net/bpf/test_run.c, without [1], the asm code is: 0000000000000400 : 400: f3 0f 1e fa endbr64 404: e8 00 00 00 00 callq 0x409 409: 48 89 f8 movq %rdi, %rax 40c: c3 retq 40d: 0f 1f 00 nopl (%rax) ... and with [1], the asm code is: 0000000000005d20 : 5d20: e8 00 00 00 00 callq 0x5d25 5d25: c3 retq ... 
and is called instead of and this caused test failures for #13/#77 etc. except #356. For test case #356/17, with [1] (progs/test_global_func17.c)), the main prog looks like: 0000000000000000 : 0: b4 00 00 00 2a 00 00 00 w0 = 0x2a 1: 95 00 00 00 00 00 00 00 exit ... which passed verification while the test itself expects a verification failure. Let us add 'barrier_var' style asm code in both places to prevent function specialization which caused selftests failure. [1] https://github.com/llvm/llvm-project/pull/72903 Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231127050342.1945270-1-yonghong.song@linux.dev --- net/bpf/test_run.c | 2 +- tools/testing/selftests/bpf/progs/test_global_func17.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c9fdcc5cdce1..711cf5d59816 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -542,7 +542,7 @@ struct bpf_fentry_test_t { int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) { - asm volatile (""); + asm volatile ("": "+r"(arg)); return (long)arg; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c index a32e11c7d933..5de44b09e8ec 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func17.c +++ b/tools/testing/selftests/bpf/progs/test_global_func17.c @@ -5,6 +5,7 @@ __noinline int foo(int *p) { + barrier_var(p); return p ? (*p = 42) : 0; } -- cgit v1.2.3-70-g09d2 From 876843ce1e4897e8ceade50bfa3d9a4ec483abf3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 10:20:56 -0800 Subject: bpftool: mark orphaned programs during prog show Commit ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD") stopped removing program's id from idr when the offloaded/bound netdev goes away. I was supposed to take a look and check in [0], but apparently I did not. Martin points out it might be useful to keep it that way for observability sake, but we at least need to mark those programs as unusable. Mark those programs as 'orphaned' and keep printing the list when we encounter ENODEV. 
0: unspec tag 0000000000000000 xlated 0B not jited memlock 4096B orphaned [0]: https://lore.kernel.org/all/CAKH8qBtyR20ZWAc11z1-6pGb3Hd47AQUTbE_cfoktG59TqaJ7Q@mail.gmail.com/ v3: * use two spaces for " orphaned" (Quentin) Cc: netdev@vger.kernel.org Fixes: ef01f4e25c17 ("bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD") Signed-off-by: Stanislav Fomichev Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/r/20231127182057.1081138-1-sdf@google.com Signed-off-by: Martin KaFai Lau --- tools/bpf/bpftool/prog.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 7ec4f5671e7a..feb8e305804f 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -442,7 +442,7 @@ static void print_prog_header_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, "recursion_misses", info->recursion_misses); } -static void print_prog_json(struct bpf_prog_info *info, int fd) +static void print_prog_json(struct bpf_prog_info *info, int fd, bool orphaned) { char *memlock; @@ -461,6 +461,7 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, "uid", info->created_by_uid); } + jsonw_bool_field(json_wtr, "orphaned", orphaned); jsonw_uint_field(json_wtr, "bytes_xlated", info->xlated_prog_len); if (info->jited_prog_len) { @@ -527,7 +528,7 @@ static void print_prog_header_plain(struct bpf_prog_info *info, int fd) printf("\n"); } -static void print_prog_plain(struct bpf_prog_info *info, int fd) +static void print_prog_plain(struct bpf_prog_info *info, int fd, bool orphaned) { char *memlock; @@ -554,6 +555,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf(" memlock %sB", memlock); free(memlock); + if (orphaned) + printf(" orphaned"); + if (info->nr_map_ids) show_prog_maps(fd, info->nr_map_ids); @@ -581,15 +585,15 @@ static int show_prog(int fd) int err; err = bpf_prog_get_info_by_fd(fd, &info, &len); - if (err) { + if (err && err != -ENODEV) { p_err("can't get prog info: %s", strerror(errno)); return -1; } if (json_output) - print_prog_json(&info, fd); + print_prog_json(&info, fd, err == -ENODEV); else - print_prog_plain(&info, fd); + print_prog_plain(&info, fd, err == -ENODEV); return 0; } -- cgit v1.2.3-70-g09d2 From cf9791631027a476f7cdb0e1b3ac6add16eff264 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 10:20:57 -0800 Subject: selftests/bpf: update test_offload to use new orphaned property - filter orphaned programs by default - when trying to query orphaned program, don't expect bpftool failure Cc: netdev@vger.kernel.org Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127182057.1081138-2-sdf@google.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_offload.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 40cba8d368d9..6157f884d091 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -169,12 +169,14 @@ def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False): return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail, include_stderr=include_stderr) -def bpftool_prog_list(expected=None, ns=""): +def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): _, progs = 
bpftool("prog show", JSON=True, ns=ns, fail=True) # Remove the base progs for p in base_progs: if p in progs: progs.remove(p) + if exclude_orphaned: + progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: fail(True, "%d BPF programs loaded, expected %d" % @@ -612,11 +614,9 @@ def pin_map(file_name, idx=0, expected=1): def check_dev_info_removed(prog_file=None, map_file=None): bpftool_prog_list(expected=0) + bpftool_prog_list(expected=1, exclude_orphaned=False) ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) - fail(ret == 0, "Showing prog with removed device did not fail") - fail(err["error"].find("No such device") == -1, - "Showing prog with removed device expected ENODEV, error is %s" % - (err["error"])) + fail(ret != 0, "failed to show prog with removed device") bpftool_map_list(expected=0) ret, err = bpftool("map show pin %s" % (map_file), fail=False) @@ -1395,10 +1395,7 @@ try: start_test("Test multi-dev ASIC cross-dev destruction - orphaned...") ret, out = bpftool("prog show %s" % (progB), fail=False) - fail(ret == 0, "got information about orphaned program") - fail("error" not in out, "no error reported for get info on orphaned") - fail(out["error"] != "can't get prog info: No such device", - "wrong error for get info on orphaned") + fail(ret != 0, "couldn't get information about orphaned program") print("%s: OK" % (os.path.basename(__file__))) -- cgit v1.2.3-70-g09d2 From a79d8ba734bdbd2574ad16dd1b96506e5f642c4a Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:44 -0300 Subject: selftests: tc-testing: remove buildebpf plugin As tdc only tests loading/deleting and anything more complicated is better left to the ebpf test suite, provide a pre-compiled version of 'action.c' and don't bother compiling it in kselftests or on the fly at all. Cc: Davide Caratti Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/Makefile | 29 +-------- tools/testing/selftests/tc-testing/README | 2 - tools/testing/selftests/tc-testing/action-ebpf | Bin 0 -> 856 bytes .../tc-testing/plugin-lib/buildebpfPlugin.py | 67 --------------------- .../selftests/tc-testing/tc-tests/actions/bpf.json | 14 ++--- .../selftests/tc-testing/tc-tests/filters/bpf.json | 10 ++- tools/testing/selftests/tc-testing/tdc.sh | 2 +- 7 files changed, 11 insertions(+), 113 deletions(-) create mode 100644 tools/testing/selftests/tc-testing/action-ebpf delete mode 100644 tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index b1fa2e177e2f..e8b3dde4fa16 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,31 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -include ../../../scripts/Makefile.include -top_srcdir = $(abspath ../../../..) 
-APIDIR := $(top_scrdir)/include/uapi -TEST_GEN_FILES = action.o +TEST_PROGS += ./tdc.sh +TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts include ../lib.mk - -PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) - -ifeq ($(PROBE),) - CPU ?= probe -else - CPU ?= generic -endif - -CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - &1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') - -CLANG_FLAGS = -I. -I$(APIDIR) \ - $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types - -$(OUTPUT)/%.o: %.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 --target=bpf -emit-llvm -c $< -o - | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ - -TEST_PROGS += ./tdc.sh -TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index be7b00799b3e..fc8e858ff119 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -195,8 +195,6 @@ directory: and the other is a test whether the command leaked memory or not. (This one is a preliminary version, it may not work quite right yet, but the overall template is there and it should only need tweaks.) - - buildebpfPlugin.py: - builds all programs in $EBPFDIR. ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/action-ebpf b/tools/testing/selftests/tc-testing/action-ebpf new file mode 100644 index 000000000000..4879479b2ee5 Binary files /dev/null and b/tools/testing/selftests/tc-testing/action-ebpf differ diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py deleted file mode 100644 index d34fe06268d2..000000000000 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ /dev/null @@ -1,67 +0,0 @@ -''' -build ebpf program -''' - -import os -import signal -from string import Template -import subprocess -import time -from TdcPlugin import TdcPlugin -from tdc_config import * - -class SubPlugin(TdcPlugin): - def __init__(self): - self.sub_class = 'buildebpf/SubPlugin' - self.tap = '' - super().__init__() - - def pre_suite(self, testcount, testidlist): - super().pre_suite(testcount, testidlist) - - if self.args.buildebpf: - self._ebpf_makeall() - - def post_suite(self, index): - super().post_suite(index) - - self._ebpf_makeclean() - - def add_args(self, parser): - super().add_args(parser) - - self.argparser_group = self.argparser.add_argument_group( - 'buildebpf', - 'options for buildebpfPlugin') - self.argparser_group.add_argument( - '--nobuildebpf', action='store_false', default=True, - dest='buildebpf', - help='Don\'t build eBPF programs') - - return self.argparser - - def _ebpf_makeall(self): - if self.args.buildebpf: - self._make('all') - - def _ebpf_makeclean(self): - if self.args.buildebpf: - self._make('clean') - - def _make(self, target): - command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target) - proc = subprocess.Popen(command, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=os.environ.copy()) - (rawout, serr) = proc.communicate() - - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") - else: - foutput = rawout.decode("utf-8") - - proc.stdout.close() - proc.stderr.close() - return proc, foutput diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json 
b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 91832400ddbd..6e00bf32ef9a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -54,9 +54,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -65,10 +62,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf" @@ -81,9 +78,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -92,10 +86,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ko index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ko\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json index 013fb983bc3f..725d406a30ac 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json @@ -52,17 +52,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ok", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ok", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" @@ -77,17 +76,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ko", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ko", "expExitCode": "1", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ko\\].*tag 
[0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "0", "teardown": [ "$TC qdisc del dev $DEV1 ingress" diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 4dbe50bde5a0..407fa53822a0 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -64,5 +64,5 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -J`nproc` -c actions --nobuildebpf +./tdc.py -J`nproc` -c actions ./tdc.py -J`nproc` -c qdisc -- cgit v1.2.3-70-g09d2 From 8059e68b99280cc9a224593f3142f9368229c6ee Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:45 -0300 Subject: selftests: tc-testing: remove unnecessary time.sleep This operation is redundant and it's not stabilizing nor waiting for anything. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tdc.py | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 669ec89ebfe1..c5ec861687b6 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -497,11 +497,6 @@ def prepare_run(pm, args, testlist): pm.call_post_suite(1) return emergency_exit_message - if args.verbose: - print('give test rig 2 seconds to stabilize') - - time.sleep(2) - def purge_run(pm, index): pm.call_post_suite(index) -- cgit v1.2.3-70-g09d2 From 56e16bc69bb7d36a931111d8abdcd44b939751c4 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:46 -0300 Subject: selftests: tc-testing: prefix iproute2 functions with "ipr2" As suggested by Simon, prefix the functions that operate on iproute2 commands in contrast with the "nl" netlink prefix. Cc: Simon Horman Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 65c8f3f983b9..dc7a0597cf44 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -37,7 +37,7 @@ class SubPlugin(TdcPlugin): if netlink == True: self._nl_ns_create() else: - self._ns_create() + self._ipr2_ns_create() # Make sure the netns is visible in the fs ticks = 20 @@ -71,7 +71,7 @@ class SubPlugin(TdcPlugin): if netlink == True: self._nl_ns_destroy() else: - self._ns_destroy() + self._ipr2_ns_destroy() def post_suite(self, index): if self.args.verbose: @@ -161,7 +161,7 @@ class SubPlugin(TdcPlugin): ticks -= 1 continue - def _ns_create_cmds(self): + def _ipr2_ns_create_cmds(self): cmds = [] ns = self.args.NAMES['NS'] @@ -181,26 +181,26 @@ class SubPlugin(TdcPlugin): return cmds - def _ns_create(self): + def _ipr2_ns_create(self): ''' Create the network namespace in which the tests will be run and set up the required network devices for it. 
''' - self._exec_cmd_batched('pre', self._ns_create_cmds()) + self._exec_cmd_batched('pre', self._ipr2_ns_create_cmds()) def _nl_ns_destroy(self): ns = self.args.NAMES['NS'] netns.remove(ns) - def _ns_destroy_cmd(self): + def _ipr2_ns_destroy_cmd(self): return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS'])) - def _ns_destroy(self): + def _ipr2_ns_destroy(self): ''' Destroy the network namespace for testing (and any associated network devices as well) ''' - self._exec_cmd('post', self._ns_destroy_cmd()) + self._exec_cmd('post', self._ipr2_ns_destroy_cmd()) @cached_property def _proc(self): -- cgit v1.2.3-70-g09d2 From 501679f5d4a433144ae755dd2e5f757b1ce5a152 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:47 -0300 Subject: selftests: tc-testing: cleanup on Ctrl-C Cleanup net namespaces and other resources if we get a SIGINT (Ctrl-C). As user visible resources are allocated on a per test basis, it's only required to catch this condition when (possibly) running tests. So far calling post_suite is enough to free up anything that might linger. A missing keyword replacement for nsPlugin is also included. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 2 +- tools/testing/selftests/tc-testing/tdc.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index dc7a0597cf44..77b1106b8388 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -78,7 +78,7 @@ class SubPlugin(TdcPlugin): print('{}.post_suite'.format(self.sub_class)) # Make sure we don't leak resources - cmd = "$IP -a netns del" + cmd = self._replace_keywords("$IP -a netns del") if self.args.verbose > 3: print('_exec_cmd: command "{}"'.format(cmd)) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index c5ec861687b6..caeacc691587 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -1018,7 +1018,11 @@ def main(): if args.verbose > 2: print('args is {}'.format(args)) - set_operation_mode(pm, parser, args, remaining) + try: + set_operation_mode(pm, parser, args, remaining) + except KeyboardInterrupt: + # Cleanup on Ctrl-C + pm.call_post_suite(None) if __name__ == "__main__": main() -- cgit v1.2.3-70-g09d2 From ed346fccfc40364888601a2ec75dd94f4dca23bd Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Fri, 24 Nov 2023 12:42:48 -0300 Subject: selftests: tc-testing: remove unused import Remove this leftover from the times we pre-allocated everything Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231124154248.315470-6-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index 77b1106b8388..bb19b8b76d3b 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -23,8 +23,6 @@ class SubPlugin(TdcPlugin): 
super().__init__() def pre_suite(self, testcount, testlist): - from itertools import cycle - super().pre_suite(testcount, testlist) def prepare_test(self, test): -- cgit v1.2.3-70-g09d2 From 637567e4a3ef6f6a5ffa48781207d270265f7e68 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:40 -0800 Subject: tools: ynl: add sample for getting page-pool information Regenerate the tools/ code after netdev spec changes. Add sample to query page-pool info in a concise fashion: $ ./page-pool eth0[2] page pools: 10 (zombies: 0) refs: 41984 bytes: 171966464 (refs: 0 bytes: 0) recycling: 90.3% (alloc: 656:397681 recycle: 89652:270201) Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- tools/include/uapi/linux/netdev.h | 36 +++ tools/net/ynl/generated/netdev-user.c | 419 ++++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 171 ++++++++++++++ tools/net/ynl/lib/ynl.h | 2 +- tools/net/ynl/samples/.gitignore | 1 + tools/net/ynl/samples/Makefile | 2 +- tools/net/ynl/samples/page-pool.c | 147 ++++++++++++ 7 files changed, 776 insertions(+), 2 deletions(-) create mode 100644 tools/net/ynl/samples/page-pool.c (limited to 'tools') diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 2943a151d4f1..2b37233e00c0 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -64,16 +64,52 @@ enum { NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) }; +enum { + NETDEV_A_PAGE_POOL_ID = 1, + NETDEV_A_PAGE_POOL_IFINDEX, + NETDEV_A_PAGE_POOL_NAPI_ID, + NETDEV_A_PAGE_POOL_INFLIGHT, + NETDEV_A_PAGE_POOL_INFLIGHT_MEM, + NETDEV_A_PAGE_POOL_DETACH_TIME, + + __NETDEV_A_PAGE_POOL_MAX, + NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) +}; + +enum { + NETDEV_A_PAGE_POOL_STATS_INFO = 1, + NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST = 8, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, + NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, + NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, + NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, + + __NETDEV_A_PAGE_POOL_STATS_MAX, + NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, NETDEV_CMD_DEV_DEL_NTF, NETDEV_CMD_DEV_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_GET, + NETDEV_CMD_PAGE_POOL_ADD_NTF, + NETDEV_CMD_PAGE_POOL_DEL_NTF, + NETDEV_CMD_PAGE_POOL_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_STATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) }; #define NETDEV_MCGRP_MGMT "mgmt" +#define NETDEV_MCGRP_PAGE_POOL "page-pool" #endif /* _UAPI_LINUX_NETDEV_H */ diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index b5ffe8cd1144..a7b7019d00f1 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -18,6 +18,11 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_DEV_ADD_NTF] = "dev-add-ntf", [NETDEV_CMD_DEV_DEL_NTF] = "dev-del-ntf", [NETDEV_CMD_DEV_CHANGE_NTF] = "dev-change-ntf", + [NETDEV_CMD_PAGE_POOL_GET] = "page-pool-get", + [NETDEV_CMD_PAGE_POOL_ADD_NTF] = "page-pool-add-ntf", + [NETDEV_CMD_PAGE_POOL_DEL_NTF] = "page-pool-del-ntf", + [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", + [NETDEV_CMD_PAGE_POOL_STATS_GET] = 
"page-pool-stats-get", }; const char *netdev_op_str(int op) @@ -59,6 +64,16 @@ const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) } /* Policies */ +struct ynl_policy_attr netdev_page_pool_info_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { + [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_page_pool_info_nest = { + .max_attr = NETDEV_A_PAGE_POOL_MAX, + .table = netdev_page_pool_info_policy, +}; + struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, @@ -72,7 +87,85 @@ struct ynl_policy_nest netdev_dev_nest = { .table = netdev_dev_policy, }; +struct ynl_policy_attr netdev_page_pool_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { + [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_PAGE_POOL_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_INFLIGHT] = { .name = "inflight", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_INFLIGHT_MEM] = { .name = "inflight-mem", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_DETACH_TIME] = { .name = "detach-time", .type = YNL_PT_UINT, }, +}; + +struct ynl_policy_nest netdev_page_pool_nest = { + .max_attr = NETDEV_A_PAGE_POOL_MAX, + .table = netdev_page_pool_policy, +}; + +struct ynl_policy_attr netdev_page_pool_stats_policy[NETDEV_A_PAGE_POOL_STATS_MAX + 1] = { + [NETDEV_A_PAGE_POOL_STATS_INFO] = { .name = "info", .type = YNL_PT_NEST, .nest = &netdev_page_pool_info_nest, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST] = { .name = "alloc-fast", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW] = { .name = "alloc-slow", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER] = { .name = "alloc-slow-high-order", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY] = { .name = "alloc-empty", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL] = { .name = "alloc-refill", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE] = { .name = "alloc-waive", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED] = { .name = "recycle-cached", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL] = { .name = "recycle-cache-full", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING] = { .name = "recycle-ring", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL] = { .name = "recycle-ring-full", .type = YNL_PT_UINT, }, + [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT] = { .name = "recycle-released-refcnt", .type = YNL_PT_UINT, }, +}; + +struct ynl_policy_nest netdev_page_pool_stats_nest = { + .max_attr = NETDEV_A_PAGE_POOL_STATS_MAX, + .table = netdev_page_pool_stats_policy, +}; + /* Common nested types */ +void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) +{ +} + +int netdev_page_pool_info_put(struct nlmsghdr *nlh, unsigned int attr_type, + struct netdev_page_pool_info *obj) +{ + struct nlattr *nest; + + nest = mnl_attr_nest_start(nlh, attr_type); + if (obj->_present.id) + mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, obj->id); + if (obj->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_PAGE_POOL_IFINDEX, obj->ifindex); + mnl_attr_nest_end(nlh, nest); + + return 0; +} + +int netdev_page_pool_info_parse(struct ynl_parse_arg 
*yarg, + const struct nlattr *nested) +{ + struct netdev_page_pool_info *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_PAGE_POOL_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return 0; +} + /* ============== NETDEV_CMD_DEV_GET ============== */ /* NETDEV_CMD_DEV_GET - do */ void netdev_dev_get_req_free(struct netdev_dev_get_req *req) @@ -197,6 +290,314 @@ void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp) free(rsp); } +/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_GET - do */ +void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req) +{ + free(req); +} + +void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_page_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct netdev_page_pool_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_PAGE_POOL_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_PAGE_POOL_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.napi_id = 1; + dst->napi_id = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.inflight = 1; + dst->inflight = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT_MEM) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.inflight_mem = 1; + dst->inflight_mem = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_DETACH_TIME) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.detach_time = 1; + dst->detach_time = mnl_attr_get_uint(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_page_pool_get_rsp * +netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_page_pool_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); + ys->req_policy = &netdev_page_pool_nest; + yrs.yarg.rsp_policy = &netdev_page_pool_nest; + + if (req->_present.id) + mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_page_pool_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_page_pool_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_PAGE_POOL_GET - dump */ +void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp) +{ + struct netdev_page_pool_get_list *next = rsp; + + while ((void 
*)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_page_pool_get_list * +netdev_page_pool_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_page_pool_get_list); + yds.cb = netdev_page_pool_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; + yds.rsp_policy = &netdev_page_pool_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_page_pool_get_list_free(yds.first); + return NULL; +} + +/* NETDEV_CMD_PAGE_POOL_GET - notify */ +void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp) +{ + free(rsp); +} + +/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ +void +netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req) +{ + netdev_page_pool_info_free(&req->info); + free(req); +} + +void +netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp) +{ + netdev_page_pool_info_free(&rsp->info); + free(rsp); +} + +int netdev_page_pool_stats_get_rsp_parse(const struct nlmsghdr *nlh, + void *data) +{ + struct netdev_page_pool_stats_get_rsp *dst; + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + dst = yarg->data; + parg.ys = yarg->ys; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_PAGE_POOL_STATS_INFO) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.info = 1; + + parg.rsp_policy = &netdev_page_pool_info_nest; + parg.data = &dst->info; + if (netdev_page_pool_info_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_fast = 1; + dst->alloc_fast = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_slow = 1; + dst->alloc_slow = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_slow_high_order = 1; + dst->alloc_slow_high_order = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_empty = 1; + dst->alloc_empty = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_refill = 1; + dst->alloc_refill = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.alloc_waive = 1; + dst->alloc_waive = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_cached = 1; + dst->recycle_cached = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_cache_full = 1; + dst->recycle_cache_full = 
mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_ring = 1; + dst->recycle_ring = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_ring_full = 1; + dst->recycle_ring_full = mnl_attr_get_uint(attr); + } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.recycle_released_refcnt = 1; + dst->recycle_released_refcnt = mnl_attr_get_uint(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_page_pool_stats_get_rsp * +netdev_page_pool_stats_get(struct ynl_sock *ys, + struct netdev_page_pool_stats_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_page_pool_stats_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); + ys->req_policy = &netdev_page_pool_stats_nest; + yrs.yarg.rsp_policy = &netdev_page_pool_stats_nest; + + if (req->_present.info) + netdev_page_pool_info_put(nlh, NETDEV_A_PAGE_POOL_STATS_INFO, &req->info); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_page_pool_stats_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_page_pool_stats_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ +void +netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp) +{ + struct netdev_page_pool_stats_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + netdev_page_pool_info_free(&rsp->obj.info); + free(rsp); + } +} + +struct netdev_page_pool_stats_get_list * +netdev_page_pool_stats_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_page_pool_stats_get_list); + yds.cb = netdev_page_pool_stats_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; + yds.rsp_policy = &netdev_page_pool_stats_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_page_pool_stats_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), @@ -216,6 +617,24 @@ static const struct ynl_ntf_info netdev_ntf_info[] = { .policy = &netdev_dev_nest, .free = (void *)netdev_dev_get_ntf_free, }, + [NETDEV_CMD_PAGE_POOL_ADD_NTF] = { + .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), + .cb = netdev_page_pool_get_rsp_parse, + .policy = &netdev_page_pool_nest, + .free = (void *)netdev_page_pool_get_ntf_free, + }, + [NETDEV_CMD_PAGE_POOL_DEL_NTF] = { + .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), + .cb = netdev_page_pool_get_rsp_parse, + .policy = &netdev_page_pool_nest, + .free = (void *)netdev_page_pool_get_ntf_free, + }, + [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = { + .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), + .cb = netdev_page_pool_get_rsp_parse, + .policy = &netdev_page_pool_nest, + .free = (void *)netdev_page_pool_get_ntf_free, + }, }; 
const struct ynl_family ynl_netdev_family = { diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 4fafac879df3..4093602c9b6c 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -21,6 +21,16 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); /* Common nested types */ +struct netdev_page_pool_info { + struct { + __u32 id:1; + __u32 ifindex:1; + } _present; + + __u64 id; + __u32 ifindex; +}; + /* ============== NETDEV_CMD_DEV_GET ============== */ /* NETDEV_CMD_DEV_GET - do */ struct netdev_dev_get_req { @@ -87,4 +97,165 @@ struct netdev_dev_get_ntf { void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp); +/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_GET - do */ +struct netdev_page_pool_get_req { + struct { + __u32 id:1; + } _present; + + __u64 id; +}; + +static inline struct netdev_page_pool_get_req * +netdev_page_pool_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_page_pool_get_req)); +} +void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req); + +static inline void +netdev_page_pool_get_req_set_id(struct netdev_page_pool_get_req *req, __u64 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_page_pool_get_rsp { + struct { + __u32 id:1; + __u32 ifindex:1; + __u32 napi_id:1; + __u32 inflight:1; + __u32 inflight_mem:1; + __u32 detach_time:1; + } _present; + + __u64 id; + __u32 ifindex; + __u64 napi_id; + __u64 inflight; + __u64 inflight_mem; + __u64 detach_time; +}; + +void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp); + +/* + * Get / dump information about Page Pools. +(Only Page Pools associated with a net_device can be listed.) 
+ + */ +struct netdev_page_pool_get_rsp * +netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req); + +/* NETDEV_CMD_PAGE_POOL_GET - dump */ +struct netdev_page_pool_get_list { + struct netdev_page_pool_get_list *next; + struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp); + +struct netdev_page_pool_get_list * +netdev_page_pool_get_dump(struct ynl_sock *ys); + +/* NETDEV_CMD_PAGE_POOL_GET - notify */ +struct netdev_page_pool_get_ntf { + __u16 family; + __u8 cmd; + struct ynl_ntf_base_type *next; + void (*free)(struct netdev_page_pool_get_ntf *ntf); + struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp); + +/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ +/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ +struct netdev_page_pool_stats_get_req { + struct { + __u32 info:1; + } _present; + + struct netdev_page_pool_info info; +}; + +static inline struct netdev_page_pool_stats_get_req * +netdev_page_pool_stats_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_page_pool_stats_get_req)); +} +void +netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req); + +static inline void +netdev_page_pool_stats_get_req_set_info_id(struct netdev_page_pool_stats_get_req *req, + __u64 id) +{ + req->_present.info = 1; + req->info._present.id = 1; + req->info.id = id; +} +static inline void +netdev_page_pool_stats_get_req_set_info_ifindex(struct netdev_page_pool_stats_get_req *req, + __u32 ifindex) +{ + req->_present.info = 1; + req->info._present.ifindex = 1; + req->info.ifindex = ifindex; +} + +struct netdev_page_pool_stats_get_rsp { + struct { + __u32 info:1; + __u32 alloc_fast:1; + __u32 alloc_slow:1; + __u32 alloc_slow_high_order:1; + __u32 alloc_empty:1; + __u32 alloc_refill:1; + __u32 alloc_waive:1; + __u32 recycle_cached:1; + __u32 recycle_cache_full:1; + __u32 recycle_ring:1; + __u32 recycle_ring_full:1; + __u32 recycle_released_refcnt:1; + } _present; + + struct netdev_page_pool_info info; + __u64 alloc_fast; + __u64 alloc_slow; + __u64 alloc_slow_high_order; + __u64 alloc_empty; + __u64 alloc_refill; + __u64 alloc_waive; + __u64 recycle_cached; + __u64 recycle_cache_full; + __u64 recycle_ring; + __u64 recycle_ring_full; + __u64 recycle_released_refcnt; +}; + +void +netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp); + +/* + * Get page pool statistics. 
+ */ +struct netdev_page_pool_stats_get_rsp * +netdev_page_pool_stats_get(struct ynl_sock *ys, + struct netdev_page_pool_stats_get_req *req); + +/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ +struct netdev_page_pool_stats_get_list { + struct netdev_page_pool_stats_get_list *next; + struct netdev_page_pool_stats_get_rsp obj __attribute__((aligned(8))); +}; + +void +netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp); + +struct netdev_page_pool_stats_get_list * +netdev_page_pool_stats_get_dump(struct ynl_sock *ys); + #endif /* _LINUX_NETDEV_GEN_H */ diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index e974378e3b8c..075d868f3b57 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -239,7 +239,7 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); #ifndef MNL_HAS_AUTO_SCALARS static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr) { - if (mnl_attr_get_len(attr) == 4) + if (mnl_attr_get_payload_len(attr) == 4) return mnl_attr_get_u32(attr); return mnl_attr_get_u64(attr); } diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore index 2aae60c4829f..49637b26c482 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/samples/.gitignore @@ -1,3 +1,4 @@ ethtool devlink netdev +page-pool \ No newline at end of file diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 3dbb106e87d9..1afefc266b7a 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -18,7 +18,7 @@ include $(wildcard *.d) all: $(BINS) -$(BINS): ../lib/ynl.a ../generated/protos.a +$(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) @echo -e '\tCC sample $@' @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o @$(LINK.c) $@.o -o $@ $(LDLIBS) diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c new file mode 100644 index 000000000000..18d359713469 --- /dev/null +++ b/tools/net/ynl/samples/page-pool.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include +#include + +#include + +#include + +#include "netdev-user.h" + +struct stat { + unsigned int ifc; + + struct { + unsigned int cnt; + size_t refs, bytes; + } live[2]; + + size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache; +}; + +struct stats_array { + unsigned int i, max; + struct stat *s; +}; + +static struct stat *find_ifc(struct stats_array *a, unsigned int ifindex) +{ + unsigned int i; + + for (i = 0; i < a->i; i++) { + if (a->s[i].ifc == ifindex) + return &a->s[i]; + } + + a->i++; + if (a->i == a->max) { + a->max *= 2; + a->s = reallocarray(a->s, a->max, sizeof(*a->s)); + } + a->s[i].ifc = ifindex; + return &a->s[i]; +} + +static void count(struct stat *s, unsigned int l, + struct netdev_page_pool_get_rsp *pp) +{ + s->live[l].cnt++; + if (pp->_present.inflight) + s->live[l].refs += pp->inflight; + if (pp->_present.inflight_mem) + s->live[l].bytes += pp->inflight_mem; +} + +int main(int argc, char **argv) +{ + struct netdev_page_pool_stats_get_list *pp_stats; + struct netdev_page_pool_get_list *pools; + struct stats_array a = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + a.max = 128; + a.s = calloc(a.max, sizeof(*a.s)); + if (!a.s) + goto err_close; + + pools = netdev_page_pool_get_dump(ys); + if (!pools) + goto err_free; + + ynl_dump_foreach(pools, pp) { + struct stat *s = find_ifc(&a, pp->ifindex); + + count(s, 1, 
pp); + if (pp->_present.destroyed) + count(s, 0, pp); + } + netdev_page_pool_get_list_free(pools); + + pp_stats = netdev_page_pool_stats_get_dump(ys); + if (!pp_stats) + goto err_free; + + ynl_dump_foreach(pp_stats, pp) { + struct stat *s = find_ifc(&a, pp->info.ifindex); + + if (pp->_present.alloc_fast) + s->alloc_fast += pp->alloc_fast; + if (pp->_present.alloc_slow) + s->alloc_slow += pp->alloc_slow; + if (pp->_present.recycle_ring) + s->recycle_ring += pp->recycle_ring; + if (pp->_present.recycle_cached) + s->recycle_cache += pp->recycle_cached; + } + netdev_page_pool_stats_get_list_free(pp_stats); + + for (unsigned int i = 0; i < a.i; i++) { + char ifname[IF_NAMESIZE]; + struct stat *s = &a.s[i]; + const char *name; + double recycle; + + if (!s->ifc) { + name = "\t"; + } else { + name = if_indextoname(s->ifc, ifname); + if (name) + printf("%8s", name); + printf("[%d]\t", s->ifc); + } + + printf("page pools: %u (zombies: %u)\n", + s->live[1].cnt, s->live[0].cnt); + printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n", + s->live[1].refs, s->live[1].bytes, + s->live[0].refs, s->live[0].bytes); + + /* We don't know how many pages are sitting in cache and ring + * so we will under-count the recycling rate a bit. + */ + recycle = (double)(s->recycle_ring + s->recycle_cache) / + (s->alloc_fast + s->alloc_slow) * 100; + printf("\t\trecycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)\n", + recycle, s->alloc_slow, s->alloc_fast, + s->recycle_ring, s->recycle_cache); + } + + ynl_sock_destroy(ys); + return 0; + +err_free: + free(a.s); +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); + ynl_sock_destroy(ys); + return 2; +} -- cgit v1.2.3-70-g09d2 From 48f0dfd8d3e212ab27b6db147ed10407ff6aaa88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:25 +0100 Subject: libbpf: Add st_type argument to elf_resolve_syms_offsets function We need to get offsets for static variables in following changes, so making elf_resolve_syms_offsets to take st_type value as argument and passing it to elf_sym_iter_new. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20231125193130.834322-2-jolsa@kernel.org --- tools/lib/bpf/elf.c | 5 +++-- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf_internal.h | 3 ++- tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 2a62bf411bb3..b02faec748a5 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -407,7 +407,8 @@ static int symbol_cmp(const void *a, const void *b) * size, that needs to be released by the caller. 
*/ int elf_resolve_syms_offsets(const char *binary_path, int cnt, - const char **syms, unsigned long **poffsets) + const char **syms, unsigned long **poffsets, + int st_type) { int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; int err = 0, i, cnt_done = 0; @@ -438,7 +439,7 @@ int elf_resolve_syms_offsets(const char *binary_path, int cnt, struct elf_sym_iter iter; struct elf_sym *sym; - err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC); + err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], st_type); if (err == -ENOENT) continue; if (err) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e067be95da3c..ea9b8158c20d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11447,7 +11447,7 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, return libbpf_err_ptr(err); offsets = resolved_offsets; } else if (syms) { - err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets); + err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); if (err < 0) return libbpf_err_ptr(err); offsets = resolved_offsets; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index f0f08635adb0..b5d334754e5d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -594,7 +594,8 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd); void elf_close(struct elf_fd *elf_fd); int elf_resolve_syms_offsets(const char *binary_path, int cnt, - const char **syms, unsigned long **poffsets); + const char **syms, unsigned long **poffsets, + int st_type); int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, unsigned long **poffsets, size_t *pcnt); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index cd051d3901a9..ece260cf2c0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -249,7 +249,7 @@ static void __test_link_api(struct child *child) int link_extra_fd = -1; int err; - err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets); + err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets, STT_FUNC); if (!ASSERT_OK(err, "elf_resolve_syms_offsets")) return; -- cgit v1.2.3-70-g09d2 From e56fdbfb06e26a7066b070967badef4148528df2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:27 +0100 Subject: bpf: Add link_info support for uprobe multi link Adding support to get uprobe_link details through bpf_link_info interface. Adding new struct uprobe_multi to struct bpf_link_info to carry the uprobe_multi link details. The uprobe_multi.count is passed from user space to denote size of array fields (offsets/ref_ctr_offsets/cookies). The actual array size is stored back to uprobe_multi.count (allowing user to find out the actual array size) and array fields are populated up to the user passed size. All the non-array fields (path/count/flags/pid) are always set. 
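[Editor's note, not part of the patch: a minimal sketch of how user space could consume the new uprobe_multi fields, following the in/out protocol described above. It assumes libbpf's bpf_link_get_info_by_fd() wrapper (bpf_obj_get_info_by_fd() on older libbpf); PATH_SZ and MAX_OFFS are illustrative buffer sizes, not kernel limits.]

// Sketch only: query uprobe_multi details for an existing link FD.
#include <stdio.h>
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

#define PATH_SZ  4096
#define MAX_OFFS 64	/* illustrative capacity for the offsets array */

static int dump_uprobe_multi(int link_fd)
{
	char path[PATH_SZ];
	__u64 offs[MAX_OFFS];
	struct bpf_link_info info;
	__u32 len = sizeof(info);
	__u32 i, n;
	int err;

	memset(&info, 0, sizeof(info));
	/* user-provided buffers; count is in: capacity, out: real count */
	info.uprobe_multi.path = (__u64)(unsigned long)path;
	info.uprobe_multi.path_size = sizeof(path);
	info.uprobe_multi.offsets = (__u64)(unsigned long)offs;
	info.uprobe_multi.count = MAX_OFFS;

	err = bpf_link_get_info_by_fd(link_fd, &info, &len);
	if (err)	/* e.g. -ENOSPC when MAX_OFFS is smaller than the link's count */
		return err;

	printf("path %s pid %u flags 0x%x count %u\n", path,
	       info.uprobe_multi.pid, info.uprobe_multi.flags,
	       info.uprobe_multi.count);
	n = info.uprobe_multi.count < MAX_OFFS ? info.uprobe_multi.count : MAX_OFFS;
	for (i = 0; i < n; i++)
		printf("  offset[%u]: 0x%llx\n", i, (unsigned long long)offs[i]);
	return 0;
}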
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231125193130.834322-4-jolsa@kernel.org --- include/uapi/linux/bpf.h | 10 ++++++ kernel/trace/bpf_trace.c | 72 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 10 ++++++ 3 files changed, 92 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7a5498242eaa..e88746ba7d21 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6562,6 +6562,16 @@ struct bpf_link_info { __u32 flags; __u64 missed; } kprobe_multi; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 path_size; /* in/out: real path size on success, including zero byte */ + __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */ + __u32 flags; + __u32 pid; + } uprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ __u32 :32; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ad0323f27288..c284a4ad0315 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3042,6 +3042,7 @@ struct bpf_uprobe_multi_link { struct path path; struct bpf_link link; u32 cnt; + u32 flags; struct bpf_uprobe *uprobes; struct task_struct *task; }; @@ -3083,9 +3084,79 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) kfree(umulti_link); } +static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets); + u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies); + u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets); + u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path); + u32 upath_size = info->uprobe_multi.path_size; + struct bpf_uprobe_multi_link *umulti_link; + u32 ucount = info->uprobe_multi.count; + int err = 0, i; + long left; + + if (!upath ^ !upath_size) + return -EINVAL; + + if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount) + return -EINVAL; + + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); + info->uprobe_multi.count = umulti_link->cnt; + info->uprobe_multi.flags = umulti_link->flags; + info->uprobe_multi.pid = umulti_link->task ? 
+ task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; + + if (upath) { + char *p, *buf; + + upath_size = min_t(u32, upath_size, PATH_MAX); + + buf = kmalloc(upath_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = d_path(&umulti_link->path, buf, upath_size); + if (IS_ERR(p)) { + kfree(buf); + return PTR_ERR(p); + } + upath_size = buf + upath_size - p; + left = copy_to_user(upath, p, upath_size); + kfree(buf); + if (left) + return -EFAULT; + info->uprobe_multi.path_size = upath_size; + } + + if (!uoffsets && !ucookies && !uref_ctr_offsets) + return 0; + + if (ucount < umulti_link->cnt) + err = -ENOSPC; + else + ucount = umulti_link->cnt; + + for (i = 0; i < ucount; i++) { + if (uoffsets && + put_user(umulti_link->uprobes[i].offset, uoffsets + i)) + return -EFAULT; + if (uref_ctr_offsets && + put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) + return -EFAULT; + if (ucookies && + put_user(umulti_link->uprobes[i].cookie, ucookies + i)) + return -EFAULT; + } + + return err; +} + static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { .release = bpf_uprobe_multi_link_release, .dealloc = bpf_uprobe_multi_link_dealloc, + .fill_link_info = bpf_uprobe_multi_link_fill_link_info, }; static int uprobe_prog_run(struct bpf_uprobe *uprobe, @@ -3274,6 +3345,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr link->uprobes = uprobes; link->path = path; link->task = task; + link->flags = flags; bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, &bpf_uprobe_multi_link_lops, prog); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7a5498242eaa..e88746ba7d21 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6562,6 +6562,16 @@ struct bpf_link_info { __u32 flags; __u64 missed; } kprobe_multi; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 path_size; /* in/out: real path size on success, including zero byte */ + __u32 count; /* in/out: uprobe_multi offsets/ref_ctr_offsets/cookies count */ + __u32 flags; + __u32 pid; + } uprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ __u32 :32; -- cgit v1.2.3-70-g09d2 From 1703612885723869064f18e8816c6f3f87987748 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:28 +0100 Subject: selftests/bpf: Use bpf_link__destroy in fill_link_info tests The fill_link_info test keeps skeleton open and just creates various links. We are wrongly calling bpf_link__detach after each test to close them, we need to call bpf_link__destroy. 
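[Editor's note, not part of the patch: bpf_link__detach() only issues BPF_LINK_DETACH on the underlying link; it does not close the link FD or free the struct bpf_link, so calling it per sub-test leaks resources, while bpf_link__destroy() releases both. A minimal sketch of the intended per-test lifetime; the program name "kprobe_run" and the kprobe target "do_nanosleep" are purely illustrative.]

// Sketch only: attach a link for one check, then destroy (not detach) it.
#include <bpf/libbpf.h>

static void run_one_check(struct bpf_object *obj)
{
	struct bpf_program *prog;
	struct bpf_link *link;

	prog = bpf_object__find_program_by_name(obj, "kprobe_run");
	if (!prog)
		return;

	link = bpf_program__attach_kprobe(prog, false /* !retprobe */, "do_nanosleep");
	if (!link)
		return;

	/* ... inspect bpf_link__fd(link), e.g. via bpf_link_get_info_by_fd() ... */

	/* bpf_link__detach() would leave the FD and struct bpf_link behind;
	 * bpf_link__destroy() detaches, closes the FD and frees the struct.
	 */
	bpf_link__destroy(link);
}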
Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Yafang Shao Link: https://lore.kernel.org/bpf/20231125193130.834322-5-jolsa@kernel.org --- .../selftests/bpf/prog_tests/fill_link_info.c | 44 +++++++++++----------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 97142a4db374..9294cb8d7743 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -140,14 +140,14 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, .retprobe = type == BPF_PERF_EVENT_KRETPROBE, ); ssize_t entry_offset = 0; + struct bpf_link *link; int link_fd, err; - skel->links.kprobe_run = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, - KPROBE_FUNC, &opts); - if (!ASSERT_OK_PTR(skel->links.kprobe_run, "attach_kprobe")) + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, KPROBE_FUNC, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe")) return; - link_fd = bpf_link__fd(skel->links.kprobe_run); + link_fd = bpf_link__fd(link); if (!invalid) { /* See also arch_adjust_kprobe_addr(). */ if (skel->kconfig->CONFIG_X86_KERNEL_IBT) @@ -157,39 +157,41 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, } else { kprobe_fill_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kprobe_run); + bpf_link__destroy(link); } static void test_tp_fill_link_info(struct test_fill_link_info *skel) { + struct bpf_link *link; int link_fd, err; - skel->links.tp_run = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); - if (!ASSERT_OK_PTR(skel->links.tp_run, "attach_tp")) + link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + if (!ASSERT_OK_PTR(link, "attach_tp")) return; - link_fd = bpf_link__fd(skel->links.tp_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.tp_run); + bpf_link__destroy(link); } static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { + struct bpf_link *link; int link_fd, err; - skel->links.uprobe_run = bpf_program__attach_uprobe(skel->progs.uprobe_run, - type == BPF_PERF_EVENT_URETPROBE, - 0, /* self pid */ - UPROBE_FILE, uprobe_offset); - if (!ASSERT_OK_PTR(skel->links.uprobe_run, "attach_uprobe")) + link = bpf_program__attach_uprobe(skel->progs.uprobe_run, + type == BPF_PERF_EVENT_URETPROBE, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset); + if (!ASSERT_OK_PTR(link, "attach_uprobe")) return; - link_fd = bpf_link__fd(skel->links.uprobe_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.uprobe_run); + bpf_link__destroy(link); } static int verify_kmulti_link_info(int fd, bool retprobe) @@ -278,24 +280,24 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, bool retprobe, bool invalid) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct bpf_link *link; int link_fd, err; opts.syms = kmulti_syms; opts.cnt = KMULTI_CNT; opts.retprobe = retprobe; - skel->links.kmulti_run = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, - NULL, &opts); - if (!ASSERT_OK_PTR(skel->links.kmulti_run, "attach_kprobe_multi")) + 
link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) return; - link_fd = bpf_link__fd(skel->links.kmulti_run); + link_fd = bpf_link__fd(link); if (!invalid) { err = verify_kmulti_link_info(link_fd, retprobe); ASSERT_OK(err, "verify_kmulti_link_info"); } else { verify_kmulti_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kmulti_run); + bpf_link__destroy(link); } void test_fill_link_info(void) -- cgit v1.2.3-70-g09d2 From 147c69307bcf67f1f01246f9acb794da9837f299 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:29 +0100 Subject: selftests/bpf: Add link_info test for uprobe_multi link Adding fill_link_info test for uprobe_multi link. Setting up uprobes with bogus ref_ctr_offsets and cookie values to test all the bpf_link_info::uprobe_multi fields. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231125193130.834322-6-jolsa@kernel.org --- .../selftests/bpf/prog_tests/fill_link_info.c | 198 +++++++++++++++++++++ .../selftests/bpf/progs/test_fill_link_info.c | 6 + 2 files changed, 204 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 9294cb8d7743..d4b1901f7879 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -7,6 +7,7 @@ #include #include "trace_helpers.h" #include "test_fill_link_info.skel.h" +#include "bpf/libbpf_internal.h" #define TP_CAT "sched" #define TP_NAME "sched_switch" @@ -300,6 +301,196 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, bpf_link__destroy(link); } +#define SEC(name) __attribute__((section(name), used)) + +static short uprobe_link_info_sema_1 SEC(".probes"); +static short uprobe_link_info_sema_2 SEC(".probes"); +static short uprobe_link_info_sema_3 SEC(".probes"); + +noinline void uprobe_link_info_func_1(void) +{ + asm volatile (""); + uprobe_link_info_sema_1++; +} + +noinline void uprobe_link_info_func_2(void) +{ + asm volatile (""); + uprobe_link_info_sema_2++; +} + +noinline void uprobe_link_info_func_3(void) +{ + asm volatile (""); + uprobe_link_info_sema_3++; +} + +static int +verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets, + __u64 *cookies, __u64 *ref_ctr_offsets) +{ + char path[PATH_MAX], path_buf[PATH_MAX]; + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 ref_ctr_offsets_buf[3]; + __u64 offsets_buf[3]; + __u64 cookies_buf[3]; + int i, err, bit; + __u32 count = 0; + + memset(path, 0, sizeof(path)); + err = readlink("/proc/self/exe", path, sizeof(path)); + if (!ASSERT_NEQ(err, -1, "readlink")) + return -1; + + for (bit = 0; bit < 8; bit++) { + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path = ptr_to_u64(path_buf); + info.uprobe_multi.path_size = sizeof(path_buf); + info.uprobe_multi.count = count; + + if (bit & 0x1) + info.uprobe_multi.offsets = ptr_to_u64(offsets_buf); + if (bit & 0x2) + info.uprobe_multi.cookies = ptr_to_u64(cookies_buf); + if (bit & 0x4) + info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets_buf); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_UPROBE_MULTI, "info.type")) + return -1; + + ASSERT_EQ(info.uprobe_multi.pid, getpid(), "info.uprobe_multi.pid"); 
+ ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count"); + ASSERT_EQ(info.uprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN, + retprobe, "info.uprobe_multi.flags.retprobe"); + ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, "info.uprobe_multi.path_size"); + ASSERT_STREQ(path_buf, path, "info.uprobe_multi.path"); + + for (i = 0; i < info.uprobe_multi.count; i++) { + if (info.uprobe_multi.offsets) + ASSERT_EQ(offsets_buf[i], offsets[i], "info.uprobe_multi.offsets"); + if (info.uprobe_multi.cookies) + ASSERT_EQ(cookies_buf[i], cookies[i], "info.uprobe_multi.cookies"); + if (info.uprobe_multi.ref_ctr_offsets) { + ASSERT_EQ(ref_ctr_offsets_buf[i], ref_ctr_offsets[i], + "info.uprobe_multi.ref_ctr_offsets"); + } + } + count = count ?: info.uprobe_multi.count; + } + + return 0; +} + +static void verify_umulti_invalid_user_buffer(int fd) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 buf[3]; + int err; + + /* upath_size defined, not path */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path_size = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "failed_upath_size"); + + /* path defined, but small */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path = ptr_to_u64(buf); + info.uprobe_multi.path_size = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_LT(err, 0, "failed_upath_small"); + + /* path has wrong pointer */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path_size = PATH_MAX; + info.uprobe_multi.path = 123; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "failed_bad_path_ptr"); + + /* count zero, with offsets */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = ptr_to_u64(buf); + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "failed_count"); + + /* offsets not big enough */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = ptr_to_u64(buf); + info.uprobe_multi.count = 2; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -ENOSPC, "failed_small_count"); + + /* offsets has wrong pointer */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = 123; + info.uprobe_multi.count = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "failed_wrong_offsets"); +} + +static void test_uprobe_multi_fill_link_info(struct test_fill_link_info *skel, + bool retprobe, bool invalid) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .retprobe = retprobe, + ); + const char *syms[3] = { + "uprobe_link_info_func_1", + "uprobe_link_info_func_2", + "uprobe_link_info_func_3", + }; + __u64 cookies[3] = { + 0xdead, + 0xbeef, + 0xcafe, + }; + const char *sema[3] = { + "uprobe_link_info_sema_1", + "uprobe_link_info_sema_2", + "uprobe_link_info_sema_3", + }; + __u64 *offsets = NULL, *ref_ctr_offsets; + struct bpf_link *link; + int link_fd, err; + + err = elf_resolve_syms_offsets("/proc/self/exe", 3, sema, + (unsigned long **) &ref_ctr_offsets, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object")) + return; + + err = elf_resolve_syms_offsets("/proc/self/exe", 3, syms, + (unsigned long **) &offsets, STT_FUNC); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_func")) + goto out; + + opts.syms = syms; + opts.cookies = &cookies[0]; + opts.ref_ctr_offsets = (unsigned long *) &ref_ctr_offsets[0]; + opts.cnt = ARRAY_SIZE(syms); + + link = bpf_program__attach_uprobe_multi(skel->progs.umulti_run, 0, + "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, 
"bpf_program__attach_uprobe_multi")) + goto out; + + link_fd = bpf_link__fd(link); + if (invalid) + verify_umulti_invalid_user_buffer(link_fd); + else + verify_umulti_link_info(link_fd, retprobe, offsets, cookies, ref_ctr_offsets); + + bpf_link__destroy(link); +out: + free(ref_ctr_offsets); + free(offsets); +} + void test_fill_link_info(void) { struct test_fill_link_info *skel; @@ -339,6 +530,13 @@ void test_fill_link_info(void) if (test__start_subtest("kprobe_multi_invalid_ubuff")) test_kprobe_multi_fill_link_info(skel, true, true); + if (test__start_subtest("uprobe_multi_link_info")) + test_uprobe_multi_fill_link_info(skel, false, false); + if (test__start_subtest("uretprobe_multi_link_info")) + test_uprobe_multi_fill_link_info(skel, true, false); + if (test__start_subtest("uprobe_multi_invalid")) + test_uprobe_multi_fill_link_info(skel, false, true); + cleanup: test_fill_link_info__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 564f402d56fe..69509f8bb680 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -39,4 +39,10 @@ int BPF_PROG(kmulti_run) return 0; } +SEC("uprobe.multi") +int BPF_PROG(umulti_run) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From a7795698f8b6c48283fa4334eb313bc1350b2864 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 25 Nov 2023 20:31:30 +0100 Subject: bpftool: Add support to display uprobe_multi links Adding support to display details for uprobe_multi links, both plain: # bpftool link -p ... 24: uprobe_multi prog 126 uprobe.multi path /home/jolsa/bpf/test_progs func_cnt 3 pid 4143 offset ref_ctr_offset cookies 0xd1f88 0xf5d5a8 0xdead 0xd1f8f 0xf5d5aa 0xbeef 0xd1f96 0xf5d5ac 0xcafe and json: # bpftool link -p [{ ... 
},{ "id": 24, "type": "uprobe_multi", "prog_id": 126, "retprobe": false, "path": "/home/jolsa/bpf/test_progs", "func_cnt": 3, "pid": 4143, "funcs": [{ "offset": 860040, "ref_ctr_offset": 16111016, "cookie": 57005 },{ "offset": 860047, "ref_ctr_offset": 16111018, "cookie": 48879 },{ "offset": 860054, "ref_ctr_offset": 16111020, "cookie": 51966 } ] } ] Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Reviewed-by: Quentin Monnet Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20231125193130.834322-7-jolsa@kernel.org --- tools/bpf/bpftool/link.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index a1528cde81ab..cb46667a6b2e 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -294,6 +294,37 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_end_array(json_wtr); } +static __u64 *u64_to_arr(__u64 val) +{ + return (__u64 *) u64_to_ptr(val); +} + +static void +show_uprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) +{ + __u32 i; + + jsonw_bool_field(json_wtr, "retprobe", + info->uprobe_multi.flags & BPF_F_UPROBE_MULTI_RETURN); + jsonw_string_field(json_wtr, "path", (char *) u64_to_ptr(info->uprobe_multi.path)); + jsonw_uint_field(json_wtr, "func_cnt", info->uprobe_multi.count); + jsonw_int_field(json_wtr, "pid", (int) info->uprobe_multi.pid); + jsonw_name(json_wtr, "funcs"); + jsonw_start_array(json_wtr); + + for (i = 0; i < info->uprobe_multi.count; i++) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "offset", + u64_to_arr(info->uprobe_multi.offsets)[i]); + jsonw_uint_field(json_wtr, "ref_ctr_offset", + u64_to_arr(info->uprobe_multi.ref_ctr_offsets)[i]); + jsonw_uint_field(json_wtr, "cookie", + u64_to_arr(info->uprobe_multi.cookies)[i]); + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); +} + static void show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) { @@ -465,6 +496,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_KPROBE_MULTI: show_kprobe_multi_json(info, json_wtr); break; + case BPF_LINK_TYPE_UPROBE_MULTI: + show_uprobe_multi_json(info, json_wtr); + break; case BPF_LINK_TYPE_PERF_EVENT: switch (info->perf_event.type) { case BPF_PERF_EVENT_EVENT: @@ -674,6 +708,33 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) } } +static void show_uprobe_multi_plain(struct bpf_link_info *info) +{ + __u32 i; + + if (!info->uprobe_multi.count) + return; + + if (info->uprobe_multi.flags & BPF_F_UPROBE_MULTI_RETURN) + printf("\n\turetprobe.multi "); + else + printf("\n\tuprobe.multi "); + + printf("path %s ", (char *) u64_to_ptr(info->uprobe_multi.path)); + printf("func_cnt %u ", info->uprobe_multi.count); + + if (info->uprobe_multi.pid) + printf("pid %d ", info->uprobe_multi.pid); + + printf("\n\t%-16s %-16s %-16s", "offset", "ref_ctr_offset", "cookies"); + for (i = 0; i < info->uprobe_multi.count; i++) { + printf("\n\t0x%-16llx 0x%-16llx 0x%-16llx", + u64_to_arr(info->uprobe_multi.offsets)[i], + u64_to_arr(info->uprobe_multi.ref_ctr_offsets)[i], + u64_to_arr(info->uprobe_multi.cookies)[i]); + } +} + static void show_perf_event_kprobe_plain(struct bpf_link_info *info) { const char *buf; @@ -807,6 +868,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_KPROBE_MULTI: show_kprobe_multi_plain(info); break; + case BPF_LINK_TYPE_UPROBE_MULTI: + 
show_uprobe_multi_plain(info); + break; case BPF_LINK_TYPE_PERF_EVENT: switch (info->perf_event.type) { case BPF_PERF_EVENT_EVENT: @@ -846,8 +910,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) static int do_show_link(int fd) { + __u64 *ref_ctr_offsets = NULL, *offsets = NULL, *cookies = NULL; struct bpf_link_info info; __u32 len = sizeof(info); + char path_buf[PATH_MAX]; __u64 *addrs = NULL; char buf[PATH_MAX]; int count; @@ -889,6 +955,39 @@ again: goto again; } } + if (info.type == BPF_LINK_TYPE_UPROBE_MULTI && + !info.uprobe_multi.offsets) { + count = info.uprobe_multi.count; + if (count) { + offsets = calloc(count, sizeof(__u64)); + if (!offsets) { + p_err("mem alloc failed"); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.offsets = ptr_to_u64(offsets); + ref_ctr_offsets = calloc(count, sizeof(__u64)); + if (!ref_ctr_offsets) { + p_err("mem alloc failed"); + free(offsets); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets); + cookies = calloc(count, sizeof(__u64)); + if (!cookies) { + p_err("mem alloc failed"); + free(cookies); + free(offsets); + close(fd); + return -ENOMEM; + } + info.uprobe_multi.cookies = ptr_to_u64(cookies); + info.uprobe_multi.path = ptr_to_u64(path_buf); + info.uprobe_multi.path_size = sizeof(path_buf); + goto again; + } + } if (info.type == BPF_LINK_TYPE_PERF_EVENT) { switch (info.perf_event.type) { case BPF_PERF_EVENT_TRACEPOINT: @@ -924,8 +1023,10 @@ again: else show_link_close_plain(fd, &info); - if (addrs) - free(addrs); + free(ref_ctr_offsets); + free(cookies); + free(offsets); + free(addrs); close(fd); return 0; } -- cgit v1.2.3-70-g09d2 From 2ce344b6891618063c0bebe22d9cdf9c086e0aa8 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 25 Nov 2023 17:42:50 +0900 Subject: selftests/bpf: Choose pkg-config for the target pkg-config is used to build sign-file executable. It should use the library for the target instead of the host as it is called during tests. 
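As an illustrative invocation (not part of the patch), a cross build can now either rely on the CROSS_COMPILE prefix or override PKG_CONFIG explicitly:

  $ make -C tools/testing/selftests/bpf CROSS_COMPILE=aarch64-linux-gnu-
  $ make -C tools/testing/selftests/bpf PKG_CONFIG=aarch64-linux-gnu-pkg-config

Because PKG_CONFIG defaults to $(CROSS_COMPILE)pkg-config after this change, the first form resolves the target's pkg-config automatically; the second sets it directly.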
Signed-off-by: Akihiko Odaki Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231125084253.85025-2-akihiko.odaki@daynix.com --- tools/testing/selftests/bpf/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9c27b67bc7b1..94825ef813d5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -18,7 +18,7 @@ else GENDIR := $(abspath ../../../../include/generated) endif GENHDR := $(GENDIR)/autoconf.h -HOSTPKG_CONFIG := pkg-config +PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR @@ -219,9 +219,9 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(call msg,SIGN-FILE,,$@) - $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \ + $(Q)$(CC) $(shell $(PKG_CONFIG) --cflags libcrypto 2> /dev/null) \ $< -o $@ \ - $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) + $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) -- cgit v1.2.3-70-g09d2 From 18f6f9de98d1d0f7040e6e6c39fce8939f55520f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 25 Nov 2023 17:42:51 +0900 Subject: selftests/bpf: Override PKG_CONFIG for static builds A library may need to depend on additional archive files for static builds so pkg-config should be instructed to list them. Signed-off-by: Akihiko Odaki Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231125084253.85025-3-akihiko.odaki@daynix.com --- tools/testing/selftests/bpf/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index cb9b95702ac6..9af79c7a9b58 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -77,7 +77,7 @@ In case of linker errors when running selftests, try using static linking: .. code-block:: console - $ LDLIBS=-static vmtest.sh + $ LDLIBS=-static PKG_CONFIG='pkg-config --static' vmtest.sh .. note:: Some distros may not support static linking. -- cgit v1.2.3-70-g09d2 From 8998a479fd96b0b209dcb2feb468eba7eddb4ddb Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Sat, 25 Nov 2023 17:42:52 +0900 Subject: selftests/bpf: Use pkg-config for libelf When linking statically, libraries may require other dependencies to be included to ld flags. In particular, libelf may require libzstd. Use pkg-config to determine such dependencies. 
Signed-off-by: Akihiko Odaki Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231125084253.85025-4-akihiko.odaki@daynix.com --- tools/testing/selftests/bpf/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 94825ef813d5..617ae55c3bb5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -29,13 +29,17 @@ SAN_CFLAGS ?= SAN_LDFLAGS ?= $(SAN_CFLAGS) RELEASE ?= OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) + +LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null) +LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) + CFLAGS += -g $(OPT_FLAGS) -rdynamic \ -Wall -Werror \ - $(GENFLAGS) $(SAN_CFLAGS) \ + $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) -LDLIBS += -lelf -lz -lrt -lpthread +LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread ifneq ($(LLVM),) # Silence some warnings when compiled with clang -- cgit v1.2.3-70-g09d2 From 341ac980eab90ac1f6c22ee9f9da83ed9604d899 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:07 -0800 Subject: xsk: Support tx_metadata_len For zerocopy mode, tx_desc->addr can point to an arbitrary offset and carry some TX metadata in the headroom. For copy mode, there is no way currently to populate skb metadata. Introduce new tx_metadata_len umem config option that indicates how many bytes to treat as metadata. Metadata bytes come prior to tx_desc address (same as in RX case). The size of the metadata has mostly the same constraints as XDP: - less than 256 bytes - 8-byte aligned (compared to 4-byte alignment on xdp, due to 8-byte timestamp in the completion) - non-zero This data is not interpreted in any way right now. 
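A minimal userspace sketch of registering a UMEM with the new field (the socket, buffer and the 16-byte length are illustrative; error handling is elided):

#include <stdint.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>

/* xsk_fd is assumed to be an AF_XDP socket, umem_area a page-aligned
 * mapping of umem_size bytes.
 */
static int reg_umem_with_tx_metadata(int xsk_fd, void *umem_area, __u64 umem_size)
{
	struct xdp_umem_reg mr = {
		.addr = (__u64)(uintptr_t)umem_area,
		.len = umem_size,
		.chunk_size = 4096,
		.headroom = 0,
		/* non-zero, 8-byte aligned, below 256 */
		.tx_metadata_len = 16,
	};

	/* TX descriptors may now carry 16 bytes of metadata directly
	 * before desc->addr.
	 */
	return setsockopt(xsk_fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
}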
Reviewed-by: Song Yoong Siang Signed-off-by: Stanislav Fomichev Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-2-sdf@google.com Signed-off-by: Alexei Starovoitov --- include/net/xdp_sock.h | 1 + include/net/xsk_buff_pool.h | 1 + include/uapi/linux/if_xdp.h | 1 + net/xdp/xdp_umem.c | 4 ++++ net/xdp/xsk.c | 12 +++++++++++- net/xdp/xsk_buff_pool.c | 1 + net/xdp/xsk_queue.h | 17 ++++++++++------- tools/include/uapi/linux/if_xdp.h | 1 + 8 files changed, 30 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index f83128007fb0..bcf765124f72 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -30,6 +30,7 @@ struct xdp_umem { struct user_struct *user; refcount_t users; u8 flags; + u8 tx_metadata_len; bool zc; struct page **pgs; int id; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index b0bdff26fc88..1985ffaf9b0c 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -77,6 +77,7 @@ struct xsk_buff_pool { u32 chunk_size; u32 chunk_shift; u32 frame_len; + u8 tx_metadata_len; /* inherited from umem */ u8 cached_need_wakeup; bool uses_need_wakeup; bool dma_need_sync; diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 8d48863472b9..2ecf79282c26 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 06cead2b8e34..946a687fb8e8 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; + if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8) + return -EINVAL; + umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; @@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; + umem->tx_metadata_len = mr->tx_metadata_len; INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ae9f8cb611f6..c904356e2800 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 { __u32 headroom; }; +struct xdp_umem_reg_v2 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; + __u32 flags; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(struct xdp_umem_reg_v1)) return -EINVAL; - else if (optlen < sizeof(mr)) + else if (optlen < sizeof(struct xdp_umem_reg_v2)) mr_size = sizeof(struct xdp_umem_reg_v1); + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v2); if (copy_from_sockptr(&mr, optval, mr_size)) return -EFAULT; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 49cb9f9a09be..386eddcdf837 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, XDP_PACKET_HEADROOM; pool->umem = umem; pool->addrs = umem->addrs; + pool->tx_metadata_len = umem->tx_metadata_len; 
INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 13354a1e4280..c74a1372bcb9 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 offset = desc->addr & (pool->chunk_size - 1); + u64 addr = desc->addr - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; + u64 offset = addr & (pool->chunk_size - 1); if (!desc->len) return false; - if (offset + desc->len > pool->chunk_size) + if (offset + len > pool->chunk_size) return false; - if (desc->addr >= pool->addrs_cnt) + if (addr >= pool->addrs_cnt) return false; if (xp_unused_options_set(desc->options)) @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; if (!desc->len) return false; - if (desc->len > pool->chunk_size) + if (len > pool->chunk_size) return false; - if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || - xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) + if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || + xp_desc_crosses_non_contig_pg(pool, addr, len)) return false; if (xp_unused_options_set(desc->options)) diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 73a47da885dc..34411a2e5b6c 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -76,6 +76,7 @@ struct xdp_umem_reg { __u32 chunk_size; __u32 headroom; __u32 flags; + __u32 tx_metadata_len; }; struct xdp_statistics { -- cgit v1.2.3-70-g09d2 From 48eb03dd26304c24f03bdbb9382e89c8564e71df Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:08 -0800 Subject: xsk: Add TX timestamp and TX checksum offload support This change actually defines the (initial) metadata layout that should be used by AF_XDP userspace (xsk_tx_metadata). The first field is flags which requests appropriate offloads, followed by the offload-specific fields. The supported per-device offloads are exported via netlink (new xsk-flags). The offloads themselves are still implemented in a bit of a framework-y fashion that's left from my initial kfunc attempt. I'm introducing new xsk_tx_metadata_ops which drivers are supposed to implement. The drivers are also supposed to call xsk_tx_metadata_request/xsk_tx_metadata_complete in the right places. Since xsk_tx_metadata_{request,_complete} are static inline, we don't incur any extra overhead doing indirect calls. The benefit of this scheme is as follows: - keeps all metadata layout parsing away from driver code - makes it easy to grep and see which drivers implement what - don't need any extra flags to maintain to keep track of what offloads are implemented; if the callback is implemented - the offload is supported (used by netlink reporting code) Two offloads are defined right now: 1. XDP_TXMD_FLAGS_CHECKSUM: skb-style csum_start+csum_offset 2. 
XDP_TXMD_FLAGS_TIMESTAMP: writes TX timestamp back into metadata area upon completion (tx_timestamp field) XDP_TXMD_FLAGS_TIMESTAMP is also implemented for XDP_COPY mode: it writes SW timestamp from the skb destructor (note I'm reusing hwtstamps to pass metadata pointer). The struct is forward-compatible and can be extended in the future by appending more fields. Reviewed-by: Song Yoong Siang Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-3-sdf@google.com Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 19 +++++- include/linux/netdevice.h | 2 + include/linux/skbuff.h | 14 +++- include/net/xdp_sock.h | 110 ++++++++++++++++++++++++++++++++ include/net/xdp_sock_drv.h | 13 ++++ include/net/xsk_buff_pool.h | 6 ++ include/uapi/linux/if_xdp.h | 38 +++++++++++ include/uapi/linux/netdev.h | 16 +++++ net/core/netdev-genl.c | 13 +++- net/xdp/xsk.c | 34 ++++++++++ net/xdp/xsk_queue.h | 2 +- tools/include/uapi/linux/if_xdp.h | 52 +++++++++++++-- tools/include/uapi/linux/netdev.h | 16 +++++ tools/net/ynl/generated/netdev-user.c | 19 ++++++ tools/net/ynl/generated/netdev-user.h | 3 + 15 files changed, 348 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 14511b13f305..00439bcbd2e3 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -45,7 +45,6 @@ definitions: - type: flags name: xdp-rx-metadata - render-max: true entries: - name: timestamp @@ -55,6 +54,18 @@ definitions: name: hash doc: Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). + - + type: flags + name: xsk-flags + entries: + - + name: tx-timestamp + doc: + HW timestamping egress packets is supported by the driver. + - + name: tx-checksum + doc: + L3 checksum HW offload is supported by the driver. attribute-sets: - @@ -86,6 +97,11 @@ attribute-sets: See Documentation/networking/xdp-rx-metadata.rst for more details. type: u64 enum: xdp-rx-metadata + - + name: xsk-features + doc: Bitmask of enabled AF_XDP features. + type: u64 + enum: xsk-flags operations: list: @@ -103,6 +119,7 @@ operations: - xdp-features - xdp-zc-max-segs - xdp-rx-metadata-features + - xsk-features dump: reply: *dev-all - diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e87caa81f70c..08da8b28c816 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1865,6 +1865,7 @@ enum netdev_stat_type { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. + * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. 
* @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour @@ -2128,6 +2129,7 @@ struct net_device { unsigned long long priv_flags; const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; + const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; int ifindex; unsigned short gflags; unsigned short hard_header_len; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 27998f73183e..b370eb8d70f7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -566,6 +566,15 @@ struct ubuf_info_msgzc { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); +/* Preserve some data across TX submission and completion. + * + * Note, this state is stored in the driver. Extending the layout + * might need some special care. + */ +struct xsk_tx_metadata_compl { + __u64 *tx_timestamp; +}; + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -578,7 +587,10 @@ struct skb_shared_info { /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; - struct skb_shared_hwtstamps hwtstamps; + union { + struct skb_shared_hwtstamps hwtstamps; + struct xsk_tx_metadata_compl xsk_meta; + }; unsigned int gso_type; u32 tskey; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index bcf765124f72..3cb4dc9bd70e 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -93,12 +93,105 @@ struct xdp_sock { struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */ }; +/* + * AF_XDP TX metadata hooks for network devices. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * void (*tmo_request_timestamp)(void *priv) + * Called when AF_XDP frame requested egress timestamp. + * + * u64 (*tmo_fill_timestamp)(void *priv) + * Called when AF_XDP frame, that had requested egress timestamp, + * received a completion. The hook needs to return the actual HW timestamp. + * + * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv) + * Called when AF_XDP frame requested HW checksum offload. csum_start + * indicates position where checksumming should start. + * csum_offset indicates position where checksum should be stored. + * + */ +struct xsk_tx_metadata_ops { + void (*tmo_request_timestamp)(void *priv); + u64 (*tmo_fill_timestamp)(void *priv); + void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv); +}; + #ifdef CONFIG_XDP_SOCKETS int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(void); +/** + * xsk_tx_metadata_to_compl - Save enough relevant metadata information + * to perform tx completion in the future. + * @meta: pointer to AF_XDP metadata area + * @compl: pointer to output struct xsk_tx_metadata_to_compl + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. The value of @compl should be stored + * and passed to xsk_tx_metadata_complete upon TX completion. 
+ */ +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ + if (!meta) + return; + + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + compl->tx_timestamp = &meta->completion.tx_timestamp; + else + compl->tx_timestamp = NULL; +} + +/** + * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission + * and call appropriate xsk_tx_metadata_ops operation. + * @meta: pointer to AF_XDP metadata area + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. + */ +static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!meta) + return; + + if (ops->tmo_request_timestamp) + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + ops->tmo_request_timestamp(priv); + + if (ops->tmo_request_checksum) + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) + ops->tmo_request_checksum(meta->request.csum_start, + meta->request.csum_offset, priv); +} + +/** + * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion + * and call appropriate xsk_tx_metadata_ops operation. + * @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device upon + * AF_XDP egress completion. + */ +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!compl) + return; + + *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); +} + #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) @@ -115,6 +208,23 @@ static inline void __xsk_map_flush(void) { } +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ +} + +static inline void xsk_tx_metadata_request(struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + #endif /* CONFIG_XDP_SOCKETS */ #if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 1f6fc8c7a84c..e2558ac3e195 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -165,6 +165,14 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return xp_raw_get_data(pool, addr); } +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + if (!pool->tx_metadata_len) + return NULL; + + return xp_raw_get_data(pool, addr) - pool->tx_metadata_len; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); @@ -324,6 +332,11 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return NULL; } +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + return NULL; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 1985ffaf9b0c..97f5cc10d79e 
100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -33,6 +33,7 @@ struct xdp_buff_xsk { }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) +#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t)) struct xsk_dma_map { dma_addr_t *dma_pages; @@ -234,4 +235,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } +static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool) +{ + return pool->tx_metadata_len > 0; +} + #endif /* XSK_BUFF_POOL_H_ */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 2ecf79282c26..95de66d5a26c 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -106,6 +106,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of struct xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of struct + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -122,4 +157,7 @@ struct xdp_desc { */ #define XDP_PKT_CONTD (1 << 0) +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) + #endif /* _LINUX_IF_XDP_H */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 2943a151d4f1..48d5477a668c 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -53,12 +53,28 @@ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_MASK = 3, }; +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. + * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. 
+ */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + + /* private: */ + NETDEV_XSK_FLAGS_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fe61f85bcf33..10f2124e9e23 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "netdev-genl-gen.h" @@ -13,6 +14,7 @@ static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xsk_features = 0; u64 xdp_rx_meta = 0; void *hdr; @@ -26,11 +28,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC + if (netdev->xsk_tx_metadata_ops) { + if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) + xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; + if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) + xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; + } + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, - xdp_rx_meta, NETDEV_A_DEV_PAD)) { + xdp_rx_meta, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, + xsk_features, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c904356e2800..e83ade32f1fd 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -571,6 +571,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb) static void xsk_destruct_skb(struct sk_buff *skb) { + struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta; + + if (compl->tx_timestamp) { + /* sw completion timestamp, not a real one */ + *compl->tx_timestamp = ktime_get_tai_fast_ns(); + } + xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb)); sock_wfree(skb); } @@ -655,8 +662,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct xdp_desc *desc) { + struct xsk_tx_metadata *meta = NULL; struct net_device *dev = xs->dev; struct sk_buff *skb = xs->skb; + bool first_frag = false; int err; if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) { @@ -687,6 +696,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, kfree_skb(skb); goto free_err; } + + first_frag = true; } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct page *page; @@ -709,12 +720,35 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); } + + if (first_frag && desc->options & XDP_TX_METADATA) { + if (unlikely(xs->pool->tx_metadata_len == 0)) { + err = -EINVAL; + goto free_err; + } + + meta = buffer - xs->pool->tx_metadata_len; + + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { + if (unlikely(meta->request.csum_start + + meta->request.csum_offset + + sizeof(__sum16) > len)) { + err = -EINVAL; + goto free_err; + } + + skb->csum_start = hr + meta->request.csum_start; + skb->csum_offset = meta->request.csum_offset; + skb->ip_summed = CHECKSUM_PARTIAL; + } + } } skb->dev = dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; + xsk_tx_metadata_to_compl(meta, 
&skb_shinfo(skb)->xsk_meta); xsk_set_destructor_arg(skb); return skb; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index c74a1372bcb9..6f2d1621c992 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -137,7 +137,7 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_unused_options_set(u32 options) { - return options & ~XDP_PKT_CONTD; + return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA); } static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 34411a2e5b6c..d0882edc1642 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -26,11 +26,11 @@ */ #define XDP_USE_NEED_WAKEUP (1 << 3) /* By setting this option, userspace application indicates that it can - * handle multiple descriptors per packet thus enabling xsk core to split + * handle multiple descriptors per packet thus enabling AF_XDP to split * multi-buffer XDP frames into multiple Rx descriptors. Without this set - * such frames will be dropped by xsk. + * such frames will be dropped. */ -#define XDP_USE_SG (1 << 4) +#define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ #define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) @@ -106,6 +106,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of union xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of union + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -113,9 +148,16 @@ struct xdp_desc { __u32 options; }; -/* Flag indicating packet constitutes of multiple buffers*/ +/* UMEM descriptor is __u64 */ + +/* Flag indicating that the packet continues with the buffer pointed out by the + * next frame in the ring. The end of the packet is signalled by setting this + * bit to zero. For single buffer packets, every descriptor has 'options' set + * to 0 and this maintains backward compatibility. + */ #define XDP_PKT_CONTD (1 << 0) -/* UMEM descriptor is __u64 */ +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) #endif /* _LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 2943a151d4f1..48d5477a668c 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -53,12 +53,28 @@ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_MASK = 3, }; +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. 
+ * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. + */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + + /* private: */ + NETDEV_XSK_FLAGS_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index b5ffe8cd1144..6283d87dad37 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -58,6 +58,19 @@ const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) return netdev_xdp_rx_metadata_strmap[value]; } +static const char * const netdev_xsk_flags_strmap[] = { + [0] = "tx-timestamp", + [1] = "tx-checksum", +}; + +const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) +{ + value = ffs(value) - 1; + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xsk_flags_strmap)) + return NULL; + return netdev_xsk_flags_strmap[value]; +} + /* Policies */ struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, @@ -65,6 +78,7 @@ struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, }, [NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, }, [NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, }, + [NETDEV_A_DEV_XSK_FEATURES] = { .name = "xsk-features", .type = YNL_PT_U64, }, }; struct ynl_policy_nest netdev_dev_nest = { @@ -116,6 +130,11 @@ int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; dst->_present.xdp_rx_metadata_features = 1; dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr); + } else if (type == NETDEV_A_DEV_XSK_FEATURES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.xsk_features = 1; + dst->xsk_features = mnl_attr_get_u64(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 4fafac879df3..39af1908444b 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -19,6 +19,7 @@ extern const struct ynl_family ynl_netdev_family; const char *netdev_op_str(int op); const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); +const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); /* Common nested types */ /* ============== NETDEV_CMD_DEV_GET ============== */ @@ -50,12 +51,14 @@ struct netdev_dev_get_rsp { __u32 xdp_features:1; __u32 xdp_zc_max_segs:1; __u32 xdp_rx_metadata_features:1; + __u32 xsk_features:1; } _present; __u32 ifindex; __u64 xdp_features; __u32 xdp_zc_max_segs; __u64 xdp_rx_metadata_features; + __u64 xsk_features; }; void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp); -- cgit v1.2.3-70-g09d2 From 9276009d35d3f65e083b95d30a4f967baaae0fc6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:09 -0800 Subject: tools: ynl: Print xsk-features from the sample In a similar fashion we do for the other bit masks. Fix mask parsing (>= vs >) while we are it. 
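A small illustration of the off-by-one this fixes, with a made-up mask value:

#include <stdio.h>

/* With mask = 0x4, the old "mask > 1ULL << i" condition stops the loop at
 * i == 2 (4 > 4 is false), so the highest -- here the only -- set flag is
 * never printed; "mask >= 1ULL << i" keeps i == 2 in range and reports it.
 */
static void print_bits(unsigned long long mask)
{
	for (int i = 0; mask >= 1ULL << i; i++)
		if (mask & (1ULL << i))
			printf(" bit %d", i);
}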
Signed-off-by: Stanislav Fomichev Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-4-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/net/ynl/samples/netdev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c index b828225daad0..591b90e21890 100644 --- a/tools/net/ynl/samples/netdev.c +++ b/tools/net/ynl/samples/netdev.c @@ -33,17 +33,23 @@ static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op) return; printf("xdp-features (%llx):", d->xdp_features); - for (int i = 0; d->xdp_features > 1U << i; i++) { + for (int i = 0; d->xdp_features >= 1U << i; i++) { if (d->xdp_features & (1U << i)) printf(" %s", netdev_xdp_act_str(1 << i)); } printf(" xdp-rx-metadata-features (%llx):", d->xdp_rx_metadata_features); - for (int i = 0; d->xdp_rx_metadata_features > 1U << i; i++) { + for (int i = 0; d->xdp_rx_metadata_features >= 1U << i; i++) { if (d->xdp_rx_metadata_features & (1U << i)) printf(" %s", netdev_xdp_rx_metadata_str(1 << i)); } + printf(" xsk-features (%llx):", d->xsk_features); + for (int i = 0; d->xsk_features >= 1U << i; i++) { + if (d->xsk_features & (1U << i)) + printf(" %s", netdev_xsk_flags_str(1 << i)); + } + printf(" xdp-zc-max-segs=%u", d->xdp_zc_max_segs); name = netdev_op_str(op); -- cgit v1.2.3-70-g09d2 From 11614723af26e7c32fcb704d8f30fdf60c1122dc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:14 -0800 Subject: xsk: Add option to calculate TX checksum in SW For XDP_COPY mode, add a UMEM option XDP_UMEM_TX_SW_CSUM to call skb_checksum_help in transmit path. Might be useful to debugging issues with real hardware. I also use this mode in the selftests. Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-9-sdf@google.com Signed-off-by: Alexei Starovoitov --- Documentation/networking/xsk-tx-metadata.rst | 9 +++++++++ include/net/xsk_buff_pool.h | 1 + include/uapi/linux/if_xdp.h | 8 +++++++- net/xdp/xdp_umem.c | 7 ++++++- net/xdp/xsk.c | 6 ++++++ net/xdp/xsk_buff_pool.c | 1 + tools/include/uapi/linux/if_xdp.h | 8 +++++++- 7 files changed, 37 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/Documentation/networking/xsk-tx-metadata.rst b/Documentation/networking/xsk-tx-metadata.rst index 4f376560b23f..97ecfa480d00 100644 --- a/Documentation/networking/xsk-tx-metadata.rst +++ b/Documentation/networking/xsk-tx-metadata.rst @@ -50,6 +50,15 @@ packet's ``struct xdp_desc`` descriptor should set ``XDP_TX_METADATA`` bit in the ``options`` field. Also note that in a multi-buffer packet only the first chunk should carry the metadata. +Software TX Checksum +==================== + +For development and testing purposes its possible to pass +``XDP_UMEM_TX_SW_CSUM`` flag to ``XDP_UMEM_REG`` UMEM registration call. +In this case, when running in ``XDK_COPY`` mode, the TX checksum +is calculated on the CPU. Do not enable this option in production because +it will negatively affect performance. + Querying Device Capabilities ============================ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 97f5cc10d79e..8d48d37ab7c0 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -83,6 +83,7 @@ struct xsk_buff_pool { bool uses_need_wakeup; bool dma_need_sync; bool unaligned; + bool tx_sw_csum; void *addrs; /* Mutual exclusion of the completion ring in the SKB mode. 
Two cases to protect: * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 95de66d5a26c..d31698410410 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -33,7 +33,13 @@ #define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 946a687fb8e8..caa340134b0e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) return 0; } +#define XDP_UMEM_FLAGS_VALID ( \ + XDP_UMEM_UNALIGNED_CHUNK_FLAG | \ + XDP_UMEM_TX_SW_CSUM | \ + 0) + static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; @@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) + if (mr->flags & ~XDP_UMEM_FLAGS_VALID) return -EINVAL; if (!unaligned_chunks && !is_power_of_2(chunk_size)) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d66ba9d6154f..281d49b4fca4 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -744,6 +744,12 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb->csum_start = hr + meta->request.csum_start; skb->csum_offset = meta->request.csum_offset; skb->ip_summed = CHECKSUM_PARTIAL; + + if (unlikely(xs->pool->tx_sw_csum)) { + err = skb_checksum_help(skb); + if (err) + goto free_err; + } } } } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 386eddcdf837..4f6f538a5462 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->umem = umem; pool->addrs = umem->addrs; pool->tx_metadata_len = umem->tx_metadata_len; + pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index d0882edc1642..638c606dfa74 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -33,7 +33,13 @@ #define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; -- cgit v1.2.3-70-g09d2 From df3ed0003ec4994ce8ed4818c435c481281df89e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:15 -0800 Subject: selftests/xsk: Support tx_metadata_len Add new config field and propagate to UMEM registration setsockopt. 
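A sketch of how a test can request TX metadata headroom through the harness (surrounding variables are assumed; ring setup and error handling are elided):

#include <linux/if_xdp.h>
#include "xsk.h"

/* Illustrative only: the new field is passed through to XDP_UMEM_REG by
 * xsk_umem__create(), telling the kernel that this many bytes directly
 * before each TX descriptor's addr hold struct xsk_tx_metadata.
 */
static int create_umem_with_tx_metadata(void *buffer, __u64 size,
					struct xsk_umem **umem,
					struct xsk_ring_prod *fill,
					struct xsk_ring_cons *comp)
{
	struct xsk_umem_config cfg = {
		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
		.tx_metadata_len = sizeof(struct xsk_tx_metadata),
	};

	return xsk_umem__create(umem, buffer, size, fill, comp, &cfg);
}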
Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-10-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xsk.c | 3 +++ tools/testing/selftests/bpf/xsk.h | 1 + 2 files changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index e574711eeb84..25d568abf0f2 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -115,6 +115,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; cfg->flags = XSK_UMEM__DEFAULT_FLAGS; + cfg->tx_metadata_len = 0; return; } @@ -123,6 +124,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = usr_cfg->frame_size; cfg->frame_headroom = usr_cfg->frame_headroom; cfg->flags = usr_cfg->flags; + cfg->tx_metadata_len = usr_cfg->tx_metadata_len; } static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, @@ -252,6 +254,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, mr.chunk_size = umem->config.frame_size; mr.headroom = umem->config.frame_headroom; mr.flags = umem->config.flags; + mr.tx_metadata_len = umem->config.tx_metadata_len; err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); if (err) { diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 771570bc3731..93c2cc413cfc 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -200,6 +200,7 @@ struct xsk_umem_config { __u32 frame_size; __u32 frame_headroom; __u32 flags; + __u32 tx_metadata_len; }; int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags); -- cgit v1.2.3-70-g09d2 From f6642de0c3e94d3ef6f44e127d11fcf4138873f7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:16 -0800 Subject: selftests/bpf: Add csum helpers Checksum helpers will be used to calculate pseudo-header checksum in AF_XDP metadata selftests. 
The helpers are mirroring existing kernel ones: - csum_tcpudp_magic : IPv4 pseudo header csum - csum_ipv6_magic : IPv6 pseudo header csum - csum_fold : fold csum and do one's complement Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-11-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/network_helpers.h | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 34f1200a781b..94b9be24e39b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -71,4 +71,47 @@ struct nstoken; */ struct nstoken *open_netns(const char *name); void close_netns(struct nstoken *token); + +static __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + + return (__u16)~csum; +} + +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum csum) +{ + __u64 s = csum; + + s += (__u32)saddr; + s += (__u32)daddr; + s += htons(proto + len); + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, + __wsum csum) +{ + __u64 s = csum; + int i; + + for (i = 0; i < 4; i++) + s += (__u32)saddr->s6_addr32[i]; + for (i = 0; i < 4; i++) + s += (__u32)daddr->s6_addr32[i]; + s += htons(proto + len); + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + #endif -- cgit v1.2.3-70-g09d2 From 40808a237d9c8fcaa3eaeefe2ac59e4733907478 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:17 -0800 Subject: selftests/bpf: Add TX side to xdp_metadata Request TX timestamp and make sure it's not empty. Request TX checksum offload (SW-only) and make sure it's resolved to the correct one. 
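A condensed sketch of what the new TX path in this test does (assumptions: the UMEM was registered with tx_metadata_len equal to sizeof(struct xsk_tx_metadata) and the XDP_UMEM_TX_SW_CSUM flag, data points at the Ethernet header of the frame, and request_tx_offloads() is a hypothetical name, not the test's actual function):

#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/if_xdp.h>

#include "network_helpers.h"	/* csum_tcpudp_magic() added above */

static void request_tx_offloads(void *data, struct iphdr *iph,
				struct udphdr *udph)
{
	/* TX metadata lives right in front of the packet data */
	struct xsk_tx_metadata *meta = data - sizeof(struct xsk_tx_metadata);

	memset(meta, 0, sizeof(*meta));

	/* ask for a TX completion timestamp */
	meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;

	/* seed the UDP checksum with the pseudo-header sum, then ask for
	 * checksum completion from csum_start/csum_offset; with
	 * XDP_UMEM_TX_SW_CSUM this is finished in software
	 */
	udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
					 ntohs(udph->len), IPPROTO_UDP, 0);
	meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
	meta->request.csum_start = sizeof(struct ethhdr) + sizeof(*iph);
	meta->request.csum_offset = offsetof(struct udphdr, check);
}

The descriptor carrying such a frame must also set the XDP_TX_METADATA bit in its options field and point addr past the metadata area, as the patch below does.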
Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-12-sdf@google.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/xdp_metadata.c | 33 +++++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 4439ba9392f8..33cdf88efa6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -56,7 +56,8 @@ static int open_xsk(int ifindex, struct xsk *xsk) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM, + .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx; u64 addr; @@ -138,6 +139,7 @@ static void ip_csum(struct iphdr *iph) static int generate_packet(struct xsk *xsk, __u16 dst_port) { + struct xsk_tx_metadata *meta; struct xdp_desc *tx_desc; struct udphdr *udph; struct ethhdr *eth; @@ -151,10 +153,14 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) return -1; tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); - tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE; + tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); + meta = data - sizeof(struct xsk_tx_metadata); + memset(meta, 0, sizeof(*meta)); + meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; + eth = data; iph = (void *)(eth + 1); udph = (void *)(iph + 1); @@ -178,11 +184,17 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) udph->source = htons(AF_XDP_SOURCE_PORT); udph->dest = htons(dst_port); udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); - udph->check = 0; + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); + meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; + meta->request.csum_start = sizeof(*eth) + sizeof(*iph); + meta->request.csum_offset = offsetof(struct udphdr, check); + tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; + tx_desc->options |= XDP_TX_METADATA; xsk_ring_prod__submit(&xsk->tx, 1); ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); @@ -194,13 +206,21 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) static void complete_tx(struct xsk *xsk) { - __u32 idx; + struct xsk_tx_metadata *meta; __u64 addr; + void *data; + __u32 idx; if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); + + data = xsk_umem__get_data(xsk->umem_area, addr); + meta = data - sizeof(struct xsk_tx_metadata); + + ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp"); + xsk_ring_cons__release(&xsk->comp, 1); } } @@ -221,6 +241,7 @@ static int verify_xsk_metadata(struct xsk *xsk) const struct xdp_desc *rx_desc; struct pollfd fds = {}; struct xdp_meta *meta; + struct udphdr *udph; struct ethhdr *eth; struct iphdr *iph; __u64 comp_addr; @@ -257,6 +278,7 @@ static int verify_xsk_metadata(struct xsk *xsk) ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto"); iph = (void *)(eth + 1); 
ASSERT_EQ((int)iph->version, 4, "iph->version"); + udph = (void *)(iph + 1); /* custom metadata */ @@ -270,6 +292,9 @@ static int verify_xsk_metadata(struct xsk *xsk) ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); + /* checksum offload */ + ASSERT_EQ(udph->check, htons(0x721c), "csum"); + xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); -- cgit v1.2.3-70-g09d2 From 12b4b7963d3cca253db3c31aa7611b988699e30f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:18 -0800 Subject: selftests/bpf: Convert xdp_hw_metadata to XDP_USE_NEED_WAKEUP This is the recommended way to run AF_XDP, so let's use it in the test. Also, some unrelated changes to now blow up the log too much: - change default mode to zerocopy and add -c to use copy mode - small fixes for the flags/sizes/prints - add print_tstamp_delta to print timestamp + reference Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-13-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 73 ++++++++++++++++++--------- 1 file changed, 49 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index c3ba40d0b9de..4484b17c7a56 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -32,7 +32,7 @@ #include "xdp_metadata.h" -#define UMEM_NUM 16 +#define UMEM_NUM 256 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) @@ -48,7 +48,7 @@ struct xsk { }; struct xdp_hw_metadata *bpf_obj; -__u16 bind_flags = XDP_COPY; +__u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY; struct xsk *rx_xsk; const char *ifname; int ifindex; @@ -68,7 +68,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + .flags = XSK_UMEM__DEFAULT_FLAGS, }; __u32 idx; u64 addr; @@ -110,7 +110,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) for (i = 0; i < UMEM_NUM / 2; i++) { addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); - *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; + *xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr; } xsk_ring_prod__submit(&xsk->fill, ret); @@ -131,12 +131,22 @@ static void refill_rx(struct xsk *xsk, __u64 addr) __u32 idx; if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { - printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); + printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr); *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; xsk_ring_prod__submit(&xsk->fill, 1); } } +static int kick_tx(struct xsk *xsk) +{ + return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); +} + +static int kick_rx(struct xsk *xsk) +{ + return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); +} + #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ static __u64 gettime(clockid_t clock_id) { @@ -152,6 +162,17 @@ static __u64 gettime(clockid_t clock_id) return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; } +static void print_tstamp_delta(const char *name, const char *refname, + __u64 tstamp, __u64 reference) +{ + __s64 delta = (__s64)reference - (__s64)tstamp; + + 
printf("%s: %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n", + name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname, + (double)delta / NANOSEC_PER_SEC, + (double)delta / 1000); +} + static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; @@ -167,22 +188,13 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, (double)meta->rx_timestamp / NANOSEC_PER_SEC); if (meta->rx_timestamp) { - __u64 usr_clock = gettime(clock_id); - __u64 xdp_clock = meta->xdp_timestamp; - __s64 delta_X = xdp_clock - meta->rx_timestamp; - __s64 delta_X2U = usr_clock - xdp_clock; - - printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", - xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC, - (double)delta_X / NANOSEC_PER_SEC, - (double)delta_X / 1000); - - printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", - usr_clock, (double)usr_clock / NANOSEC_PER_SEC, - (double)delta_X2U / NANOSEC_PER_SEC, - (double)delta_X2U / 1000); - } + __u64 ref_tstamp = gettime(clock_id); + print_tstamp_delta("HW RX-time", "User RX-time", + meta->rx_timestamp, ref_tstamp); + print_tstamp_delta("XDP RX-time", "User RX-time", + meta->xdp_timestamp, ref_tstamp); + } } static void verify_skb_metadata(int fd) @@ -252,6 +264,13 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t while (true) { errno = 0; + + for (i = 0; i < rxq; i++) { + ret = kick_rx(&rx_xsk[i]); + if (ret) + printf("kick_rx ret=%d\n", ret); + } + ret = poll(fds, rxq + 1, 1000); printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", ret, errno, bpf_obj->bss->pkts_skip, @@ -420,8 +439,9 @@ static void print_usage(void) { const char *usage = "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n" - " -m Enable multi-buffer XDP for larger MTU\n" + " -c Run in copy mode (zerocopy is default)\n" " -h Display this help and exit\n\n" + " -m Enable multi-buffer XDP for larger MTU\n" "Generate test packets on the other machine with:\n" " echo -n xdp | nc -u -q1 9091\n"; @@ -432,14 +452,19 @@ static void read_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "mh")) != -1) { + while ((opt = getopt(argc, argv, "chm")) != -1) { switch (opt) { - case 'm': - bind_flags |= XDP_USE_SG; + case 'c': + bind_flags &= ~XDP_USE_NEED_WAKEUP; + bind_flags &= ~XDP_ZEROCOPY; + bind_flags |= XDP_COPY; break; case 'h': print_usage(); exit(0); + case 'm': + bind_flags |= XDP_USE_SG; + break; case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option: -%c\n", optopt); -- cgit v1.2.3-70-g09d2 From 60523115c1b10c8855492d84c1ba88af452e221c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:19 -0800 Subject: selftests/bpf: Add TX side to xdp_hw_metadata When we get a packet on port 9091, we swap src/dst and send it out. At this point we also request the timestamp and checksum offloads. Checksum offload is verified by looking at the tcpdump on the other side. The tool prints pseudo-header csum and the final one it expects. The final checksum actually matches the incoming packets checksum because we only flip the src/dst and don't change the payload. Some other related changes: - switched to zerocopy mode by default; new flag can be used to force old behavior - request fixed tx_metadata_len headroom - some other small fixes (umem size, fill idx+i, etc) mvbz3:~# ./xdp_hw_metadata eth3 ... 
xsk_ring_cons__peek: 1 0x19546f8: rx_desc[0]->addr=80100 addr=80100 comp_addr=80100 rx_hash: 0x80B7EA8B with RSS type:0x2A rx_timestamp: 1697580171852147395 (sec:1697580171.8521) HW RX-time: 1697580171852147395 (sec:1697580171.8521), delta to User RX-time sec:0.2797 (279673.082 usec) XDP RX-time: 1697580172131699047 (sec:1697580172.1317), delta to User RX-time sec:0.0001 (121.430 usec) 0x19546f8: ping-pong with csum=3b8e (want d862) csum_start=54 csum_offset=6 0x19546f8: complete tx idx=0 addr=8 tx_timestamp: 1697580172056756493 (sec:1697580172.0568) HW TX-complete-time: 1697580172056756493 (sec:1697580172.0568), delta to User TX-complete-time sec:0.0852 (85175.537 usec) XDP RX-time: 1697580172131699047 (sec:1697580172.1317), delta to User TX-complete-time sec:0.0102 (10232.983 usec) HW RX-time: 1697580171852147395 (sec:1697580171.8521), delta to HW TX-complete-time sec:0.2046 (204609.098 usec) 0x19546f8: complete rx idx=128 addr=80100 mvbz4:~# nc -Nu -q1 ${MVBZ3_LINK_LOCAL_IP}%eth3 9091 mvbz4:~# tcpdump -vvx -i eth3 udp tcpdump: listening on eth3, link-type EN10MB (Ethernet), snapshot length 262144 bytes 12:26:09.301074 IP6 (flowlabel 0x35fa5, hlim 127, next-header UDP (17) payload length: 11) fe80::1270:fdff:fe48:1087.55807 > fe80::1270:fdff:fe48:1077.9091: [bad udp cksum 0x3b8e -> 0xde7e!] UDP, length 3 0x0000: 6003 5fa5 000b 117f fe80 0000 0000 0000 0x0010: 1270 fdff fe48 1087 fe80 0000 0000 0000 0x0020: 1270 fdff fe48 1077 d9ff 2383 000b 3b8e 0x0030: 7864 70 12:26:09.301976 IP6 (flowlabel 0x35fa5, hlim 127, next-header UDP (17) payload length: 11) fe80::1270:fdff:fe48:1077.9091 > fe80::1270:fdff:fe48:1087.55807: [udp sum ok] UDP, length 3 0x0000: 6003 5fa5 000b 117f fe80 0000 0000 0000 0x0010: 1270 fdff fe48 1077 fe80 0000 0000 0000 0x0020: 1270 fdff fe48 1087 2383 d9ff 000b de7e 0x0030: 7864 70 Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-14-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 164 +++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 4484b17c7a56..3291625ba4fb 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -10,7 +10,9 @@ * - rx_hash * * TX: - * - TBD + * - UDP 9091 packets trigger TX reply + * - TX HW timestamp is requested and reported back upon completion + * - TX checksum is requested */ #include @@ -24,11 +26,14 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #include "xdp_metadata.h" @@ -53,6 +58,9 @@ struct xsk *rx_xsk; const char *ifname; int ifindex; int rxq; +bool skip_tx; +__u64 last_hw_rx_timestamp; +__u64 last_xdp_rx_timestamp; void test__fail(void) { /* for network_helpers.c */ } @@ -69,6 +77,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, .flags = XSK_UMEM__DEFAULT_FLAGS, + .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx; u64 addr; @@ -185,15 +194,19 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); - printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, - (double)meta->rx_timestamp / NANOSEC_PER_SEC); if (meta->rx_timestamp) { __u64 ref_tstamp = 
gettime(clock_id); + /* store received timestamps to calculate a delta at tx */ + last_hw_rx_timestamp = meta->rx_timestamp; + last_xdp_rx_timestamp = meta->xdp_timestamp; + print_tstamp_delta("HW RX-time", "User RX-time", meta->rx_timestamp, ref_tstamp); print_tstamp_delta("XDP RX-time", "User RX-time", meta->xdp_timestamp, ref_tstamp); + } else { + printf("No rx_timestamp\n"); } } @@ -242,6 +255,129 @@ static void verify_skb_metadata(int fd) printf("skb hwtstamp is not found!\n"); } +static bool complete_tx(struct xsk *xsk, clockid_t clock_id) +{ + struct xsk_tx_metadata *meta; + __u64 addr; + void *data; + __u32 idx; + + if (!xsk_ring_cons__peek(&xsk->comp, 1, &idx)) + return false; + + addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); + data = xsk_umem__get_data(xsk->umem_area, addr); + meta = data - sizeof(struct xsk_tx_metadata); + + printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); + + if (meta->completion.tx_timestamp) { + __u64 ref_tstamp = gettime(clock_id); + + print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", + meta->completion.tx_timestamp, ref_tstamp); + print_tstamp_delta("XDP RX-time", "User TX-complete-time", + last_xdp_rx_timestamp, ref_tstamp); + print_tstamp_delta("HW RX-time", "HW TX-complete-time", + last_hw_rx_timestamp, meta->completion.tx_timestamp); + } else { + printf("No tx_timestamp\n"); + } + + xsk_ring_cons__release(&xsk->comp, 1); + + return true; +} + +#define swap(a, b, len) do { \ + for (int i = 0; i < len; i++) { \ + __u8 tmp = ((__u8 *)a)[i]; \ + ((__u8 *)a)[i] = ((__u8 *)b)[i]; \ + ((__u8 *)b)[i] = tmp; \ + } \ +} while (0) + +static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) +{ + struct xsk_tx_metadata *meta; + struct ipv6hdr *ip6h = NULL; + struct iphdr *iph = NULL; + struct xdp_desc *tx_desc; + struct udphdr *udph; + struct ethhdr *eth; + __sum16 want_csum; + void *data; + __u32 idx; + int ret; + int len; + + ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); + if (ret != 1) { + printf("%p: failed to reserve tx slot\n", xsk); + return; + } + + tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); + tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); + data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); + + meta = data - sizeof(struct xsk_tx_metadata); + memset(meta, 0, sizeof(*meta)); + meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; + + eth = rx_packet; + + if (eth->h_proto == htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + udph = (void *)(iph + 1); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + udph = (void *)(ip6h + 1); + } else { + printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto); + xsk_ring_prod__cancel(&xsk->tx, 1); + return; + } + + len = ETH_HLEN; + if (ip6h) + len += sizeof(*ip6h) + ntohs(ip6h->payload_len); + if (iph) + len += ntohs(iph->tot_len); + + swap(eth->h_dest, eth->h_source, ETH_ALEN); + if (iph) + swap(&iph->saddr, &iph->daddr, 4); + else + swap(&ip6h->saddr, &ip6h->daddr, 16); + swap(&udph->source, &udph->dest, 2); + + want_csum = udph->check; + if (ip6h) + udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); + else + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); + + meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; + if (iph) + meta->request.csum_start = sizeof(*eth) + sizeof(*iph); + else + meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h); + meta->request.csum_offset = offsetof(struct udphdr, 
check); + + printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n", + xsk, ntohs(udph->check), ntohs(want_csum), + meta->request.csum_start, meta->request.csum_offset); + + memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ + tx_desc->options |= XDP_TX_METADATA; + tx_desc->len = len; + + xsk_ring_prod__submit(&xsk->tx, 1); +} + static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) { const struct xdp_desc *rx_desc; @@ -307,6 +443,22 @@ peek: verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), clock_id); first_seg = false; + + if (!skip_tx) { + /* mirror first chunk back */ + ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr), + clock_id); + + ret = kick_tx(xsk); + if (ret) + printf("kick_tx ret=%d\n", ret); + + for (int j = 0; j < 500; j++) { + if (complete_tx(xsk, clock_id)) + break; + usleep(10*1000); + } + } } xsk_ring_cons__release(&xsk->rx, 1); @@ -442,6 +594,7 @@ static void print_usage(void) " -c Run in copy mode (zerocopy is default)\n" " -h Display this help and exit\n\n" " -m Enable multi-buffer XDP for larger MTU\n" + " -r Don't generate AF_XDP reply (rx metadata only)\n" "Generate test packets on the other machine with:\n" " echo -n xdp | nc -u -q1 9091\n"; @@ -452,7 +605,7 @@ static void read_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "chm")) != -1) { + while ((opt = getopt(argc, argv, "chmr")) != -1) { switch (opt) { case 'c': bind_flags &= ~XDP_USE_NEED_WAKEUP; @@ -465,6 +618,9 @@ static void read_args(int argc, char *argv[]) case 'm': bind_flags |= XDP_USE_SG; break; + case 'r': + skip_tx = true; + break; case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option: -%c\n", optopt); -- cgit v1.2.3-70-g09d2 From ee1eb9de81dbdbf078df659062e4df256a009627 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:19 -0800 Subject: tools: ynl: fix build of the page-pool sample MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name of the "destroyed" field in the reply was not changed in the sample after we started calling it "detach_time". 
page-pool.c: In function ‘main’: page-pool.c:84:33: error: ‘struct ’ has no member named ‘destroyed’ 84 | if (pp->_present.destroyed) | ^ Fixes: 637567e4a3ef ("tools: ynl: add sample for getting page-pool information") Link: https://lore.kernel.org/r/20231129193622.2912353-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/samples/page-pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c index 18d359713469..098b5190d0e5 100644 --- a/tools/net/ynl/samples/page-pool.c +++ b/tools/net/ynl/samples/page-pool.c @@ -81,7 +81,7 @@ int main(int argc, char **argv) struct stat *s = find_ifc(&a, pp->ifindex); count(s, 1, pp); - if (pp->_present.destroyed) + if (pp->_present.detach_time) count(s, 0, pp); } netdev_page_pool_get_list_free(pools); -- cgit v1.2.3-70-g09d2 From 929003723f6d1998155bf0472f97d6c75c3db93f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:20 -0800 Subject: tools: ynl: make sure we use local headers for page-pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building samples generates the following warning: In file included from page-pool.c:11: generated/netdev-user.h:21:45: warning: ‘enum netdev_xdp_rx_metadata’ declared inside parameter list will not be visible outside of this definition or declaration 21 | const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); | ^~~~~~~~~~~~~~~~~~~~~~ Our magic way of including uAPI headers assumes the sample name matches the family name. We need to copy the flags over. Fixes: 637567e4a3ef ("tools: ynl: add sample for getting page-pool information") Link: https://lore.kernel.org/r/20231129193622.2912353-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/samples/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index 1afefc266b7a..28bdb1557a54 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -18,6 +18,8 @@ include $(wildcard *.d) all: $(BINS) +CFLAGS_page-pool=$(CFLAGS_netdev) + $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS) @echo -e '\tCC sample $@' @$(COMPILE.c) $(CFLAGS_$@) $@.c -o $@.o -- cgit v1.2.3-70-g09d2 From 9cf9b57082411ad42d6d12c7b857e60add673877 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:21 -0800 Subject: tools: ynl: order building samples after generated code Parallel builds of ynl: make -C tools/net/ynl/ -j 4 don't work correctly right now. samples get handled before generated, so build of samples does not notice that protos.a has changed. Order samples to be last. 
Link: https://lore.kernel.org/r/20231129193622.2912353-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index d664b36deb5b..da1aa10bbcc3 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -4,6 +4,8 @@ SUBDIRS = lib generated samples all: $(SUBDIRS) +samples: | lib generated + $(SUBDIRS): @if [ -f "$@/Makefile" ] ; then \ $(MAKE) -C $@ ; \ -- cgit v1.2.3-70-g09d2 From a115b9279f4897b8afdfbc30035b1382c047fc26 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 Nov 2023 11:36:22 -0800 Subject: tools: ynl: don't skip regeneration from make targets Commit 2b7ac0c87d98 ("tools: ynl-gen: don't touch the output file if content is the same") is working too well. It was added so that ynl-regen -f doesn't make us rebuild half of the kernel, if there are no actual changes in any generated code. When ynl-gen-c is called by make, however, we're better off trusting make's tracking and overwrite the file. Otherwise if output is identical we won't update file timestamps and make will retry code gen on every invocation. Link: https://lore.kernel.org/r/20231129193622.2912353-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 12 ++++++++---- tools/net/ynl/ynl-regen.sh | 4 ++-- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index cbbda276f6d1..ba1c3611c6fa 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1164,8 +1164,9 @@ class RenderInfo: class CodeWriter: - def __init__(self, nlib, out_file=None): + def __init__(self, nlib, out_file=None, overwrite=True): self.nlib = nlib + self._overwrite = overwrite self._nl = False self._block_end = False @@ -1186,8 +1187,9 @@ class CodeWriter: return # Avoid modifying the file if contents didn't change self._out.flush() - if os.path.isfile(self._out_file) and filecmp.cmp(self._out.name, self._out_file, shallow=False): - return + if not self._overwrite and os.path.isfile(self._out_file): + if filecmp.cmp(self._out.name, self._out_file, shallow=False): + return with open(self._out_file, 'w+') as out_file: self._out.seek(0) shutil.copyfileobj(self._out, out_file) @@ -2516,6 +2518,8 @@ def main(): parser.add_argument('--header', dest='header', action='store_true', default=None) parser.add_argument('--source', dest='header', action='store_false') parser.add_argument('--user-header', nargs='+', default=[]) + parser.add_argument('--cmp-out', action='store_true', default=None, + help='Do not overwrite the output file if the new output is identical to the old') parser.add_argument('--exclude-op', action='append', default=[]) parser.add_argument('-o', dest='out_file', type=str, default=None) args = parser.parse_args() @@ -2543,7 +2547,7 @@ def main(): print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation') os.sys.exit(1) - cw = CodeWriter(BaseNlLib(), args.out_file) + cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out)) _, spec_kernel = find_kernel_root(args.spec) if args.mode == 'uapi' or args.header: diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index bdba24066cf1..a37304dcc88e 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -30,8 +30,8 @@ for f in $files; do fi echo -e "\tGEN ${params[2]}\t$f" - $TOOL --mode ${params[2]} --${params[3]} --spec 
$KDIR/${params[0]} \ - $args -o $f + $TOOL --cmp-out --mode ${params[2]} --${params[3]} \ + --spec $KDIR/${params[0]} $args -o $f done popd >>/dev/null -- cgit v1.2.3-70-g09d2 From 06848c0f341ee3f9226ed01e519c72e4d2b6f001 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:46 -0800 Subject: selftests: mptcp: add evts_get_info helper This patch adds a new helper get_info_value(), using 'sed' command to parse the value of the given item name in the line with the given keyword, to make chk_mptcp_info() and pedit_action_pkts() more readable. Also add another helper evts_get_info() to use get_info_value() to parse the output of 'pm_nl_ctl events' command, to make all the userspace pm selftests more readable, both in mptcp_join.sh and userspace_pm.sh. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-2-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 19 +++-- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 10 +++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 86 ++++++++++------------- 3 files changed, 57 insertions(+), 58 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3c94f2f194d6..d24b0e5e73ef 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1869,10 +1869,8 @@ chk_mptcp_info() print_check "mptcp_info ${info1:0:8}=$exp1:$exp2" - cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" | - sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q') - cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" | - sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q') + cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1") + cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2") # 'ss' only display active connections and counters that are not 0. 
[ -z "$cnt1" ] && cnt1=0 [ -z "$cnt2" ] && cnt2=0 @@ -2848,13 +2846,13 @@ verify_listener_events() return fi - type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + type=$(mptcp_lib_evts_get_info type "$evt" "$e_type") + family=$(mptcp_lib_evts_get_info family "$evt" "$e_type") + sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type") if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type") else - saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type") fi if [ $type ] && [ $type = $e_type ] && @@ -3249,8 +3247,7 @@ fastclose_tests() pedit_action_pkts() { tc -n $ns2 -j -s action show action pedit index 100 | \ - grep "packets" | \ - sed 's/.*"packets":\([0-9]\+\),.*/\1/' + mptcp_lib_get_info_value \"packets\" packets } fail_tests() diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 92a5befe8039..56cbd57abbae 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -207,3 +207,13 @@ mptcp_lib_result_print_all_tap() { printf "%s\n" "${subtest}" done } + +# get the value of keyword $1 in the line marked by keyword $2 +mptcp_lib_get_info_value() { + grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' +} + +# $1: info name ; $2: evts_ns ; $3: event type +mptcp_lib_evts_get_info() { + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index b25a3e33eb25..2413059a42e5 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -247,14 +247,11 @@ make_connection() local server_token local server_serverside - client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") - client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") - client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ - "$client_evts") - server_token=$(grep "type:1," "$server_evts" | - sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - server_serverside=$(grep "type:1," "$server_evts" | - sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') + client_token=$(mptcp_lib_evts_get_info token "$client_evts") + client_port=$(mptcp_lib_evts_get_info sport "$client_evts") + client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts") + server_token=$(mptcp_lib_evts_get_info token "$server_evts") + server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts") print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && @@ -340,16 +337,16 @@ verify_announce_event() local dport local id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) 
if [ "$e_af" = "v6" ] then - addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") + addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) else - addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") + addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) fi - dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) check_expected "type" "token" "addr" "dport" "id" } @@ -367,7 +364,7 @@ test_announce() $client_addr_id dev ns2eth1 > /dev/null 2>&1 local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token" if [ "$type" = "" ] then @@ -446,9 +443,9 @@ verify_remove_event() local token local id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) check_expected "type" "token" "id" } @@ -466,7 +463,7 @@ test_remove() $client_addr_id > /dev/null 2>&1 print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then test_pass @@ -479,7 +476,7 @@ test_remove() ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then test_pass @@ -583,19 +580,19 @@ verify_subflow_events() fi fi - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + family=$(mptcp_lib_evts_get_info family "$evt" $e_type) + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type) + remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) if [ "$family" = "$AF_INET6" ] then - saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") - daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type) + daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) else - saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") - daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type) + 
daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) fi check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid" @@ -630,7 +627,7 @@ test_subflows() kill_wait $listener_pid local sport - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from server to client machine :>"$server_evts" @@ -668,7 +665,7 @@ test_subflows() # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW6 from server to client machine :>"$server_evts" @@ -707,7 +704,7 @@ test_subflows() # Delete the listener from the client ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from server to client machine :>"$server_evts" @@ -745,7 +742,7 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -784,7 +781,7 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW6 from client to server machine :>"$client_evts" @@ -821,7 +818,7 @@ test_subflows() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -867,7 +864,7 @@ test_subflows_v4_v6_mix() # Delete the listener from the server ns, if one was created kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -933,18 +930,13 @@ verify_listener_events() print_test "CLOSE_LISTENER $e_saddr:$e_sport" fi - type=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + type=$(mptcp_lib_evts_get_info type $evt $e_type) + family=$(mptcp_lib_evts_get_info family $evt $e_type) + sport=$(mptcp_lib_evts_get_info sport $evt $e_type) if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type) else - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type) fi check_expected "type" "family" "saddr" "sport" -- cgit v1.2.3-70-g09d2 From 
80775412882e273b8ef62124fae861cde8e6fb3d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:47 -0800 Subject: selftests: mptcp: add chk_subflows_total helper This patch adds a new helper chk_subflows_total(), in it use the newly added counter mptcpi_subflows_total to get the "correct" amount of subflows, including the initial one. To be compatible with old 'ss' or kernel versions not supporting this counter, get the total subflows by listing TCP connections that are MPTCP subflows: ss -ti state state established state syn-sent state syn-recv | grep -c tcp-ulp-mptcp. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-3-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 42 ++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index d24b0e5e73ef..566fa0f6506f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1867,7 +1867,7 @@ chk_mptcp_info() local cnt2 local dump_stats - print_check "mptcp_info ${info1:0:8}=$exp1:$exp2" + print_check "mptcp_info ${info1:0:15}=$exp1:$exp2" cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1") cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2") @@ -1888,6 +1888,42 @@ chk_mptcp_info() fi } +# $1: subflows in ns1 ; $2: subflows in ns2 +# number of all subflows, including the initial subflow. +chk_subflows_total() +{ + local cnt1 + local cnt2 + local info="subflows_total" + local dump_stats + + # if subflows_total counter is supported, use it: + if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then + chk_mptcp_info $info $1 $info $2 + return + fi + + print_check "$info $1:$2" + + # if not, count the TCP connections that are in fact MPTCP subflows + cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + + if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then + fail_test "got subflows $cnt1:$cnt2 expected $1:$2" + dump_stats=1 + else + print_ok + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -ti + ss -N $ns2 -ti + fi +} + chk_link_usage() { local ns=$1 @@ -3431,10 +3467,12 @@ userspace_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 userspace_pm_rm_sf_addr_ns1 10.0.2.1 10 chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 kill_events_pids wait $tests_pid fi @@ -3451,9 +3489,11 @@ userspace_tests() userspace_pm_add_sf 10.0.3.2 20 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 userspace_pm_rm_sf_addr_ns2 10.0.3.2 20 chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 kill_events_pids wait $tests_pid fi -- cgit v1.2.3-70-g09d2 From 757c828ce94905a2975873d5e90a376c701b2b90 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:48 -0800 Subject: selftests: mptcp: update userspace pm test helpers This patch adds a new argument namespace to userspace_pm_add_addr() and userspace_pm_add_sf() to make these two helper more versatile. 
Add two more versatile helpers for userspace pm remove subflow or address: userspace_pm_rm_addr() and userspace_pm_rm_sf(). The original test helpers userspace_pm_rm_sf_addr_ns1() and userspace_pm_rm_sf_addr_ns2() can be replaced by these new helpers. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-4-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 102 ++++++++++++------------ 1 file changed, 50 insertions(+), 52 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 566fa0f6506f..6d84c7f5296a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2848,6 +2848,7 @@ backup_tests() fi } +SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED @@ -3308,75 +3309,70 @@ fail_tests() fi } +# $1: ns ; $2: addr ; $3: id userspace_pm_add_addr() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 local tk - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3 sleep 1 } -userspace_pm_rm_sf_addr_ns1() +# $1: ns ; $2: id +userspace_pm_rm_addr() { - local addr=$1 - local id=$2 - local tk sp da dp - local cnt_addr cnt_sf - - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - sp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - da=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') - dp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') - cnt_addr=$(rm_addr_count ${ns1}) - cnt_sf=$(rm_sf_count ${ns1}) - ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id - ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \ - lport $sp rip $da rport $dp token $tk - wait_rm_addr $ns1 "${cnt_addr}" - wait_rm_sf $ns1 "${cnt_sf}" + local evts=$evts_ns1 + local tk + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + cnt=$(rm_addr_count ${1}) + ip netns exec $1 ./pm_nl_ctl rem token $tk id $2 + wait_rm_addr $1 "${cnt}" } +# $1: ns ; $2: addr ; $3: id userspace_pm_add_sf() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 local tk da dp - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \ + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info daddr4 "$evts") + dp=$(mptcp_lib_evts_get_info dport "$evts") + + ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \ rip $da rport $dp token $tk sleep 1 } -userspace_pm_rm_sf_addr_ns2() +# $1: ns ; $2: addr $3: event type +userspace_pm_rm_sf() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 + local t=${3:-1} + local ip=4 local tk da dp sp - local cnt_addr cnt_sf - - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' 
"$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - sp=$(grep "type:10" "$evts_ns2" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - cnt_addr=$(rm_addr_count ${ns2}) - cnt_sf=$(rm_sf_count ${ns2}) - ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id - ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \ + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + if is_v6 $2; then ip=6; fi + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) + dp=$(mptcp_lib_evts_get_info dport "$evts" $t) + sp=$(mptcp_lib_evts_get_info sport "$evts" $t) + + cnt=$(rm_sf_count ${1}) + ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ rip $da rport $dp token $tk - wait_rm_addr $ns2 "${cnt_addr}" - wait_rm_sf $ns2 "${cnt_sf}" + wait_rm_sf $1 "${cnt}" } userspace_tests() @@ -3463,13 +3459,14 @@ userspace_tests() run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! wait_mpj $ns1 - userspace_pm_add_addr 10.0.2.1 10 + userspace_pm_add_addr $ns1 10.0.2.1 10 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 - userspace_pm_rm_sf_addr_ns1 10.0.2.1 10 + userspace_pm_rm_addr $ns1 10 + userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 @@ -3486,11 +3483,12 @@ userspace_tests() run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! wait_mpj $ns2 - userspace_pm_add_sf 10.0.3.2 20 + userspace_pm_add_sf $ns2 10.0.3.2 20 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 - userspace_pm_rm_sf_addr_ns2 10.0.3.2 20 + userspace_pm_rm_addr $ns2 20 + userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 -- cgit v1.2.3-70-g09d2 From b2e2248f365a7ef0687fe048c335fe1a32f98b36 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:49 -0800 Subject: selftests: mptcp: userspace pm create id 0 subflow This patch adds a selftest to create id 0 subflow. Pass id 0 to the helper userspace_pm_add_sf() to create id 0 subflow. chk_mptcp_info shows one subflow but chk_subflows_total shows two subflows in each namespace. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-5-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 6d84c7f5296a..4357a46ca3f3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3495,6 +3495,25 @@ userspace_tests() kill_events_pids wait $tests_pid fi + + # userspace pm create id 0 subflow + if reset_with_events "userspace pm create id 0 subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! 
+ wait_mpj $ns2 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + userspace_pm_add_sf $ns2 10.0.3.2 0 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + kill_events_pids + wait $tests_pid + fi } endpoint_tests() -- cgit v1.2.3-70-g09d2 From e3b47e460b4bd0c61d0082f1ac02650dcb79e5d1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:51 -0800 Subject: selftests: mptcp: userspace pm remove initial subflow This patch adds a selftest for userspace PM to remove the initial subflow. Use userspace_pm_add_sf() to add a subflow, and pass initial IP address to userspace_pm_rm_sf() to remove the initial subflow. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-7-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 4357a46ca3f3..812a5ee54158 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3514,6 +3514,30 @@ userspace_tests() kill_events_pids wait $tests_pid fi + + # userspace pm remove initial subflow + if reset_with_events "userspace pm remove initial subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_rm_sf $ns2 10.0.1.2 + # we don't look at the counter linked to the RM_ADDR but + # to the one linked to the subflows that have been removed + chk_rm_nr 0 1 + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + wait $tests_pid + fi } endpoint_tests() -- cgit v1.2.3-70-g09d2 From b9fb176081fb216c43d923888f0d53d9e3a5965b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:52 -0800 Subject: selftests: mptcp: userspace pm send RM_ADDR for ID 0 This patch adds a selftest for userspace PM to remove id 0 address. Use userspace_pm_add_addr() helper to add an id 10 address, then use userspace_pm_rm_addr() helper to remove id 0 address. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-8-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 812a5ee54158..a98a72e55d2c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3538,6 +3538,32 @@ userspace_tests() kill_events_pids wait $tests_pid fi + + # userspace pm send RM_ADDR for ID 0 + if reset_with_events "userspace pm send RM_ADDR for ID 0" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! 
+ wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_addr $ns1 0 + # we don't look at the counter linked to the subflows that + # have been removed but to the one linked to the RM_ADDR + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + wait $tests_pid + fi } endpoint_tests() -- cgit v1.2.3-70-g09d2 From bdbef0a6ff10603895b0ba39f56bf874cb2b551a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:53 -0800 Subject: selftests: mptcp: add mptcp_lib_kill_wait To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. Export kill_wait() helper in userspace_pm.sh into mptcp_lib.sh and rename it as mptcp_lib_kill_wait(). It can be used to instead of kill_wait() in mptcp_join.sh. Use the new helper in both scripts. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-9-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++------ tools/testing/selftests/net/mptcp/mptcp_lib.sh | 9 +++++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 31 ++++++++--------------- 3 files changed, 22 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a98a72e55d2c..e7557f6a0dc1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -682,16 +682,10 @@ wait_mpj() done } -kill_wait() -{ - kill $1 > /dev/null 2>&1 - wait $1 2>/dev/null -} - kill_events_pids() { - kill_wait $evts_ns1_pid - kill_wait $evts_ns2_pid + mptcp_lib_kill_wait $evts_ns1_pid + mptcp_lib_kill_wait $evts_ns2_pid } kill_tests_wait() diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 56cbd57abbae..e421b658d748 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -217,3 +217,12 @@ mptcp_lib_get_info_value() { mptcp_lib_evts_get_info() { mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" } + +# $1: PID +mptcp_lib_kill_wait() { + [ "${1}" -eq 0 ] && return 0 + + kill -SIGUSR1 "${1}" > /dev/null 2>&1 + kill "${1}" > /dev/null 2>&1 + wait "${1}" 2>/dev/null +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 2413059a42e5..f4e352494f05 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -108,15 +108,6 @@ test_fail() mptcp_lib_result_fail "${test_name}" } -kill_wait() -{ - [ $1 -eq 0 ] && return 0 - - kill -SIGUSR1 $1 > /dev/null 2>&1 - kill $1 > /dev/null 2>&1 - wait $1 2>/dev/null -} - # This function is used in the cleanup trap #shellcheck disable=SC2317 cleanup() @@ -128,7 +119,7 @@ cleanup() for pid in $client4_pid $server4_pid $client6_pid $server6_pid\ $server_evts_pid $client_evts_pid do - kill_wait $pid + mptcp_lib_kill_wait $pid done local netns @@ -210,7 +201,7 @@ make_connection() fi :>"$client_evts" if [ $client_evts_pid -ne 0 ]; then - kill_wait $client_evts_pid + mptcp_lib_kill_wait $client_evts_pid fi ip netns exec 
"$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & client_evts_pid=$! @@ -219,7 +210,7 @@ make_connection() fi :>"$server_evts" if [ $server_evts_pid -ne 0 ]; then - kill_wait $server_evts_pid + mptcp_lib_kill_wait $server_evts_pid fi ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & server_evts_pid=$! @@ -624,7 +615,7 @@ test_subflows() "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid local sport sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) @@ -663,7 +654,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) @@ -702,7 +693,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) @@ -740,7 +731,7 @@ test_subflows() "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -779,7 +770,7 @@ test_subflows() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -816,7 +807,7 @@ test_subflows() "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -862,7 +853,7 @@ test_subflows_v4_v6_mix() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) @@ -974,7 +965,7 @@ test_listener() sleep 0.5 # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sleep 0.5 verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port -- cgit v1.2.3-70-g09d2 From b850f2c7dd85ecd14a333685c4ffd23f12665e94 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:54 -0800 Subject: selftests: mptcp: add mptcp_lib_is_v6 To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. is_v6() helper is defined in mptcp_connect.sh, mptcp_join.sh and mptcp_sockopt.sh, so export it into mptcp_lib.sh and rename it as mptcp_lib_is_v6(). Use this new helper in all scripts. 
Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-10-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 16 +++++----------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 14 ++++---------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 5 +++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 8 +------- 4 files changed, 15 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b1fc8afd072d..4cf62b2b0480 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -310,12 +310,6 @@ check_mptcp_disabled() return 0 } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_ping() { local listener_ns="$1" @@ -324,7 +318,7 @@ do_ping() local ping_args="-q -c 1" local rc=0 - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then $ipv6 || return 0 ping_args="${ping_args} -6" fi @@ -635,12 +629,12 @@ run_tests_lo() fi # skip if we don't want v6 - if ! $ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" else local_addr="0.0.0.0" @@ -708,7 +702,7 @@ run_test_transparent() TEST_GROUP="${msg}" # skip if we don't want v6 - if ! $ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi @@ -741,7 +735,7 @@ EOF fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" r6flag="-6" else diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e7557f6a0dc1..2ec5c5006d38 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -587,12 +587,6 @@ link_failure() done } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - # $1: ns, $2: port wait_local_port_listen() { @@ -895,7 +889,7 @@ pm_nl_set_endpoint() local id=10 while [ $add_nr_ns1 -gt 0 ]; do local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:$counter::1" else addr="10.0.$counter.1" @@ -947,7 +941,7 @@ pm_nl_set_endpoint() local id=20 while [ $add_nr_ns2 -gt 0 ]; do local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:$counter::2" else addr="10.0.$counter.2" @@ -989,7 +983,7 @@ pm_nl_set_endpoint() pm_nl_flush_endpoint ${connector_ns} elif [ $rm_nr_ns2 -eq 9 ]; then local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:1::2" else addr="10.0.1.2" @@ -3357,7 +3351,7 @@ userspace_pm_rm_sf() local cnt [ "$1" == "$ns2" ] && evts=$evts_ns2 - if is_v6 $2; then ip=6; fi + if mptcp_lib_is_v6 $2; then ip=6; fi tk=$(mptcp_lib_evts_get_info token "$evts") da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) dp=$(mptcp_lib_evts_get_info dport "$evts" $t) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index e421b658d748..447292cad33c 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -226,3 +226,8 @@ mptcp_lib_kill_wait() { 
kill "${1}" > /dev/null 2>&1 wait "${1}" 2>/dev/null } + +# $1: IP address +mptcp_lib_is_v6() { + [ -z "${1##*:*}" ] +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index a817af6616ec..bfa744e350ef 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -161,12 +161,6 @@ check_transfer() return 0 } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_transfer() { local listener_ns="$1" @@ -183,7 +177,7 @@ do_transfer() local mptcp_connect="./mptcp_connect -r 20" local local_addr ip - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" ip=ipv6 else -- cgit v1.2.3-70-g09d2 From 61c131f5d4d2b79904af2fdcb2839a9db8e7c55c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:55 -0800 Subject: selftests: mptcp: add mptcp_lib_get_counter To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. The helper get_counter() in mptcp_join.sh and get_mib_counter() in mptcp_connect.sh have the same functionality, export get_counter() into mptcp_lib.sh and rename it as mptcp_lib_get_counter(). Use this new helper instead of get_counter() and get_mib_counter(). Use this helper in test_prio() in userspace_pm.sh too instead of open-coding. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-11-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 41 ++++------ tools/testing/selftests/net/mptcp/mptcp_join.sh | 88 +++++++++------------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 16 ++++ tools/testing/selftests/net/mptcp/userspace_pm.sh | 14 ++-- 4 files changed, 73 insertions(+), 86 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 4cf62b2b0480..3b971d1617d8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -335,21 +335,6 @@ do_ping() return 0 } -# $1: ns, $2: MIB counter -get_mib_counter() -{ - local listener_ns="${1}" - local mib="${2}" - - # strip the header - ip netns exec "${listener_ns}" \ - nstat -z -a "${mib}" | \ - tail -n+2 | \ - while read a count c rest; do - echo $count - done -} - # $1: ns, $2: port wait_local_port_listen() { @@ -435,12 +420,12 @@ do_transfer() nstat -n fi - local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" 
"MPTcpExtDataCsumErr") + local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -503,11 +488,11 @@ do_transfer() check_transfer $cin $sout "file received by server" rets=$? - local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -536,8 +521,8 @@ do_transfer() fi if $checksum; then - local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) if [ $csum_err_s_nr -gt 0 ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2ec5c5006d38..a5d66d1bfeb4 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -605,25 +605,9 @@ wait_local_port_listen() done } -# $1: ns ; $2: counter -get_counter() -{ - local ns="${1}" - local counter="${2}" - local count - - count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}') - if [ -z "${count}" ]; then - mptcp_lib_fail_if_expected_feature "${counter} counter" - return 1 - fi - - echo "${count}" -} - rm_addr_count() { - get_counter "${1}" "MPTcpExtRmAddr" + mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" } # $1: ns, $2: old rm_addr counter in $ns @@ -643,7 +627,7 @@ wait_rm_addr() rm_sf_count() { - get_counter "${1}" "MPTcpExtRmSubflow" + mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow" } # $1: ns, $2: old rm_sf counter in $ns @@ -666,11 +650,11 @@ wait_mpj() local ns="${1}" local cnt old_cnt - old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") local i for i in $(seq 10); do - cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done @@ -1272,7 +1256,7 @@ chk_csum_nr() fi print_check "sum" - count=$(get_counter ${ns1} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then extra_msg="$extra_msg ns1=$count" fi @@ -1285,7 +1269,7 @@ chk_csum_nr() print_ok fi print_check "csum" - count=$(get_counter ${ns2} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then extra_msg="$extra_msg ns2=$count" fi @@ -1329,7 +1313,7 @@ chk_fail_nr() fi print_check 
"ftx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then extra_msg="$extra_msg,tx=$count" fi @@ -1343,7 +1327,7 @@ chk_fail_nr() fi print_check "failrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then extra_msg="$extra_msg,rx=$count" fi @@ -1376,7 +1360,7 @@ chk_fclose_nr() fi print_check "ctx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then @@ -1387,7 +1371,7 @@ chk_fclose_nr() fi print_check "fclzrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then @@ -1417,7 +1401,7 @@ chk_rst_nr() fi print_check "rtx" - count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip # accept more rst than expected except if we don't expect any @@ -1429,7 +1413,7 @@ chk_rst_nr() fi print_check "rstrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip # accept more rst than expected except if we don't expect any @@ -1450,7 +1434,7 @@ chk_infi_nr() local count print_check "itx" - count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_tx" ]; then @@ -1460,7 +1444,7 @@ chk_infi_nr() fi print_check "infirx" - count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_rx" ]; then @@ -1489,7 +1473,7 @@ chk_join_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1500,7 +1484,7 @@ chk_join_nr() print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) - count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1517,7 +1501,7 @@ chk_join_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1550,8 +1534,8 @@ chk_stale_nr() print_check "stale" - stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale") - recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover") + stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover") if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then print_skip elif [ $stale_nr -lt $stale_min ] || @@ -1588,7 +1572,7 @@ chk_add_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1600,7 
+1584,7 @@ chk_add_nr() fi print_check "echo" - count=$(get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1611,7 +1595,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1621,7 +1605,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1632,7 +1616,7 @@ chk_add_nr() fi print_check "synack" - count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1643,7 +1627,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1654,7 +1638,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1665,7 +1649,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1687,7 +1671,7 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add TX" - count=$(get_counter ${ns1} "MPTcpExtAddAddrTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1699,7 +1683,7 @@ chk_add_tx_nr() fi print_check "echo TX" - count=$(get_counter ${ns2} "MPTcpExtEchoAddTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_tx_nr" ]; then @@ -1737,7 +1721,7 @@ chk_rm_nr() fi print_check "rm" - count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") + count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_nr" ]; then @@ -1747,13 +1731,13 @@ chk_rm_nr() fi print_check "rmsf" - count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow") + count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip elif [ -n "$simult" ]; then local cnt suffix - cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow") + cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow") # in case of simult flush, the subflow removal count on each side is # unreliable @@ -1782,7 +1766,7 @@ chk_rm_tx_nr() local rm_addr_tx_nr=$1 print_check "rm TX" - count=$(get_counter ${ns2} "MPTcpExtRmAddrTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_tx_nr" ]; then @@ -1799,7 +1783,7 @@ chk_prio_nr() local count print_check "ptx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip elif [ 
"$count" != "$mp_prio_nr_tx" ]; then @@ -1809,7 +1793,7 @@ chk_prio_nr() fi print_check "prx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mp_prio_nr_rx" ]; then @@ -1943,7 +1927,7 @@ wait_attempt_fail() while [ $time -lt $timeout_ms ]; do local cnt - cnt=$(get_counter ${ns} "TcpAttemptFails") + cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails") [ "$cnt" = 1 ] && return 1 time=$((time + 100)) diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 447292cad33c..718c79dda2b3 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -231,3 +231,19 @@ mptcp_lib_kill_wait() { mptcp_lib_is_v6() { [ -z "${1##*:*}" ] } + +# $1: ns, $2: MIB counter +mptcp_lib_get_counter() { + local ns="${1}" + local counter="${2}" + local count + + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + if [ -z "${count}" ]; then + mptcp_lib_fail_if_expected_feature "${counter} counter" + return 1 + fi + + echo "${count}" +} diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index f4e352494f05..f136956f6c95 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -884,9 +884,10 @@ test_prio() # Check TX print_test "MP_PRIO TX" - count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass @@ -894,9 +895,10 @@ test_prio() # Check RX print_test "MP_PRIO RX" - count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass -- cgit v1.2.3-70-g09d2 From 119931cc88ce7073b9a289c13abaf2348d55fdfa Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:56 -0800 Subject: selftests: mptcp: add missing oflag=append In mptcp_connect.sh we are missing something like "oflag=append" because this will write "${rem}" bytes at the beginning of the file where there is already some random bytes. It should write that at the end. This patch adds this missing 'oflag=append' flag for 'dd' command in make_file(). 
Suggested-by: Matthieu Baerts Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-12-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 3b971d1617d8..c4f08976c418 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -593,7 +593,7 @@ make_file() rem=$((SIZE - (ksize * 1024))) dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null + dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" echo "Created $name (size $(du -b "$name")) containing data sent by $who" -- cgit v1.2.3-70-g09d2 From 3a96dea9f88763cfa29e15f681f95fca8952ec77 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:57 -0800 Subject: selftests: mptcp: add mptcp_lib_make_file To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. The make_file() helpers in mptcp_sockopt.sh and userspace_pm.sh are the same. Export it into mptcp_lib.sh and rename it as mptcp_lib_make_file(). Use it in both mptcp_connect.sh and mptcp_join.sh. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-13-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 3 +-- tools/testing/selftests/net/mptcp/mptcp_join.sh | 3 +-- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 9 +++++++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 3 +-- tools/testing/selftests/net/mptcp/userspace_pm.sh | 12 +----------- 5 files changed, 13 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index c4f08976c418..d20e278480fb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -592,9 +592,8 @@ make_file() ksize=$((SIZE / 1024)) rem=$((SIZE - (ksize * 1024))) - dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null + mptcp_lib_make_file $name 1024 $ksize dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" echo "Created $name (size $(du -b "$name")) containing data sent by $who" } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a5d66d1bfeb4..37f434cbfa44 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1171,8 +1171,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size print_info "Test file (size $size KB) for $who" } diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 718c79dda2b3..61be4f29a69a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++
b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -247,3 +247,12 @@ mptcp_lib_get_counter() { echo "${count}" } + +mptcp_lib_make_file() { + local name="${1}" + local bs="${2}" + local size="${3}" + + dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}" +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index bfa744e350ef..96dc46eda133 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -251,8 +251,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size echo "Created $name (size $size KB) containing data sent by $who" } diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index f136956f6c95..6167837f48e1 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -164,22 +164,12 @@ print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass -make_file() -{ - # Store a chunk of data in a file to transmit over an MPTCP connection - local name=$1 - local ksize=1 - - dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" -} - make_connection() { if [ -z "$file" ]; then file=$(mktemp) fi - make_file "$file" "client" + mptcp_lib_make_file "$file" 2 1 local is_v6=$1 local app_port=$app4_port -- cgit v1.2.3-70-g09d2 From 9d9095bbc24df4432b38fb33a3cf59ea1e9213d0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:58 -0800 Subject: selftests: mptcp: add mptcp_lib_check_transfer To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. check_transfer() and print_file_err() helpers are defined both in mptcp_connect.sh and mptcp_sockopt.sh, export them into mptcp_lib.sh and rename them with mptcp_lib_ prefix. And use them in all scripts. Note: In mptcp_sockopt.sh it is OK to drop 'ret=1' in check_transfer() because it will be set in run_tests() anyway. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-14-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 29 ++-------------------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 ++------ tools/testing/selftests/net/mptcp/mptcp_lib.sh | 24 ++++++++++++++++++ tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 28 +-------------------- 4 files changed, 29 insertions(+), 63 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index d20e278480fb..537f180aa51e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -254,31 +254,6 @@ else set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" fi -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? 
-ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - - return 1 - fi - - return 0 -} - check_mptcp_disabled() { local disabled_ns="ns_disabled-$rndh" @@ -483,9 +458,9 @@ do_transfer() return 1 fi - check_transfer $sin $cout "file received by client" + mptcp_lib_check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 37f434cbfa44..420e4b3f9ad0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -511,13 +511,6 @@ get_failed_tests_ids() done | sort -n } -print_file_err() -{ - ls -l "$1" 1>&2 - echo -n "Trailing bytes are: " - tail -c 27 "$1" -} - check_transfer() { local in=$1 @@ -548,8 +541,8 @@ check_transfer() local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then fail_test "$what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" return 1 else diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 61be4f29a69a..86f4003ab6f6 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -256,3 +256,27 @@ mptcp_lib_make_file() { dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}" } + +# $1: file +mptcp_lib_print_file_err() { + ls -l "${1}" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "${1}" +} + +# $1: input file ; $2: output file ; $3: what kind of file +mptcp_lib_check_transfer() { + local in="${1}" + local out="${2}" + local what="${3}" + + if ! cmp "$in" "$out" > /dev/null 2>&1; then + echo "[ FAIL ] $what does not match (in, out):" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" + + return 1 + fi + + return 0 +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 96dc46eda133..c643872ddf47 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -135,32 +135,6 @@ check_mark() return 0 } -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - ret=1 - - return 1 - fi - - return 0 -} - do_transfer() { local listener_ns="$1" @@ -232,7 +206,7 @@ do_transfer() check_mark $connector_ns 4 || retc=1 fi - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? mptcp_lib_result_code "${retc}" "mark ${ip}" -- cgit v1.2.3-70-g09d2 From 9369777c29395730cec967e7d0f48aed872b7110 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 28 Nov 2023 15:18:59 -0800 Subject: selftests: mptcp: add mptcp_lib_wait_local_port_listen To avoid duplicated code in different MPTCP selftests, we can add and use helpers defined in mptcp_lib.sh. 
wait_local_port_listen() helper is defined in diag.sh, mptcp_connect.sh, mptcp_join.sh and simult_flows.sh, export it into mptcp_lib.sh and rename it with mptcp_lib_ prefix. Use this new helper in all these scripts. Note: We only have IPv4 connections in this helper, not looking at IPv6 (tcp6) but that's OK because we only have IPv4 connections here in diag.sh. Reviewed-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20231128-send-net-next-2023107-v4-15-8d6b94150f6b@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/diag.sh | 23 +++------------------- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 19 +----------------- tools/testing/selftests/net/mptcp/mptcp_join.sh | 20 +------------------ tools/testing/selftests/net/mptcp/mptcp_lib.sh | 18 +++++++++++++++++ tools/testing/selftests/net/mptcp/simult_flows.sh | 19 +----------------- 5 files changed, 24 insertions(+), 75 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 85a8ee9395b3..95b498efacd1 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -182,23 +182,6 @@ chk_msk_inuse() __chk_nr get_msk_inuse $expected "$msg" 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - wait_connected() { local listener_ns="${1}" @@ -222,7 +205,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10000 +mptcp_lib_wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" chk_msk_listen 10000 @@ -245,7 +228,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10001 +mptcp_lib_wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ @@ -266,7 +249,7 @@ for I in `seq 1 $NR_CLIENTS`; do ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done -wait_local_port_listen $ns $((NR_CLIENTS + 10001)) +mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001)) for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 537f180aa51e..7898d62fce0b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -310,23 +310,6 @@ do_ping() return 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local listener_ns="$1" @@ -408,7 +391,7 @@ do_transfer() $extra_args $local_addr < "$sin" > "$sout" & local spid=$! 
- wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 420e4b3f9ad0..8362ea454af3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -580,24 +580,6 @@ link_failure() done } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex - port_hex="$(printf "%04X" "${port}")" - - local i - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - rm_addr_count() { mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" @@ -1082,7 +1064,7 @@ do_transfer() fi local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 86f4003ab6f6..022262a2cfe0 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -280,3 +280,21 @@ mptcp_lib_check_transfer() { return 0 } + +# $1: ns, $2: port +mptcp_lib_wait_local_port_listen() { + local listener_ns="${1}" + local port="${2}" + + local port_hex + port_hex="$(printf "%04X" "${port}")" + + local _ + for _ in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ + {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ce9203b817f8..ae8ad5d6fb9d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -123,23 +123,6 @@ setup() grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local cin=$1 @@ -179,7 +162,7 @@ do_transfer() 0.0.0.0 < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${ns3}" "${port}" + mptcp_lib_wait_local_port_listen "${ns3}" "${port}" timeout ${timeout_test} \ ip netns exec ${ns1} \ -- cgit v1.2.3-70-g09d2 From e8c780a5706066eba210e4c514968d95ba29e052 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Nov 2023 20:14:27 -0800 Subject: docs: netlink: link to family documentations from spec info To increase the chances of people finding the rendered docs add a link to specs.rst and index.rst. Add a label in the generated index.rst and while at it adjust the title a little bit. 
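
With this change the generated index is expected to begin roughly as follows (a sketch based on the rst_label()/rst_title() helpers in the diff below; the exact title underline comes from the generator):

.. _specs:

Netlink Family Specifications
=============================

so that other documents can point at it with :ref:`... <specs>`.
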
Reviewed-by: Breno Leitao Reviewed-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231129041427.2763074-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/netlink/index.rst | 4 +++- Documentation/userspace-api/netlink/specs.rst | 2 +- tools/net/ynl/ynl-gen-rst.py | 8 +++++++- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst index 62725dafbbdb..c1b6765cc963 100644 --- a/Documentation/userspace-api/netlink/index.rst +++ b/Documentation/userspace-api/netlink/index.rst @@ -16,4 +16,6 @@ Netlink documentation for users. genetlink-legacy netlink-raw -See also :ref:`Documentation/core-api/netlink.rst `. +See also: + - :ref:`Documentation/core-api/netlink.rst ` + - :ref:`Documentation/networking/netlink_spec/index.rst ` diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index c1b951649113..1b50d97d8d7c 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -15,7 +15,7 @@ kernel headers directly. Internally kernel uses the YAML specs to generate: - the C uAPI header - - documentation of the protocol as a ReST file + - documentation of the protocol as a ReST file - see :ref:`Documentation/networking/netlink_spec/index.rst ` - policy tables for input attribute validation - operation tables diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index b6292109e236..8c62e040df5d 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -122,6 +122,11 @@ def rst_toctree(maxdepth: int = 2) -> str: return "\n".join(lines) +def rst_label(title: str) -> str: + """Return a formatted label""" + return f".. _{title}:\n\n" + + # Parsers # ======= @@ -349,7 +354,8 @@ def generate_main_index_rst(output: str) -> None: lines = [] lines.append(rst_header()) - lines.append(rst_title("Netlink Specification")) + lines.append(rst_label("specs")) + lines.append(rst_title("Netlink Family Specifications")) lines.append(rst_toctree(1)) index_dir = os.path.dirname(output) -- cgit v1.2.3-70-g09d2 From f7580f00cc6e5bf57256d61c223d3ac6b4001ae7 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:21 -0300 Subject: selftests: tc-testing: remove spurious nsPlugin usage Tests using DEV2 should not be run in a dedicated net namespace, and in parallel, as this device cannot be shared. 
Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-2-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 6 ------ 1 file changed, 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 361235ad574b..4598f1d330fe 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -48,9 +48,6 @@ "filter", "flower" ], - "plugins": { - "requires": "nsPlugin" - }, "setup": [ "$TC qdisc add dev $DEV2 ingress", "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" @@ -72,9 +69,6 @@ "filter", "flower" ], - "plugins": { - "requires": "nsPlugin" - }, "setup": [ "$TC qdisc add dev $DEV2 ingress", "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" -- cgit v1.2.3-70-g09d2 From 74f7e7eeb1d2c1b00f1a8fa1a6666a509f274da8 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:22 -0300 Subject: selftests: tc-testing: remove spurious './' from Makefile Patchwork CI didn't like the extra './', so remove it. Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-3-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index e8b3dde4fa16..9153e3428a77 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS += ./tdc.sh +TEST_PROGS += tdc.sh TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts include ../lib.mk -- cgit v1.2.3-70-g09d2 From 7de8b2efafeb770e3f2113d506c31be7f4c9618e Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:23 -0300 Subject: selftests: tc-testing: rename concurrency.json to flower.json All tests in this file pertain to flower, so name it appropriately Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-4-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/filters/concurrency.json | 177 --------------------- .../tc-testing/tc-tests/filters/flower.json | 177 +++++++++++++++++++++ 2 files changed, 177 insertions(+), 177 deletions(-) delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/flower.json (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json b/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json deleted file mode 100644 index c2a433a4737e..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json +++ /dev/null @@ -1,177 +0,0 @@ -[ - { - "id": "e41d", - "name": "Add 1M flower filters with 10 parallel tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 
add" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "1000000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "6f52", - "name": "Delete 1M flower filters with 10 parallel tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "c9da", - "name": "Replace 1M flower filters with 10 parallel tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 replace" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "1000000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "14be", - "name": "Concurrently replace same range of 100k flower filters from 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 replace" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "100000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "0c44", - "name": "Concurrently delete same range of 100k flower filters from 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", - "$TC -b $BATCH_DIR/add_0", - "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b\"", - "expExitCode": "123", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "ab62", - "name": "Add and delete from same tp with 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py -x init_ $DEV2 
$BATCH_DIR 100000 5 add", - "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", - "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 add", - "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "500000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - }, - { - "id": "6e8f", - "name": "Replace and delete from same tp with 10 tc instances", - "category": [ - "filter", - "flower", - "concurrency" - ], - "setup": [ - "/bin/mkdir $BATCH_DIR", - "$TC qdisc add dev $DEV2 ingress", - "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 10 add", - "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", - "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 replace", - "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" - ], - "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", - "expExitCode": "0", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "500000", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm -rf $BATCH_DIR" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json new file mode 100644 index 000000000000..c2a433a4737e --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json @@ -0,0 +1,177 @@ +[ + { + "id": "e41d", + "name": "Add 1M flower filters with 10 parallel tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 add" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1000000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "6f52", + "name": "Delete 1M flower filters with 10 parallel tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "c9da", + "name": "Replace 1M flower filters with 10 parallel tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 1000000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 replace" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 
10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1000000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "14be", + "name": "Concurrently replace same range of 100k flower filters from 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 replace" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "100000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "0c44", + "name": "Concurrently delete same range of 100k flower filters from 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 1 add", + "$TC -b $BATCH_DIR/add_0", + "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b\"", + "expExitCode": "123", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "ab62", + "name": "Add and delete from same tp with 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 5 add", + "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", + "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 add", + "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "500000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + }, + { + "id": "6e8f", + "name": "Replace and delete from same tp with 10 tc instances", + "category": [ + "filter", + "flower", + "concurrency" + ], + "setup": [ + "/bin/mkdir $BATCH_DIR", + "$TC qdisc add dev $DEV2 ingress", + "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 10 add", + "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", + "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 replace", + "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" + ], + "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", + "expExitCode": "0", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "500000", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm -rf $BATCH_DIR" + ] + } +] -- cgit v1.2.3-70-g09d2 From 0fbb5a54f9416953fa8a3857425a62ea6b1efd5c Mon 
Sep 17 00:00:00 2001 From: Pedro Tammela Date: Wed, 29 Nov 2023 19:24:24 -0300 Subject: selftests: tc-testing: remove filters/tests.json Remove this generic file and move the tests to their appropriate files Signed-off-by: Pedro Tammela Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20231129222424.910148-5-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/filters/flower.json | 98 ++++++++++++++++ .../tc-testing/tc-tests/filters/matchall.json | 23 ++++ .../tc-testing/tc-tests/filters/tests.json | 123 --------------------- 3 files changed, 121 insertions(+), 123 deletions(-) delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/tests.json (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json index c2a433a4737e..6b08c0642069 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json @@ -173,5 +173,103 @@ "$TC qdisc del dev $DEV2 ingress", "/bin/rm -rf $BATCH_DIR" ] + }, + { + "id": "2ff3", + "name": "Add flower with max handle and then dump it", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "d052", + "name": "Add 1M filters with the same action", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" + ], + "cmdUnderTest": "$TC -b $BATCH_FILE", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm $BATCH_FILE" + ] + }, + { + "id": "4cbd", + "name": "Try to add filter with duplicate key", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", + "expExitCode": "2", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "7c65", + "name": "Add flower filter and then terse dump it", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower.*handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "d45e", + "name": "Add flower filter and verify that terse dump doesn't output filter key", 
+ "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", + "matchPattern": " dst_mac e4:11:22:11:4a:51", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json index afa1b9b0c856..f8d28c415bc3 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json @@ -480,5 +480,28 @@ "$TC qdisc del dev $DUMMY ingress", "$TC actions del action police index 199" ] + }, + { + "id": "2638", + "name": "Add matchall and try to get it", + "category": [ + "filter", + "matchall" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" + ], + "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json deleted file mode 100644 index 4598f1d330fe..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ /dev/null @@ -1,123 +0,0 @@ -[ - { - "id": "2638", - "name": "Add matchall and try to get it", - "category": [ - "filter", - "matchall" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 clsact", - "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" - ], - "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 ingress", - "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 clsact" - ] - }, - { - "id": "2ff3", - "name": "Add flower with max handle and then dump it", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "d052", - "name": "Add 1M filters with the same action", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress", - "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" - ], - "cmdUnderTest": "$TC -b $BATCH_FILE", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm $BATCH_FILE" - ] - }, - { - "id": "4cbd", - "name": "Try to add filter with duplicate key", - "category": [ 
- "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress", - "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", - "expExitCode": "2", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "7c65", - "name": "Add flower filter and then terse dump it", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", - "expExitCode": "0", - "verifyCmd": "$TC -br filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower.*handle", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "d45e", - "name": "Add flower filter and verify that terse dump doesn't output filter key", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", - "expExitCode": "0", - "verifyCmd": "$TC -br filter show dev $DEV2 ingress", - "matchPattern": " dst_mac e4:11:22:11:4a:51", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - } -] -- cgit v1.2.3-70-g09d2 From b6a3451e0847d5d70fb5fa2b2a80ab9f80bf2c7b Mon Sep 17 00:00:00 2001 From: Jeroen van Ingen Schenau Date: Thu, 30 Nov 2023 13:03:53 +0100 Subject: selftests/bpf: Fix erroneous bitmask operation xdp_synproxy_kern.c is a BPF program that generates SYN cookies on allowed TCP ports and sends SYNACKs to clients, accelerating synproxy iptables module. Fix the bitmask operation when checking the status of an existing conntrack entry within tcp_lookup() function. Do not AND with the bit position number, but with the bitmask value to check whether the entry found has the IPS_CONFIRMED flag set. 
Fixes: fb5cd0ce70d4 ("selftests/bpf: Add selftests for raw syncookie helpers") Signed-off-by: Jeroen van Ingen Schenau Signed-off-by: Daniel Borkmann Tested-by: Minh Le Hoang Link: https://lore.kernel.org/xdp-newbies/CAAi1gX7owA+Tcxq-titC-h-KPM7Ri-6ZhTNMhrnPq5gmYYwKow@mail.gmail.com/T/#u Link: https://lore.kernel.org/bpf/20231130120353.3084-1-jeroen.vaningenschenau@novoserve.com --- tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 80f620602d50..518329c666e9 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -467,13 +467,13 @@ static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bo unsigned long status = ct->status; bpf_ct_release(ct); - if (status & IPS_CONFIRMED_BIT) + if (status & IPS_CONFIRMED) return XDP_PASS; } else if (ct_lookup_opts.error != -ENOENT) { return XDP_ABORTED; } - /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */ + /* error == -ENOENT || !(status & IPS_CONFIRMED) */ return XDP_TX; } -- cgit v1.2.3-70-g09d2 From 6b0ae4566aba566a2ab4a2de9c59ab3d7f4b43c2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Nov 2023 15:44:15 -0800 Subject: selftests/bpf: Sort config in alphabetic order Move CONFIG_VSOCKETS up, so the CONFIGs are in alphabetic order. Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231129234417.856536-5-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3ec5927ec3e5..782876452acf 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -82,7 +82,7 @@ CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y +CONFIG_VSOCKETS=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y -CONFIG_VSOCKETS=y -- cgit v1.2.3-70-g09d2 From 341f06fdddf72cd60a10945152f69f0f1d614519 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Nov 2023 15:44:16 -0800 Subject: selftests/bpf: Add tests for filesystem kfuncs Add selftests for two new filesystem kfuncs: 1. bpf_get_file_xattr 2. bpf_get_fsverity_digest These tests simply make sure the two kfuncs work. Another selftest will be added to demonstrate how to use these kfuncs to verify a file signature. CONFIG_FS_VERITY is added to selftests config. However, this is not sufficient to guarantee bpf_get_fsverity_digest works. This is because fsverity needs to be enabled at the file system level (for example, with tune2fs on ext4). If the local file system doesn't have this feature enabled, just skip the test.
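For reference (not part of the patch), one way to satisfy that requirement is to put /tmp, where these tests create their files, on an ext4 filesystem with the verity feature enabled; the device path below is only a placeholder:

# Enable the verity feature on an existing ext4 filesystem (assumes a kernel
# with CONFIG_FS_VERITY and a reasonably recent e2fsprogs):
tune2fs -O verity /dev/vdb
# or create a fresh one and mount it where the tests expect their files:
mkfs.ext4 -O verity /dev/vdb
mount /dev/vdb /tmp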
Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231129234417.856536-6-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_kfuncs.h | 3 + tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c | 134 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_fsverity.c | 48 ++++++++ tools/testing/selftests/bpf/progs/test_get_xattr.c | 37 ++++++ 5 files changed, 223 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c create mode 100644 tools/testing/selftests/bpf/progs/test_fsverity.c create mode 100644 tools/testing/selftests/bpf/progs/test_get_xattr.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 5ca68ff0b59f..c2c084a44eae 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -55,4 +55,7 @@ void *bpf_cast_to_kern_ctx(void *) __ksym; void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; +extern int bpf_get_file_xattr(struct file *file, const char *name, + struct bpf_dynptr *value_ptr) __ksym; +extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; #endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 782876452acf..c125c441abc7 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,6 +23,7 @@ CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_FUNCTION_TRACER=y +CONFIG_FS_VERITY=y CONFIG_GENEVE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c new file mode 100644 index 000000000000..d3196a4b089f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include +#include +#include +#include +#include +#include +#include "test_get_xattr.skel.h" +#include "test_fsverity.skel.h" + +static const char testfile[] = "/tmp/test_progs_fs_kfuncs"; + +static void test_xattr(void) +{ + struct test_get_xattr *skel = NULL; + int fd = -1, err; + + fd = open(testfile, O_CREAT | O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + close(fd); + fd = -1; + + err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + if (!ASSERT_OK(err, "setxattr")) + goto out; + + skel = test_get_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_get_xattr__open_and_load")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_get_xattr__attach(skel); + + if (!ASSERT_OK(err, "test_get_xattr__attach")) + goto out; + + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file")) + goto out; + + ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr"); + +out: + close(fd); + test_get_xattr__destroy(skel); + remove(testfile); +} + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +static void test_fsverity(void) +{ + struct fsverity_enable_arg arg = {0}; + struct test_fsverity *skel = NULL; + struct fsverity_digest *d; + int fd, err; + char buffer[4096]; + + fd = open(testfile, O_CREAT | O_RDWR, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + /* Write random buffer, so the file is not empty */ + err = write(fd, buffer, 4096); + if (!ASSERT_EQ(err, 4096, "write_file")) + goto out; + close(fd); + + /* Reopen read-only, otherwise FS_IOC_ENABLE_VERITY will fail */ + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file1")) + return; + + /* Enable fsverity for the file. + * If the file system doesn't support verity, this will fail. Skip + * the test in such case. 
+ */ + arg.version = 1; + arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256; + arg.block_size = 4096; + err = ioctl(fd, FS_IOC_ENABLE_VERITY, &arg); + if (err) { + printf("%s:SKIP:local fs doesn't support fsverity (%d)\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__, errno); + test__skip(); + goto out; + } + + skel = test_fsverity__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_fsverity__open_and_load")) + goto out; + + /* Get fsverity_digest from ioctl */ + d = (struct fsverity_digest *)skel->bss->expected_digest; + d->digest_algorithm = FS_VERITY_HASH_ALG_SHA256; + d->digest_size = SHA256_DIGEST_SIZE; + err = ioctl(fd, FS_IOC_MEASURE_VERITY, skel->bss->expected_digest); + if (!ASSERT_OK(err, "ioctl_FS_IOC_MEASURE_VERITY")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_fsverity__attach(skel); + if (!ASSERT_OK(err, "test_fsverity__attach")) + goto out; + + /* Reopen the file to trigger the program */ + close(fd); + fd = open(testfile, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_file2")) + goto out; + + ASSERT_EQ(skel->bss->got_fsverity, 1, "got_fsverity"); + ASSERT_EQ(skel->bss->digest_matches, 1, "digest_matches"); +out: + close(fd); + test_fsverity__destroy(skel); + remove(testfile); +} + +void test_fs_kfuncs(void) +{ + if (test__start_subtest("xattr")) + test_xattr(); + + if (test__start_subtest("fsverity")) + test_fsverity(); +} diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c new file mode 100644 index 000000000000..3975495b75c8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_fsverity.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ + +char expected_digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +char digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +__u32 monitored_pid; +__u32 got_fsverity; +__u32 digest_matches; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr; + __u32 pid; + int ret; + int i; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + ret = bpf_get_fsverity_digest(f, &digest_ptr); + if (ret < 0) + return 0; + got_fsverity = 1; + + for (i = 0; i < sizeof(digest); i++) { + if (digest[i] != expected_digest[i]) + return 0; + } + + digest_matches = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c new file mode 100644 index 000000000000..7eb2a4e5a3e5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include +#include +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_pid; +__u32 found_xattr; + +static const char expected_value[] = "hello"; +char value[32]; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr value_ptr; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + + ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr); + if (ret != sizeof(expected_value)) + return 0; + if (bpf_strncmp(value, ret, expected_value)) + return 0; + found_xattr = 1; + return 0; +} -- cgit v1.2.3-70-g09d2 From 1030e9154258b54e3c7dc07c39e7b6dcf24bc3d2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Nov 2023 15:44:17 -0800 Subject: selftests/bpf: Add test that uses fsverity and xattr to sign a file This selftests shows a proof of concept method to use BPF LSM to enforce file signature. This test is added to verify_pkcs7_sig, so that some existing logic can be reused. This file signature method uses fsverity, which provides reliable and efficient hash (known as digest) of the file. The file digest is signed with asymmetic key, and the signature is stored in xattr. At the run time, BPF LSM reads file digest and the signature, and then checks them against the public key. Note that this solution does NOT require FS_VERITY_BUILTIN_SIGNATURES. fsverity is only used to provide file digest. The signature verification and access control is all implemented in BPF LSM. Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231129234417.856536-7-song@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_kfuncs.h | 7 + .../selftests/bpf/prog_tests/verify_pkcs7_sig.c | 165 ++++++++++++++++++++- .../selftests/bpf/progs/test_sig_in_xattr.c | 83 +++++++++++ .../selftests/bpf/progs/test_verify_pkcs7_sig.c | 8 +- tools/testing/selftests/bpf/verify_sig_setup.sh | 25 ++++ 5 files changed, 280 insertions(+), 8 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/test_sig_in_xattr.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index c2c084a44eae..b4e78c1eb37b 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -58,4 +58,11 @@ void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; + +extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; +extern void bpf_key_put(struct bpf_key *key) __ksym; +extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, + struct bpf_dynptr *sig_ptr, + struct bpf_key *trusted_keyring) __ksym; #endif diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index dd7f2bc70048..6c90372b772d 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -16,9 +16,12 @@ #include #include #include +#include +#include #include #include "test_verify_pkcs7_sig.skel.h" +#include "test_sig_in_xattr.skel.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 @@ -26,6 
+29,10 @@ #define VERIFY_USE_SECONDARY_KEYRING (1UL) #define VERIFY_USE_PLATFORM_KEYRING (2UL) +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + /* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */ #define MODULE_SIG_STRING "~Module signature appended~\n" @@ -254,7 +261,7 @@ out: return ret; } -void test_verify_pkcs7_sig(void) +static void test_verify_pkcs7_sig_from_map(void) { libbpf_print_fn_t old_print_cb; char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; @@ -400,3 +407,159 @@ close_prog: skel->bss->monitored_pid = 0; test_verify_pkcs7_sig__destroy(skel); } + +static int get_signature_size(const char *sig_path) +{ + struct stat st; + + if (stat(sig_path, &st) == -1) + return -1; + + return st.st_size; +} + +static int add_signature_to_xattr(const char *data_path, const char *sig_path) +{ + char sig[MAX_SIG_SIZE] = {0}; + int fd, size, ret; + + if (sig_path) { + fd = open(sig_path, O_RDONLY); + if (fd < 0) + return -1; + + size = read(fd, sig, MAX_SIG_SIZE); + close(fd); + if (size <= 0) + return -1; + } else { + /* no sig_path, just write 32 bytes of zeros */ + size = 32; + } + ret = setxattr(data_path, "user.sig", sig, size, 0); + if (!ASSERT_OK(ret, "setxattr")) + return -1; + + return 0; +} + +static int test_open_file(struct test_sig_in_xattr *skel, char *data_path, + pid_t pid, bool should_success, char *name) +{ + int ret; + + skel->bss->monitored_pid = pid; + ret = open(data_path, O_RDONLY); + close(ret); + skel->bss->monitored_pid = 0; + + if (should_success) { + if (!ASSERT_GE(ret, 0, name)) + return -1; + } else { + if (!ASSERT_LT(ret, 0, name)) + return -1; + } + return 0; +} + +static void test_pkcs7_sig_fsverity(void) +{ + char data_path[PATH_MAX]; + char sig_path[PATH_MAX]; + char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; + char *tmp_dir; + struct test_sig_in_xattr *skel = NULL; + pid_t pid; + int ret; + + tmp_dir = mkdtemp(tmp_dir_template); + if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp")) + return; + + snprintf(data_path, PATH_MAX, "%s/data-file", tmp_dir); + snprintf(sig_path, PATH_MAX, "%s/sig-file", tmp_dir); + + ret = _run_setup_process(tmp_dir, "setup"); + if (!ASSERT_OK(ret, "_run_setup_process")) + goto out; + + ret = _run_setup_process(tmp_dir, "fsverity-create-sign"); + + if (ret) { + printf("%s: SKIP: fsverity [sign|enable] doesn't work.\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__); + test__skip(); + goto out; + } + + skel = test_sig_in_xattr__open(); + if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open")) + goto out; + ret = get_signature_size(sig_path); + if (!ASSERT_GT(ret, 0, "get_signaure_size")) + goto out; + skel->bss->sig_size = ret; + skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring", + "ebpf_testing_keyring", NULL, + KEY_SPEC_SESSION_KEYRING); + memcpy(skel->bss->digest, "FSVerity", 8); + + ret = test_sig_in_xattr__load(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__load")) + goto out; + + ret = test_sig_in_xattr__attach(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__attach")) + goto out; + + pid = getpid(); + + /* Case 1: fsverity is not enabled, open should succeed */ + if (test_open_file(skel, data_path, pid, true, "open_1")) + goto out; + + /* Case 2: fsverity is enabled, xattr is missing, open should + * fail + */ + ret = _run_setup_process(tmp_dir, "fsverity-enable"); + if (!ASSERT_OK(ret, "fsverity-enable")) + goto out; + if (test_open_file(skel, data_path, pid, false, "open_2")) + goto out; + + /* Case 3: fsverity is enabled, 
xattr has valid signature, open + * should succeed + */ + ret = add_signature_to_xattr(data_path, sig_path); + if (!ASSERT_OK(ret, "add_signature_to_xattr_1")) + goto out; + + if (test_open_file(skel, data_path, pid, true, "open_3")) + goto out; + + /* Case 4: fsverity is enabled, xattr has invalid signature, open + * should fail + */ + ret = add_signature_to_xattr(data_path, NULL); + if (!ASSERT_OK(ret, "add_signature_to_xattr_2")) + goto out; + test_open_file(skel, data_path, pid, false, "open_4"); + +out: + _run_setup_process(tmp_dir, "cleanup"); + if (!skel) + return; + + skel->bss->monitored_pid = 0; + test_sig_in_xattr__destroy(skel); +} + +void test_verify_pkcs7_sig(void) +{ + if (test__start_subtest("pkcs7_sig_from_map")) + test_verify_pkcs7_sig_from_map(); + if (test__start_subtest("pkcs7_sig_fsverity")) + test_pkcs7_sig_fsverity(); +} diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c new file mode 100644 index 000000000000..2f0eb1334d65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define MAX_SIG_SIZE 1024 + +/* By default, "fsverity sign" signs a file with fsverity_formatted_digest + * of the file. fsverity_formatted_digest on the kernel side is only used + * with CONFIG_FS_VERITY_BUILTIN_SIGNATURES. However, BPF LSM doesn't not + * require CONFIG_FS_VERITY_BUILTIN_SIGNATURES, so vmlinux.h may not have + * fsverity_formatted_digest. In this test, we intentionally avoid using + * fsverity_formatted_digest. + * + * Luckily, fsverity_formatted_digest is simply 8-byte magic followed by + * fsverity_digest. We use a char array of size fsverity_formatted_digest + * plus SHA256_DIGEST_SIZE. The magic part of it is filled by user space, + * and the rest of it is filled by bpf_get_fsverity_digest. + * + * Note that, generating signatures based on fsverity_formatted_digest is + * the design choice of this selftest (and "fsverity sign"). With BPF + * LSM, we have the flexibility to generate signature based on other data + * sets, for example, fsverity_digest or only the digest[] part of it. 
+ */ +#define MAGIC_SIZE 8 +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ +char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; + +__u32 monitored_pid; +char sig[MAX_SIG_SIZE]; +__u32 sig_size; +__u32 user_keyring_serial; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr, sig_ptr; + struct bpf_key *trusted_keyring; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + /* digest_ptr points to fsverity_digest */ + bpf_dynptr_from_mem(digest + MAGIC_SIZE, sizeof(digest) - MAGIC_SIZE, 0, &digest_ptr); + + ret = bpf_get_fsverity_digest(f, &digest_ptr); + /* No verity, allow access */ + if (ret < 0) + return 0; + + /* Move digest_ptr to fsverity_formatted_digest */ + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + + /* Read signature from xattr */ + bpf_dynptr_from_mem(sig, sizeof(sig), 0, &sig_ptr); + ret = bpf_get_file_xattr(f, "user.sig", &sig_ptr); + /* No signature, reject access */ + if (ret < 0) + return -EPERM; + + trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0); + if (!trusted_keyring) + return -ENOENT; + + /* Verify signature */ + ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring); + + bpf_key_put(trusted_keyring); + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index 7748cc23de8a..f42e9f3831a1 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -10,17 +10,11 @@ #include #include #include +#include "bpf_kfuncs.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; -extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern void bpf_key_put(struct bpf_key *key) __ksym; -extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, - struct bpf_dynptr *sig_ptr, - struct bpf_key *trusted_keyring) __ksym; - __u32 monitored_pid; __u32 user_keyring_serial; __u64 system_keyring_id; diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh index ba08922b4a27..f2cac42298ba 100755 --- a/tools/testing/selftests/bpf/verify_sig_setup.sh +++ b/tools/testing/selftests/bpf/verify_sig_setup.sh @@ -60,6 +60,27 @@ cleanup() { rm -rf ${tmp_dir} } +fsverity_create_sign_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + sig_file=${tmp_dir}/sig-file + dd if=/dev/urandom of=$data_file bs=1 count=12345 2> /dev/null + fsverity sign --key ${tmp_dir}/signing_key.pem $data_file $sig_file + + # We do not want to enable fsverity on $data_file yet. Try whether + # the file system support fsverity on a different file. 
+ touch ${tmp_dir}/tmp-file + fsverity enable ${tmp_dir}/tmp-file +} + +fsverity_enable_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + fsverity enable $data_file +} + catch() { local exit_code="$1" @@ -86,6 +107,10 @@ main() setup "${tmp_dir}" elif [[ "${action}" == "cleanup" ]]; then cleanup "${tmp_dir}" + elif [[ "${action}" == "fsverity-create-sign" ]]; then + fsverity_create_sign_file "${tmp_dir}" + elif [[ "${action}" == "fsverity-enable" ]]; then + fsverity_enable_file "${tmp_dir}" else echo "Unknown action: ${action}" exit 1 -- cgit v1.2.3-70-g09d2 From 5fad52bee30414270104525e3a0266327a6e9d11 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:56:56 -0800 Subject: bpf: provide correct register name for exception callback retval check bpf_throw() is checking R1, so let's report R1 in the log. Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 12 ++++++------ tools/testing/selftests/bpf/progs/exceptions_assert.c | 2 +- tools/testing/selftests/bpf/progs/exceptions_fail.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8e7b6072e3f4..25b9d470957e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11805,7 +11805,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env, return 0; } -static int check_return_code(struct bpf_verifier_env *env, int regno); +static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name); static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) @@ -11942,7 +11942,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, * to bpf_throw becomes the return value of the program. 
*/ if (!env->exception_callback_subprog) { - err = check_return_code(env, BPF_REG_1); + err = check_return_code(env, BPF_REG_1, "R1"); if (err < 0) return err; } @@ -14972,7 +14972,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } -static int check_return_code(struct bpf_verifier_env *env, int regno) +static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name) { struct tnum enforce_attach_type_range = tnum_unknown; const struct bpf_prog *prog = env->prog; @@ -15026,7 +15026,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) } if (!tnum_in(const_0, reg->var_off)) { - verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0"); + verbose_invalid_scalar(env, reg, &const_0, "async callback", reg_name); return -EINVAL; } return 0; @@ -15126,7 +15126,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno) } if (!tnum_in(range, reg->var_off)) { - verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); + verbose_invalid_scalar(env, reg, &range, "program exit", reg_name); if (prog->expected_attach_type == BPF_LSM_CGROUP && prog_type == BPF_PROG_TYPE_LSM && !prog->aux->attach_func_proto->type) @@ -17410,7 +17410,7 @@ process_bpf_exit_full: continue; } - err = check_return_code(env, BPF_REG_0); + err = check_return_code(env, BPF_REG_0, "R0"); if (err) return err; process_bpf_exit: diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 49efaed143fc..575e7dd719c4 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -125,7 +125,7 @@ int check_assert_generic(struct __sk_buff *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0)") +__failure __msg("At program exit the register R1 has value (0x40; 0x0)") int check_assert_with_return(void *ctx) { bpf_assert_with(!ctx, 64); diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 8c0ef2742208..81ead7512ba2 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -308,7 +308,7 @@ int reject_set_exception_cb_bad_ret1(void *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0) should") +__failure __msg("At program exit the register R1 has value (0x40; 0x0) should") int reject_set_exception_cb_bad_ret2(void *ctx) { bpf_throw(64); -- cgit v1.2.3-70-g09d2 From 60a6b2c78c62d0a99ccb7ad5edc950f79e56306a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:56:59 -0800 Subject: selftests/bpf: add selftest validating callback result is enforced BPF verifier expects callback subprogs to return values from specified range (typically [0, 1]). This requires that r0 at exit is both precise (because we rely on specific value range) and is marked as read (otherwise state comparison will ignore such register as unimportant). Add a simple test that validates that all these conditions are enforced. 
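For contrast with the deliberately broken loop_callback_bad() added below (not part of the patch), a minimal sketch of a conforming bpf_loop() callback, using the hypothetical names loop_callback_ok() and use_loop_ok(), only ever returns 0 (continue) or 1 (stop):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

static long loop_callback_ok(__u64 index, void *ctx)
{
	if (index > 100)
		return 1;	/* stop iterating */
	return 0;		/* keep going; other values are rejected by the verifier */
}

SEC("raw_tp/sys_enter")
int use_loop_ok(void *ctx)
{
	bpf_loop(1000, loop_callback_ok, NULL, 0);
	return 0;
}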
Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_subprog_precision.c | 50 ++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index b5efcaeaa1ae..d41d2a8bb97e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -117,6 +117,56 @@ __naked int global_subprog_result_precise(void) ); } +__naked __noinline __used +static unsigned long loop_callback_bad() +{ + /* bpf_loop() callback that can return values outside of [0, 1] range */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* bpf_loop() expects [0, 1] values, so branch above skipping + * r0 = 0; should lead to a failure, but if exit instruction + * doesn't enforce r0's precision, this callback will be + * successfully verified + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} + +SEC("?raw_tp") +__failure __log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0") +/* check that we have branch code path doing its own validation */ +__msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001") +/* check that branch code path marks r0 as precise, before failing */ +__msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7") +__msg("At callback return the register R0 has value (0x0; 0x7fffffffffffffff) should have been in (0x0; 0x1)") +__naked int callback_precise_return_fail(void) +{ + asm volatile ( + "r1 = 1;" /* nr_loops */ + "r2 = %[loop_callback_bad];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r0 = 0;" + "exit;" + : + : __imm_ptr(loop_callback_bad), + __imm(bpf_loop) + : __clobber_common + ); +} + SEC("?raw_tp") __success __log_level(2) /* First simulated path does not include callback body, -- cgit v1.2.3-70-g09d2 From c871d0e00f0e8c207ce8ff89025e35cc49a8a3c3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:00 -0800 Subject: bpf: enforce precise retval range on program exit Similarly to subprog/callback logic, enforce return value of BPF program using more precise smin/smax range. We need to adjust a bunch of tests due to a changed format of an error message. 
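Not part of the patch, but as an illustration of the new message format: a cgroup/sock program (expected range [0, 1]) that returns the constant 2, like the hypothetical reject_bad_retval() below, is now rejected with "At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]":

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("cgroup/sock")
int reject_bad_retval(struct bpf_sock *ctx)
{
	return 2;	/* outside [0, 1], so the program fails verification */
}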
Acked-by: Eduard Zingerman Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 56 +++++++++++----------- .../selftests/bpf/progs/exceptions_assert.c | 2 +- .../testing/selftests/bpf/progs/exceptions_fail.c | 2 +- .../selftests/bpf/progs/test_global_func15.c | 2 +- tools/testing/selftests/bpf/progs/timer_failure.c | 2 +- .../selftests/bpf/progs/user_ringbuf_fail.c | 2 +- .../bpf/progs/verifier_cgroup_inv_retcode.c | 8 ++-- .../bpf/progs/verifier_netfilter_retcode.c | 2 +- .../bpf/progs/verifier_subprog_precision.c | 2 +- 9 files changed, 40 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f3d9d7de68da..9411c1046268 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -362,20 +362,23 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) static void verbose_invalid_scalar(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - struct tnum *range, const char *ctx, + struct bpf_retval_range range, const char *ctx, const char *reg_name) { - char tn_buf[48]; + bool unknown = true; - verbose(env, "At %s the register %s ", ctx, reg_name); - if (!tnum_is_unknown(reg->var_off)) { - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "has value %s", tn_buf); - } else { - verbose(env, "has unknown scalar value"); + verbose(env, "At %s the register %s has", ctx, reg_name); + if (reg->smin_value > S64_MIN) { + verbose(env, " smin=%lld", reg->smin_value); + unknown = false; } - tnum_strn(tn_buf, sizeof(tn_buf), *range); - verbose(env, " should have been in %s\n", tn_buf); + if (reg->smax_value < S64_MAX) { + verbose(env, " smax=%lld", reg->smax_value); + unknown = false; + } + if (unknown) + verbose(env, " unknown scalar value"); + verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); } static bool type_may_be_null(u32 type) @@ -9606,10 +9609,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) /* enforce R0 return value range */ if (!retval_range_within(callee->callback_ret_range, r0)) { - struct tnum range = tnum_range(callee->callback_ret_range.minval, - callee->callback_ret_range.maxval); - - verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + verbose_invalid_scalar(env, r0, callee->callback_ret_range, + "callback return", "R0"); return -EINVAL; } if (!calls_callback(env, callee->callsite)) { @@ -14995,7 +14996,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char struct tnum enforce_attach_type_range = tnum_unknown; const struct bpf_prog *prog = env->prog; struct bpf_reg_state *reg; - struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0); + struct bpf_retval_range range = retval_range(0, 1); + struct bpf_retval_range const_0 = retval_range(0, 0); enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; struct bpf_func_state *frame = env->cur_state->frame[0]; @@ -15043,8 +15045,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char return -EINVAL; } - if (!tnum_in(const_0, reg->var_off)) { - verbose_invalid_scalar(env, reg, &const_0, "async callback", reg_name); + if (!retval_range_within(const_0, reg)) { + verbose_invalid_scalar(env, reg, const_0, "async callback", reg_name); return -EINVAL; } return 0; @@ -15070,14 +15072,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, 
const char env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME || env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME || env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME) - range = tnum_range(1, 1); + range = retval_range(1, 1); if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND || env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND) - range = tnum_range(0, 3); + range = retval_range(0, 3); break; case BPF_PROG_TYPE_CGROUP_SKB: if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) { - range = tnum_range(0, 3); + range = retval_range(0, 3); enforce_attach_type_range = tnum_range(2, 3); } break; @@ -15090,13 +15092,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char case BPF_PROG_TYPE_RAW_TRACEPOINT: if (!env->prog->aux->attach_btf_id) return 0; - range = tnum_const(0); + range = retval_range(0, 0); break; case BPF_PROG_TYPE_TRACING: switch (env->prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: - range = tnum_const(0); + range = retval_range(0, 0); break; case BPF_TRACE_RAW_TP: case BPF_MODIFY_RETURN: @@ -15108,7 +15110,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char } break; case BPF_PROG_TYPE_SK_LOOKUP: - range = tnum_range(SK_DROP, SK_PASS); + range = retval_range(SK_DROP, SK_PASS); break; case BPF_PROG_TYPE_LSM: @@ -15122,12 +15124,12 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char /* Make sure programs that attach to void * hooks don't try to modify return value. */ - range = tnum_range(1, 1); + range = retval_range(1, 1); } break; case BPF_PROG_TYPE_NETFILTER: - range = tnum_range(NF_DROP, NF_ACCEPT); + range = retval_range(NF_DROP, NF_ACCEPT); break; case BPF_PROG_TYPE_EXT: /* freplace program can return anything as its return value @@ -15143,8 +15145,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char return -EINVAL; } - if (!tnum_in(range, reg->var_off)) { - verbose_invalid_scalar(env, reg, &range, "program exit", reg_name); + if (!retval_range_within(range, reg)) { + verbose_invalid_scalar(env, reg, range, "program exit", reg_name); if (prog->expected_attach_type == BPF_LSM_CGROUP && prog_type == BPF_PROG_TYPE_LSM && !prog->aux->attach_func_proto->type) diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 575e7dd719c4..0ef81040da59 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -125,7 +125,7 @@ int check_assert_generic(struct __sk_buff *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R1 has value (0x40; 0x0)") +__failure __msg("At program exit the register R1 has smin=64 smax=64") int check_assert_with_return(void *ctx) { bpf_assert_with(!ctx, 64); diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 81ead7512ba2..9cceb6521143 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -308,7 +308,7 @@ int reject_set_exception_cb_bad_ret1(void *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R1 has value (0x40; 0x0) should") +__failure __msg("At program exit the register R1 has smin=64 smax=64 should") int reject_set_exception_cb_bad_ret2(void *ctx) { bpf_throw(64); diff --git 
a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c index b512d6a6c75e..f80207480e8a 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func15.c +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -13,7 +13,7 @@ __noinline int foo(unsigned int *v) } SEC("cgroup_skb/ingress") -__failure __msg("At program exit the register R0 has value") +__failure __msg("At program exit the register R0 has ") int global_func15(struct __sk_buff *skb) { unsigned int v = 1; diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 226d33b5a05c..9000da1e2120 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -30,7 +30,7 @@ static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) } SEC("fentry/bpf_fentry_test1") -__failure __msg("should have been in (0x0; 0x0)") +__failure __msg("should have been in [0, 0]") int BPF_PROG2(test_ret_1, int, a) { int key = 0; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 03ee946c6bf7..11ab25c42c36 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -184,7 +184,7 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) * not be able to write to that pointer. */ SEC("?raw_tp") -__failure __msg("At callback return the register R0 has value") +__failure __msg("At callback return the register R0 has ") int user_ringbuf_callback_invalid_return(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c index d6c4a7f3f790..6e0f349f8f15 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c @@ -7,7 +7,7 @@ SEC("cgroup/sock") __description("bpf_exit with invalid return code. test1") -__failure __msg("R0 has value (0x0; 0xffffffff)") +__failure __msg("smin=0 smax=4294967295 should have been in [0, 1]") __naked void with_invalid_return_code_test1(void) { asm volatile (" \ @@ -30,7 +30,7 @@ __naked void with_invalid_return_code_test2(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test3") -__failure __msg("R0 has value (0x0; 0x3)") +__failure __msg("smin=0 smax=3 should have been in [0, 1]") __naked void with_invalid_return_code_test3(void) { asm volatile (" \ @@ -53,7 +53,7 @@ __naked void with_invalid_return_code_test4(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test5") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test5(void) { asm volatile (" \ @@ -75,7 +75,7 @@ __naked void with_invalid_return_code_test6(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. 
test7") -__failure __msg("R0 has unknown scalar value") +__failure __msg("R0 has unknown scalar value should have been in [0, 1]") __naked void with_invalid_return_code_test7(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c index 353ae6da00e1..e1ffa5d32ff0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c @@ -39,7 +39,7 @@ __naked void with_valid_return_code_test3(void) SEC("netfilter") __description("bpf_exit with invalid return code. test4") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test4(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index d41d2a8bb97e..0dfe3f8b69ac 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -148,7 +148,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0") __msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001") /* check that branch code path marks r0 as precise, before failing */ __msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7") -__msg("At callback return the register R0 has value (0x0; 0x7fffffffffffffff) should have been in (0x0; 0x1)") +__msg("At callback return the register R0 has smin=1001 should have been in [0, 1]") __naked int callback_precise_return_fail(void) { asm volatile ( -- cgit v1.2.3-70-g09d2 From e02dea158ddaebe6e725be715e0009923b96ec8e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:03 -0800 Subject: selftests/bpf: validate async callback return value check correctness Adjust timer/timer_ret_1 test to validate more carefully verifier logic of enforcing async callback return value. This test will pass only if return result is marked precise and read. 
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/timer_failure.c | 36 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 9000da1e2120..9fbc69c77bbb 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -21,17 +21,37 @@ struct { __type(value, struct elem); } timer_map SEC(".maps"); -static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +__naked __noinline __used +static unsigned long timer_cb_ret_bad() { - if (bpf_get_smp_processor_id() % 2) - return 1; - else - return 0; + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* async callback is expected to return 0, so branch above + * skipping r0 = 0; should lead to a failure, but if exit + * instruction doesn't enforce r0's precision, this callback + * will be successfully verified + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); } SEC("fentry/bpf_fentry_test1") -__failure __msg("should have been in [0, 0]") -int BPF_PROG2(test_ret_1, int, a) +__log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 22: (b7) r0 = 0") +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 24: (85) call bpf_get_prandom_u32#7") +__msg("should have been in [0, 0]") +long BPF_PROG2(test_bad_ret, int, a) { int key = 0; struct bpf_timer *timer; @@ -39,7 +59,7 @@ int BPF_PROG2(test_ret_1, int, a) timer = bpf_map_lookup_elem(&timer_map, &key); if (timer) { bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); - bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_set_callback(timer, timer_cb_ret_bad); bpf_timer_start(timer, 1000, 0); } -- cgit v1.2.3-70-g09d2 From 5c19e1d05e9e71b42d8e779f41959254239709da Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:04 -0800 Subject: selftests/bpf: adjust global_func15 test to validate prog exit precision Add one more subtest to global_func15 selftest to validate that verifier properly marks r0 as precise and avoids erroneous state pruning of the branch that has return value outside of expected [0, 1] value. 
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/test_global_func15.c | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c index f80207480e8a..b4e089d6981d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func15.c +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -22,3 +22,35 @@ int global_func15(struct __sk_buff *skb) return v; } + +SEC("cgroup_skb/ingress") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 2: (b7) r0 = 1") +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7") +__msg("At program exit the register R0 has ") +__naked int global_func15_tricky_pruning(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 1;" + "1:" + "goto +0;" /* checkpoint */ + /* cgroup_skb/ingress program is expected to return [0, 1] + * values, so branch above makes sure that in a fallthrough + * case we have a valid 1 stored in R0 register, but in + * a branch case we assign some random value to R0. So if + * there is something wrong with precision tracking for R0 at + * program exit, we might erronenously prune branch case, + * because R0 in fallthrough case is imprecise (and thus any + * value is valid from POV of verifier is_state_equal() logic) + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} -- cgit v1.2.3-70-g09d2 From 81eff2e36481c5cf4a2ac906ae56c3fbd3e6f305 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 2 Dec 2023 09:57:05 -0800 Subject: bpf: simplify tnum output if a fully known constant Emit tnum representation as just a constant if all bits are known. Use decimal-vs-hex logic to determine exact format of emitted constant value, just like it's done for register range values. For that move tnum_strn() to kernel/bpf/log.c to reuse decimal-vs-hex determination logic and constants. 
Acked-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231202175705.885270-12-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/log.c | 13 +++++++++++++ kernel/bpf/tnum.c | 6 ------ .../selftests/bpf/progs/verifier_direct_packet_access.c | 2 +- tools/testing/selftests/bpf/progs/verifier_int_ptr.c | 2 +- tools/testing/selftests/bpf/progs/verifier_stack_ptr.c | 4 ++-- 5 files changed, 17 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 3505f3e5ae96..55d019f30e91 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -539,6 +539,19 @@ static void verbose_snum(struct bpf_verifier_env *env, s64 num) verbose(env, "%#llx", num); } +int tnum_strn(char *str, size_t size, struct tnum a) +{ + /* print as a constant, if tnum is fully known */ + if (a.mask == 0) { + if (is_unum_decimal(a.value)) + return snprintf(str, size, "%llu", a.value); + else + return snprintf(str, size, "%#llx", a.value); + } + return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask); +} +EXPORT_SYMBOL_GPL(tnum_strn); + static void print_scalar_ranges(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, const char **sep) diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index f4c91c9b27d7..9dbc31b25e3d 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -172,12 +172,6 @@ bool tnum_in(struct tnum a, struct tnum b) return a.value == b.value; } -int tnum_strn(char *str, size_t size, struct tnum a) -{ - return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask); -} -EXPORT_SYMBOL_GPL(tnum_strn); - int tnum_sbin(char *str, size_t size, struct tnum a) { size_t n; diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index 99a23dea8233..be95570ab382 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -411,7 +411,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("direct packet access: test17 (pruning, alignment)") -__failure __msg("misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4") +__failure __msg("misaligned packet access off 2+0+15+-4 size 4") __flag(BPF_F_STRICT_ALIGNMENT) __naked void packet_access_test17_pruning_alignment(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index b054f9c48143..74d9cad469d9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -67,7 +67,7 @@ __naked void ptr_to_long_half_uninitialized(void) SEC("cgroup/sysctl") __description("ARG_PTR_TO_LONG misaligned") -__failure __msg("misaligned stack access off (0x0; 0x0)+-20+0 size 8") +__failure __msg("misaligned stack access off 0+-20+0 size 8") __naked void arg_ptr_to_long_misaligned(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c index e0f77e3e7869..417c61cd4b19 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c @@ -37,7 +37,7 @@ __naked void ptr_to_stack_store_load(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on off") -__failure __msg("misaligned stack access off (0x0; 0x0)+-8+2 size 8") +__failure __msg("misaligned stack access off 0+-8+2 size 8") __failure_unpriv __naked 
void load_bad_alignment_on_off(void) { @@ -53,7 +53,7 @@ __naked void load_bad_alignment_on_off(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on reg") -__failure __msg("misaligned stack access off (0x0; 0x0)+-10+8 size 8") +__failure __msg("misaligned stack access off 0+-10+8 size 8") __failure_unpriv __naked void load_bad_alignment_on_reg(void) { -- cgit v1.2.3-70-g09d2 From 153de60e8bfb4501e1462a2f74cb787c137b996c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 4 Dec 2023 09:39:40 +0000 Subject: selftests/bpf: Fix spelling mistake "get_signaure_size" -> "get_signature_size" There is a spelling mistake in an ASSERT_GT message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231204093940.2611954-1-colin.i.king@gmail.com --- tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index 6c90372b772d..ab0f02faa80c 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -498,7 +498,7 @@ static void test_pkcs7_sig_fsverity(void) if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open")) goto out; ret = get_signature_size(sig_path); - if (!ASSERT_GT(ret, 0, "get_signaure_size")) + if (!ASSERT_GT(ret, 0, "get_signature_size")) goto out; skel->bss->sig_size = ret; skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring", -- cgit v1.2.3-70-g09d2 From 1624918be84a8bcc4f592e55635bc4fe4a96460a Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Dec 2023 22:04:24 +0800 Subject: selftests/bpf: Add test cases for inner map Add test cases to test the race between the destroy of inner map due to map-in-map update and the access of inner map in bpf program. The following 4 combinations are added: (1) array map in map array + bpf program (2) array map in map array + sleepable bpf program (3) array map in map htab + bpf program (4) array map in map htab + sleepable bpf program Before applying the fixes, when running `./test_prog -a map_in_map`, the following error was reported: ================================================================== BUG: KASAN: slab-use-after-free in array_map_update_elem+0x48/0x3e0 Read of size 4 at addr ffff888114f33824 by task test_progs/1858 CPU: 1 PID: 1858 Comm: test_progs Tainted: G O 6.6.0+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... Call Trace: dump_stack_lvl+0x4a/0x90 print_report+0xd2/0x620 kasan_report+0xd1/0x110 __asan_load4+0x81/0xa0 array_map_update_elem+0x48/0x3e0 bpf_prog_be94a9f26772f5b7_access_map_in_array+0xe6/0xf6 trace_call_bpf+0x1aa/0x580 kprobe_perf_func+0xdd/0x430 kprobe_dispatcher+0xa0/0xb0 kprobe_ftrace_handler+0x18b/0x2e0 0xffffffffc02280f7 RIP: 0010:__x64_sys_getpgid+0x1/0x30 ...... 
Allocated by task 1857: kasan_save_stack+0x26/0x50 kasan_set_track+0x25/0x40 kasan_save_alloc_info+0x1e/0x30 __kasan_kmalloc+0x98/0xa0 __kmalloc_node+0x6a/0x150 __bpf_map_area_alloc+0x141/0x170 bpf_map_area_alloc+0x10/0x20 array_map_alloc+0x11f/0x310 map_create+0x28a/0xb40 __sys_bpf+0x753/0x37c0 __x64_sys_bpf+0x44/0x60 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Freed by task 11: kasan_save_stack+0x26/0x50 kasan_set_track+0x25/0x40 kasan_save_free_info+0x2b/0x50 __kasan_slab_free+0x113/0x190 slab_free_freelist_hook+0xd7/0x1e0 __kmem_cache_free+0x170/0x260 kfree+0x9b/0x160 kvfree+0x2d/0x40 bpf_map_area_free+0xe/0x20 array_map_free+0x120/0x2c0 bpf_map_free_deferred+0xd7/0x1e0 process_one_work+0x462/0x990 worker_thread+0x370/0x670 kthread+0x1b0/0x200 ret_from_fork+0x3a/0x70 ret_from_fork_asm+0x1b/0x30 Last potentially related work creation: kasan_save_stack+0x26/0x50 __kasan_record_aux_stack+0x94/0xb0 kasan_record_aux_stack_noalloc+0xb/0x20 __queue_work+0x331/0x950 queue_work_on+0x75/0x80 bpf_map_put+0xfa/0x160 bpf_map_fd_put_ptr+0xe/0x20 bpf_fd_array_map_update_elem+0x174/0x1b0 bpf_map_update_value+0x2b7/0x4a0 __sys_bpf+0x2551/0x37c0 __x64_sys_bpf+0x44/0x60 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231204140425.1480317-7-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/map_in_map.c | 141 +++++++++++++++++++++ .../selftests/bpf/progs/access_map_in_map.c | 93 ++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_in_map.c create mode 100644 tools/testing/selftests/bpf/progs/access_map_in_map.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c new file mode 100644 index 000000000000..d2a10eb4e5b5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include "access_map_in_map.skel.h" + +struct thread_ctx { + pthread_barrier_t barrier; + int outer_map_fd; + int start, abort; + int loop, err; +}; + +static int wait_for_start_or_abort(struct thread_ctx *ctx) +{ + while (!ctx->start && !ctx->abort) + usleep(1); + return ctx->abort ? 
-1 : 0; +} + +static void *update_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop, err = 0; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + int fd, zero = 0; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (fd < 0) { + err |= 1; + pthread_barrier_wait(&ctx->barrier); + continue; + } + + /* Remove the old inner map */ + if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0) + err |= 2; + close(fd); + pthread_barrier_wait(&ctx->barrier); + } + + ctx->err = err; + + return NULL; +} + +static void *access_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + /* Access the old inner map */ + syscall(SYS_getpgid); + pthread_barrier_wait(&ctx->barrier); + } + + return NULL; +} + +static void test_map_in_map_access(const char *prog_name, const char *map_name) +{ + struct access_map_in_map *skel; + struct bpf_map *outer_map; + struct bpf_program *prog; + struct thread_ctx ctx; + pthread_t tid[2]; + int err; + + skel = access_map_in_map__open(); + if (!ASSERT_OK_PTR(skel, "access_map_in_map open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "find program")) + goto out; + bpf_program__set_autoload(prog, true); + + outer_map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(outer_map, "find map")) + goto out; + + err = access_map_in_map__load(skel); + if (!ASSERT_OK(err, "access_map_in_map load")) + goto out; + + err = access_map_in_map__attach(skel); + if (!ASSERT_OK(err, "access_map_in_map attach")) + goto out; + + skel->bss->tgid = getpid(); + + memset(&ctx, 0, sizeof(ctx)); + pthread_barrier_init(&ctx.barrier, NULL, 2); + ctx.outer_map_fd = bpf_map__fd(outer_map); + ctx.loop = 4; + + err = pthread_create(&tid[0], NULL, update_map_fn, &ctx); + if (!ASSERT_OK(err, "close_thread")) + goto out; + + err = pthread_create(&tid[1], NULL, access_map_fn, &ctx); + if (!ASSERT_OK(err, "read_thread")) { + ctx.abort = 1; + pthread_join(tid[0], NULL); + goto out; + } + + ctx.start = 1; + pthread_join(tid[0], NULL); + pthread_join(tid[1], NULL); + + ASSERT_OK(ctx.err, "err"); +out: + access_map_in_map__destroy(skel); +} + +void test_map_in_map(void) +{ + if (test__start_subtest("acc_map_in_array")) + test_map_in_map_access("access_map_in_array", "outer_array_map"); + if (test__start_subtest("sleepable_acc_map_in_array")) + test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map"); + if (test__start_subtest("acc_map_in_htab")) + test_map_in_map_access("access_map_in_htab", "outer_htab_map"); + if (test__start_subtest("sleepable_acc_map_in_htab")) + test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map"); +} + diff --git a/tools/testing/selftests/bpf/progs/access_map_in_map.c b/tools/testing/selftests/bpf/progs/access_map_in_map.c new file mode 100644 index 000000000000..1126871c2ebd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/access_map_in_map.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. 
Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_misc.h" + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_htab_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int tgid = 0; + +static int acc_map_in_map(void *outer_map) +{ + int i, key, value = 0xdeadbeef; + void *inner_map; + + if ((bpf_get_current_pid_tgid() >> 32) != tgid) + return 0; + + /* Find nonexistent inner map */ + key = 1; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (inner_map) + return 0; + + /* Find the old inner map */ + key = 0; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (!inner_map) + return 0; + + /* Wait for the old inner map to be replaced */ + for (i = 0; i < 2048; i++) + bpf_map_update_elem(inner_map, &key, &value, 0); + + return 0; +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} -- cgit v1.2.3-70-g09d2 From e3dd40828534a67931e0dd00fcd35846271fd4e8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 4 Dec 2023 22:04:25 +0800 Subject: selftests/bpf: Test outer map update operations in syscall program Syscall program is running with rcu_read_lock_trace being held, so if bpf_map_update_elem() or bpf_map_delete_elem() invokes synchronize_rcu_tasks_trace() when operating on an outer map, there will be dead-lock, so add a test to guarantee that it is dead-lock free. 
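In other words, the hazard the new test guards against is the following nesting; this is a conceptual sketch of the call sequence, not actual kernel code:

/*
 *   rcu_read_lock_trace()                  <- held while the syscall program runs
 *     bpf_map_update_elem(outer_map, ...)  <- issued via bpf_sys_bpf()
 *       synchronize_rcu_tasks_trace()      <- would wait for the tasks-trace
 *                                             read section we are still inside,
 *                                             i.e. a deadlock
 *   rcu_read_unlock_trace()
 */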
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231204140425.1480317-8-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/syscall.c | 30 +++++++- tools/testing/selftests/bpf/progs/syscall.c | 96 ++++++++++++++++++++++-- 2 files changed, 119 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c index f4d40001155a..0be8301c0ffd 100644 --- a/tools/testing/selftests/bpf/prog_tests/syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -12,7 +12,7 @@ struct args { int btf_fd; }; -void test_syscall(void) +static void test_syscall_load_prog(void) { static char verifier_log[8192]; struct args ctx = { @@ -32,7 +32,7 @@ void test_syscall(void) if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; - prog_fd = bpf_program__fd(skel->progs.bpf_prog); + prog_fd = bpf_program__fd(skel->progs.load_prog); err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(err, 0, "err"); ASSERT_EQ(tattr.retval, 1, "retval"); @@ -53,3 +53,29 @@ cleanup: if (ctx.btf_fd > 0) close(ctx.btf_fd); } + +static void test_syscall_update_outer_map(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct syscall *skel; + int err, prog_fd; + + skel = syscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.update_outer_map); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(err, 0, "err"); + ASSERT_EQ(opts.retval, 1, "retval"); +cleanup: + syscall__destroy(skel); +} + +void test_syscall(void) +{ + if (test__start_subtest("load_prog")) + test_syscall_load_prog(); + if (test__start_subtest("update_outer_map")) + test_syscall_update_outer_map(); +} diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c index e550f728962d..3d3cafdebe72 100644 --- a/tools/testing/selftests/bpf/progs/syscall.c +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -6,9 +6,15 @@ #include #include <../../../tools/include/linux/filter.h> #include +#include +#include char _license[] SEC("license") = "GPL"; +struct bpf_map { + int id; +} __attribute__((preserve_access_index)); + struct args { __u64 log_buf; __u32 log_size; @@ -27,6 +33,37 @@ struct args { BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits) +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, union bpf_attr); + __uint(max_entries, 1); +} bpf_attr_array SEC(".maps"); + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_load(void) { struct btf_blob { @@ -58,7 +95,7 @@ static int btf_load(void) } SEC("syscall") -int bpf_prog(struct args *ctx) +int load_prog(struct args *ctx) { static char license[] = "GPL"; static struct bpf_insn insns[] = { @@ -94,8 +131,8 @@ int bpf_prog(struct args *ctx) map_create_attr.max_entries = ctx->max_entries; map_create_attr.btf_fd = ret; - prog_load_attr.license = (long) license; - prog_load_attr.insns = (long) insns; + 
prog_load_attr.license = ptr_to_u64(license); + prog_load_attr.insns = ptr_to_u64(insns); prog_load_attr.log_buf = ctx->log_buf; prog_load_attr.log_size = ctx->log_size; prog_load_attr.log_level = 1; @@ -107,8 +144,8 @@ int bpf_prog(struct args *ctx) insns[3].imm = ret; map_update_attr.map_fd = ret; - map_update_attr.key = (long) &key; - map_update_attr.value = (long) &value; + map_update_attr.key = ptr_to_u64(&key); + map_update_attr.value = ptr_to_u64(&value); ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr)); if (ret < 0) return ret; @@ -119,3 +156,52 @@ int bpf_prog(struct args *ctx) ctx->prog_fd = ret; return 1; } + +SEC("syscall") +int update_outer_map(void *ctx) +{ + int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err; + const int attr_sz = sizeof(union bpf_attr); + union bpf_attr *attr; + + attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero); + if (!attr) + goto out; + + memset(attr, 0, attr_sz); + attr->map_id = ((struct bpf_map *)&outer_array_map)->id; + outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz); + if (outer_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_type = BPF_MAP_TYPE_ARRAY; + attr->key_size = 4; + attr->value_size = 4; + attr->max_entries = 1; + inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz); + if (inner_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + attr->value = ptr_to_u64(&inner_fd); + err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz); + if (err) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz); + if (err) + goto out; + ret = 1; +out: + if (inner_fd >= 0) + bpf_sys_close(inner_fd); + if (outer_fd >= 0) + bpf_sys_close(outer_fd); + return ret; +} -- cgit v1.2.3-70-g09d2 From bc877956272f0521fef107838555817112a450dc Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:29 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for queue Add support in netlink spec(netdev.yaml) for queue information. Add code generated from the spec. Note: The "queue-type" attribute takes values 0 and 1 for rx and tx queue type respectively. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147330963.5260.2576294626647300472.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 52 +++++++++++ include/uapi/linux/netdev.h | 16 ++++ net/core/netdev-genl-gen.c | 26 ++++++ net/core/netdev-genl-gen.h | 3 + net/core/netdev-genl.c | 10 +++ tools/include/uapi/linux/netdev.h | 16 ++++ tools/net/ynl/generated/netdev-user.c | 153 ++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 99 +++++++++++++++++++++ 8 files changed, 375 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index eef6358ec587..719e6aafbfdb 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -66,6 +66,10 @@ definitions: name: tx-checksum doc: L3 checksum HW offload is supported by the driver. 
+ - + name: queue-type + type: enum + entries: [ rx, tx ] attribute-sets: - @@ -209,6 +213,31 @@ attribute-sets: name: recycle-released-refcnt type: uint + - + name: queue + attributes: + - + name: id + doc: Queue index; most queue types are indexed like a C array, with + indexes starting at 0 and ending at queue count - 1. Queue indexes + are scoped to an interface and queue type. + type: u32 + - + name: ifindex + doc: ifindex of the netdevice to which the queue belongs. + type: u32 + checks: + min: 1 + - + name: type + doc: Queue type as rx, tx. Each queue type defines a separate ID space. + type: u32 + enum: queue-type + - + name: napi-id + doc: ID of the NAPI instance which services this queue. + type: u32 + operations: list: - @@ -307,6 +336,29 @@ operations: dump: reply: *pp-stats-reply config-cond: page-pool-stats + - + name: queue-get + doc: Get queue information from the kernel. + Only configured queues will be reported (as opposed to all available + hardware queues). + attribute-set: queue + do: + request: + attributes: + - ifindex + - type + - id + reply: &queue-get-op + attributes: + - id + - type + - napi-id + - ifindex + dump: + request: + attributes: + - ifindex + reply: *queue-get-op mcast-groups: list: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..f857960a7f06 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -62,6 +62,11 @@ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, }; +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -104,6 +109,16 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -114,6 +129,7 @@ enum { NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index dccd8c3a141e..b1dcf88c82cf 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -46,6 +46,18 @@ static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAG }; #endif /* CONFIG_PAGE_POOL_STATS */ +/* NETDEV_CMD_QUEUE_GET - do */ +static const struct nla_policy netdev_queue_get_do_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_QUEUE_GET - dump */ +static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IFINDEX + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -88,6 +100,20 @@ static const struct genl_split_ops netdev_nl_ops[] = { .flags = GENL_CMD_CAP_DUMP, }, #endif /* CONFIG_PAGE_POOL_STATS */ + { + .cmd = NETDEV_CMD_QUEUE_GET, + .doit = netdev_nl_queue_get_doit, + .policy = netdev_queue_get_do_nl_policy, + .maxattr = NETDEV_A_QUEUE_TYPE, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_QUEUE_GET, + .dumpit = netdev_nl_queue_get_dumpit, + .policy = netdev_queue_get_dump_nl_policy, + .maxattr = NETDEV_A_QUEUE_IFINDEX, + .flags = 
GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 649e4b46eccf..086623c1797a 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -23,6 +23,9 @@ int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 10f2124e9e23..35e2d692f651 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -140,6 +140,16 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6244c0164976..f857960a7f06 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -62,6 +62,11 @@ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, }; +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -104,6 +109,16 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -114,6 +129,7 @@ enum { NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 3b9dee94d4ce..fbf7e24ade91 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -23,6 +23,7 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_PAGE_POOL_DEL_NTF] = "page-pool-del-ntf", [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", + [NETDEV_CMD_QUEUE_GET] = "queue-get", }; const char *netdev_op_str(int op) @@ -76,6 +77,18 @@ const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) return netdev_xsk_flags_strmap[value]; } +static const char * const netdev_queue_type_strmap[] = { + [0] = "rx", + [1] = "tx", +}; + +const char *netdev_queue_type_str(enum netdev_queue_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_queue_type_strmap)) + return NULL; + return netdev_queue_type_strmap[value]; +} + /* Policies */ struct ynl_policy_attr netdev_page_pool_info_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, @@ -135,6 +148,18 @@ struct ynl_policy_nest netdev_page_pool_stats_nest = { .table = netdev_page_pool_stats_policy, }; +struct ynl_policy_attr netdev_queue_policy[NETDEV_A_QUEUE_MAX + 1] = { + [NETDEV_A_QUEUE_ID] = { .name = "id", 
.type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_TYPE] = { .name = "type", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_queue_nest = { + .max_attr = NETDEV_A_QUEUE_MAX, + .table = netdev_queue_policy, +}; + /* Common nested types */ void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) { @@ -617,6 +642,134 @@ free_list: return NULL; } +/* ============== NETDEV_CMD_QUEUE_GET ============== */ +/* NETDEV_CMD_QUEUE_GET - do */ +void netdev_queue_get_req_free(struct netdev_queue_get_req *req) +{ + free(req); +} + +void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_queue_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct netdev_queue_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_QUEUE_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.type = 1; + dst->type = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.napi_id = 1; + dst->napi_id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_queue_get_rsp * +netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_queue_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); + ys->req_policy = &netdev_queue_nest; + yrs.yarg.rsp_policy = &netdev_queue_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); + if (req->_present.type) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_TYPE, req->type); + if (req->_present.id) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_queue_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_QUEUE_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_queue_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_QUEUE_GET - dump */ +void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp) +{ + struct netdev_queue_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_queue_get_list * +netdev_queue_get_dump(struct ynl_sock *ys, + struct netdev_queue_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_queue_get_list); + yds.cb = netdev_queue_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_QUEUE_GET; + yds.rsp_policy = &netdev_queue_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); + ys->req_policy = &netdev_queue_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, 
req->ifindex); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_queue_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index cc3d80d1cf8c..d7daf6df3df0 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -20,6 +20,7 @@ const char *netdev_op_str(int op); const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); +const char *netdev_queue_type_str(enum netdev_queue_type value); /* Common nested types */ struct netdev_page_pool_info { @@ -261,4 +262,102 @@ netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp struct netdev_page_pool_stats_get_list * netdev_page_pool_stats_get_dump(struct ynl_sock *ys); +/* ============== NETDEV_CMD_QUEUE_GET ============== */ +/* NETDEV_CMD_QUEUE_GET - do */ +struct netdev_queue_get_req { + struct { + __u32 ifindex:1; + __u32 type:1; + __u32 id:1; + } _present; + + __u32 ifindex; + enum netdev_queue_type type; + __u32 id; +}; + +static inline struct netdev_queue_get_req *netdev_queue_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_queue_get_req)); +} +void netdev_queue_get_req_free(struct netdev_queue_get_req *req); + +static inline void +netdev_queue_get_req_set_ifindex(struct netdev_queue_get_req *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} +static inline void +netdev_queue_get_req_set_type(struct netdev_queue_get_req *req, + enum netdev_queue_type type) +{ + req->_present.type = 1; + req->type = type; +} +static inline void +netdev_queue_get_req_set_id(struct netdev_queue_get_req *req, __u32 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_queue_get_rsp { + struct { + __u32 id:1; + __u32 type:1; + __u32 napi_id:1; + __u32 ifindex:1; + } _present; + + __u32 id; + enum netdev_queue_type type; + __u32 napi_id; + __u32 ifindex; +}; + +void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp); + +/* + * Get queue information from the kernel. Only configured queues will be reported (as opposed to all available hardware queues). 
+ */ +struct netdev_queue_get_rsp * +netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req); + +/* NETDEV_CMD_QUEUE_GET - dump */ +struct netdev_queue_get_req_dump { + struct { + __u32 ifindex:1; + } _present; + + __u32 ifindex; +}; + +static inline struct netdev_queue_get_req_dump * +netdev_queue_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct netdev_queue_get_req_dump)); +} +void netdev_queue_get_req_dump_free(struct netdev_queue_get_req_dump *req); + +static inline void +netdev_queue_get_req_dump_set_ifindex(struct netdev_queue_get_req_dump *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +struct netdev_queue_get_list { + struct netdev_queue_get_list *next; + struct netdev_queue_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp); + +struct netdev_queue_get_list * +netdev_queue_get_dump(struct ynl_sock *ys, + struct netdev_queue_get_req_dump *req); + #endif /* _LINUX_NETDEV_GEN_H */ -- cgit v1.2.3-70-g09d2 From ff9991499fb53575c45eb92cd064bcd7141bb572 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:51 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for NAPI Add support in netlink spec(netdev.yaml) for napi related information. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147333119.5260.7050639053080529108.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 30 ++++++++ include/uapi/linux/netdev.h | 9 +++ net/core/netdev-genl-gen.c | 24 +++++++ net/core/netdev-genl-gen.h | 2 + net/core/netdev-genl.c | 10 +++ tools/include/uapi/linux/netdev.h | 9 +++ tools/net/ynl/generated/netdev-user.c | 124 ++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 75 +++++++++++++++++++ 8 files changed, 283 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 719e6aafbfdb..76d6b2e15b67 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -213,6 +213,19 @@ attribute-sets: name: recycle-released-refcnt type: uint + - + name: napi + attributes: + - + name: ifindex + doc: ifindex of the netdevice to which NAPI instance belongs. + type: u32 + checks: + min: 1 + - + name: id + doc: ID of the NAPI instance. + type: u32 - name: queue attributes: @@ -359,6 +372,23 @@ operations: attributes: - ifindex reply: *queue-get-op + - + name: napi-get + doc: Get information about NAPI instances configured on the system. 
+ attribute-set: napi + do: + request: + attributes: + - id + reply: &napi-get-op + attributes: + - id + - ifindex + dump: + request: + attributes: + - ifindex + reply: *napi-get-op mcast-groups: list: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index f857960a7f06..e7bdbcb01f22 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -109,6 +109,14 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -130,6 +138,7 @@ enum { NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index b1dcf88c82cf..be7f2ebd61b2 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -58,6 +58,16 @@ static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IF [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; +/* NETDEV_CMD_NAPI_GET - do */ +static const struct nla_policy netdev_napi_get_do_nl_policy[NETDEV_A_NAPI_ID + 1] = { + [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_NAPI_GET - dump */ +static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFINDEX + 1] = { + [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -114,6 +124,20 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_QUEUE_IFINDEX, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .doit = netdev_nl_napi_get_doit, + .policy = netdev_napi_get_do_nl_policy, + .maxattr = NETDEV_A_NAPI_ID, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .dumpit = netdev_nl_napi_get_dumpit, + .policy = netdev_napi_get_dump_nl_policy, + .maxattr = NETDEV_A_NAPI_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 086623c1797a..a47f2bcbe4fa 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -26,6 +26,8 @@ int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 3bc1661e6ebf..4c8ea6a4f015 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -155,6 +155,16 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index f857960a7f06..e7bdbcb01f22 
100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -109,6 +109,14 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -130,6 +138,7 @@ enum { NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index fbf7e24ade91..906b61554698 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -24,6 +24,7 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", [NETDEV_CMD_QUEUE_GET] = "queue-get", + [NETDEV_CMD_NAPI_GET] = "napi-get", }; const char *netdev_op_str(int op) @@ -160,6 +161,16 @@ struct ynl_policy_nest netdev_queue_nest = { .table = netdev_queue_policy, }; +struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { + [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_napi_nest = { + .max_attr = NETDEV_A_NAPI_MAX, + .table = netdev_napi_policy, +}; + /* Common nested types */ void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) { @@ -770,6 +781,119 @@ free_list: return NULL; } +/* ============== NETDEV_CMD_NAPI_GET ============== */ +/* NETDEV_CMD_NAPI_GET - do */ +void netdev_napi_get_req_free(struct netdev_napi_get_req *req) +{ + free(req); +} + +void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_napi_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct netdev_napi_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_napi_get_rsp * +netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_napi_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); + ys->req_policy = &netdev_napi_nest; + yrs.yarg.rsp_policy = &netdev_napi_nest; + + if (req->_present.id) + mnl_attr_put_u32(nlh, NETDEV_A_NAPI_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_napi_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_NAPI_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_napi_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_NAPI_GET - dump */ +void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp) +{ + struct netdev_napi_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = 
rsp->next; + + free(rsp); + } +} + +struct netdev_napi_get_list * +netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_napi_get_list); + yds.cb = netdev_napi_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_NAPI_GET; + yds.rsp_policy = &netdev_napi_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); + ys->req_policy = &netdev_napi_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_NAPI_IFINDEX, req->ifindex); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_napi_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index d7daf6df3df0..481c9e45b689 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -360,4 +360,79 @@ struct netdev_queue_get_list * netdev_queue_get_dump(struct ynl_sock *ys, struct netdev_queue_get_req_dump *req); +/* ============== NETDEV_CMD_NAPI_GET ============== */ +/* NETDEV_CMD_NAPI_GET - do */ +struct netdev_napi_get_req { + struct { + __u32 id:1; + } _present; + + __u32 id; +}; + +static inline struct netdev_napi_get_req *netdev_napi_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_napi_get_req)); +} +void netdev_napi_get_req_free(struct netdev_napi_get_req *req); + +static inline void +netdev_napi_get_req_set_id(struct netdev_napi_get_req *req, __u32 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_napi_get_rsp { + struct { + __u32 id:1; + __u32 ifindex:1; + } _present; + + __u32 id; + __u32 ifindex; +}; + +void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); + +/* + * Get information about NAPI instances configured on the system. + */ +struct netdev_napi_get_rsp * +netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req); + +/* NETDEV_CMD_NAPI_GET - dump */ +struct netdev_napi_get_req_dump { + struct { + __u32 ifindex:1; + } _present; + + __u32 ifindex; +}; + +static inline struct netdev_napi_get_req_dump * +netdev_napi_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct netdev_napi_get_req_dump)); +} +void netdev_napi_get_req_dump_free(struct netdev_napi_get_req_dump *req); + +static inline void +netdev_napi_get_req_dump_set_ifindex(struct netdev_napi_get_req_dump *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +struct netdev_napi_get_list { + struct netdev_napi_get_list *next; + struct netdev_napi_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp); + +struct netdev_napi_get_list * +netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req); + #endif /* _LINUX_NETDEV_GEN_H */ -- cgit v1.2.3-70-g09d2 From 5a5131d66fe02337de0b1b2e021b58f0f55c6df5 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:02 -0800 Subject: netdev-genl: spec: Add irq in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for interrupt number among the NAPI attributes. Add code generated from the spec. 
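With the attribute in place, user space can read the vector through the generated YNL helpers added earlier in the series. A minimal sketch, assuming 'ys' is a ynl socket already created for the netdev family and 'napi_id' was obtained from a queue-get reply:

#include <stdio.h>
#include "netdev-user.h"	/* generated by ynl-gen from netdev.yaml */

static void show_napi_irq(struct ynl_sock *ys, unsigned int napi_id)
{
	struct netdev_napi_get_req *req;
	struct netdev_napi_get_rsp *rsp;

	req = netdev_napi_get_req_alloc();
	netdev_napi_get_req_set_id(req, napi_id);

	rsp = netdev_napi_get(ys, req);
	netdev_napi_get_req_free(req);
	if (!rsp)
		return;

	if (rsp->_present.irq)	/* absent if the kernel did not report an irq */
		printf("napi %u -> irq %u\n", napi_id, rsp->irq);
	netdev_napi_get_rsp_free(rsp);
}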
Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147334210.5260.18178387869057516983.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 5 +++++ include/uapi/linux/netdev.h | 1 + tools/include/uapi/linux/netdev.h | 1 + tools/net/ynl/generated/netdev-user.c | 6 ++++++ tools/net/ynl/generated/netdev-user.h | 2 ++ 5 files changed, 15 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 76d6b2e15b67..a3a1c6ad521b 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -226,6 +226,10 @@ attribute-sets: name: id doc: ID of the NAPI instance. type: u32 + - + name: irq + doc: The associated interrupt vector number for the napi + type: u32 - name: queue attributes: @@ -384,6 +388,7 @@ operations: attributes: - id - ifindex + - irq dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e7bdbcb01f22..30fea409b71e 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -112,6 +112,7 @@ enum { enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e7bdbcb01f22..30fea409b71e 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -112,6 +112,7 @@ enum { enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 906b61554698..58e5196da4bd 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -164,6 +164,7 @@ struct ynl_policy_nest netdev_queue_nest = { struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, }; struct ynl_policy_nest netdev_napi_nest = { @@ -210,6 +211,11 @@ int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, return MNL_CB_ERROR; dst->_present.ifindex = 1; dst->ifindex = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_IRQ) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.irq = 1; + dst->irq = mnl_attr_get_u32(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 481c9e45b689..0c3224017c12 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -387,10 +387,12 @@ struct netdev_napi_get_rsp { struct { __u32 id:1; __u32 ifindex:1; + __u32 irq:1; } _present; __u32 id; __u32 ifindex; + __u32 irq; }; void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); -- cgit v1.2.3-70-g09d2 From 8481a249a0eaf0000dbb18f7689ccd50ea9835cd Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:13 -0800 Subject: netdev-genl: spec: Add PID in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for PID of the NAPI thread. Add code generated from the spec. 
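Similarly, dumping all NAPI instances of a device and reporting the servicing thread (only present when NAPI runs in threaded mode) can be sketched with the generated dump helpers; 'ys' and 'ifindex' are assumed to come from the caller, and the list walk mirrors what netdev_napi_get_list_free() does:

#include <stdio.h>
#include "netdev-user.h"	/* generated by ynl-gen from netdev.yaml */

static void show_napi_threads(struct ynl_sock *ys, unsigned int ifindex)
{
	struct netdev_napi_get_req_dump *req;
	struct netdev_napi_get_list *rsp, *cur;

	req = netdev_napi_get_req_dump_alloc();
	netdev_napi_get_req_dump_set_ifindex(req, ifindex);

	rsp = netdev_napi_get_dump(ys, req);
	netdev_napi_get_req_dump_free(req);
	if (!rsp)
		return;

	for (cur = rsp; (void *)cur != YNL_LIST_END; cur = cur->next) {
		/* pid is absent when the instance is not in threaded mode */
		if (cur->obj._present.pid)
			printf("napi %u: thread pid %u\n",
			       cur->obj.id, cur->obj.pid);
	}
	netdev_napi_get_list_free(rsp);
}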
Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147335301.5260.11872351477120434501.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 7 +++++++ include/uapi/linux/netdev.h | 1 + tools/include/uapi/linux/netdev.h | 1 + tools/net/ynl/generated/netdev-user.c | 6 ++++++ tools/net/ynl/generated/netdev-user.h | 2 ++ 5 files changed, 17 insertions(+) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index a3a1c6ad521b..f2c76d103bd8 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -230,6 +230,12 @@ attribute-sets: name: irq doc: The associated interrupt vector number for the napi type: u32 + - + name: pid + doc: PID of the napi thread, if NAPI is configured to operate in + threaded mode. If NAPI is not in threaded mode (i.e. uses normal + softirq context), the attribute will be absent. + type: u32 - name: queue attributes: @@ -389,6 +395,7 @@ operations: - id - ifindex - irq + - pid dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 30fea409b71e..424c5e28f495 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -113,6 +113,7 @@ enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 30fea409b71e..424c5e28f495 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -113,6 +113,7 @@ enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 58e5196da4bd..ed8bcb855a1d 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -165,6 +165,7 @@ struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_PID] = { .name = "pid", .type = YNL_PT_U32, }, }; struct ynl_policy_nest netdev_napi_nest = { @@ -216,6 +217,11 @@ int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, return MNL_CB_ERROR; dst->_present.irq = 1; dst->irq = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_PID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.pid = 1; + dst->pid = mnl_attr_get_u32(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 0c3224017c12..3830cf2ab6b8 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -388,11 +388,13 @@ struct netdev_napi_get_rsp { __u32 id:1; __u32 ifindex:1; __u32 irq:1; + __u32 pid:1; } _present; __u32 id; __u32 ifindex; __u32 irq; + __u32 pid; }; void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); -- cgit v1.2.3-70-g09d2 From 25ae948b447881bf689d459cd5bd4629d9c04b20 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:00:57 +0800 Subject: selftests/net: add lib.sh Add a lib.sh for net selftests. 
This file can be used to define commonly used variables and functions. Some commonly used functions can be moved from forwarding/lib.sh to this lib file. e.g. busywait(). Add function setup_ns() for user to create unique namespaces with given prefix name. Reviewed-by: Petr Machata Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/forwarding/lib.sh | 27 +-------- tools/testing/selftests/net/lib.sh | 85 +++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 27 deletions(-) create mode 100644 tools/testing/selftests/net/lib.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9274edfb76ff..14bd68da7466 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -54,7 +54,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh -TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e37a15eda6c2..8f6ca458af9a 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -4,9 +4,6 @@ ############################################################################## # Defines -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - # Can be overridden by the configuration file. PING=${PING:=ping} PING6=${PING6:=ping6} @@ -41,6 +38,7 @@ if [[ -f $relative_path/forwarding.config ]]; then source "$relative_path/forwarding.config" fi +source ../lib.sh ############################################################################## # Sanity checks @@ -395,29 +393,6 @@ log_info() echo "INFO: $msg" } -busywait() -{ - local timeout=$1; shift - - local start_time="$(date -u +%s%3N)" - while true - do - local out - out=$("$@") - local ret=$? - if ((!ret)); then - echo -n "$out" - return 0 - fi - - local current_time="$(date -u +%s%3N)" - if ((current_time - start_time > timeout)); then - echo -n "$out" - return 1 - fi - done -} - not() { "$@" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh new file mode 100644 index 000000000000..518eca57b815 --- /dev/null +++ b/tools/testing/selftests/net/lib.sh @@ -0,0 +1,85 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +############################################################################## +# Helpers +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + +cleanup_ns() +{ + local ns="" + local errexit=0 + local ret=0 + + # disable errexit temporary + if [[ $- =~ "e" ]]; then + errexit=1 + set +e + fi + + for ns in "$@"; do + ip netns delete "${ns}" &> /dev/null + if ! 
busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then + echo "Warn: Failed to remove namespace $ns" + ret=1 + fi + done + + [ $errexit -eq 1 ] && set -e + return $ret +} + +# setup netns with given names as prefix. e.g +# setup_ns local remote +setup_ns() +{ + local ns="" + local ns_name="" + local ns_list="" + for ns_name in "$@"; do + # Some test may setup/remove same netns multi times + if unset ${ns_name} 2> /dev/null; then + ns="${ns_name,,}-$(mktemp -u XXXXXX)" + eval readonly ${ns_name}="$ns" + else + eval ns='$'${ns_name} + cleanup_ns "$ns" + + fi + + if ! ip netns add "$ns"; then + echo "Failed to create namespace $ns_name" + cleanup_ns "$ns_list" + return $ksft_skip + fi + ip -n "$ns" link set lo up + ns_list="$ns_list $ns" + done +} -- cgit v1.2.3-70-g09d2 From 64227511ad57796fac514ad8df6f2830cc887563 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:00:58 +0800 Subject: selftests/net: convert arp_ndisc_evict_nocarrier.sh to run it in unique namespace Here is the test result after conversion. ]# ./arp_ndisc_evict_nocarrier.sh run arp_evict_nocarrier=1 test ok run arp_evict_nocarrier=0 test ok run all.arp_evict_nocarrier=0 test ok run ndisc_evict_nocarrier=1 test ok run ndisc_evict_nocarrier=0 test ok run all.ndisc_evict_nocarrier=0 test ok Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- .../selftests/net/arp_ndisc_evict_nocarrier.sh | 46 ++++++++-------------- 1 file changed, 16 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index 4a110bb01e53..92eb880c52f2 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -12,7 +12,8 @@ # {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry # -readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +source lib.sh + readonly V4_ADDR0=10.0.10.1 readonly V4_ADDR1=10.0.10.2 readonly V6_ADDR0=2001:db8:91::1 @@ -22,43 +23,29 @@ ret=0 cleanup_v6() { - ip netns del me - ip netns del peer + cleanup_ns ${me} ${peer} sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 } -create_ns() -{ - local n=${1} - - ip netns del ${n} 2>/dev/null - - ip netns add ${n} - ip netns set ${n} $((nsid++)) - ip -netns ${n} link set lo up -} - - setup_v6() { - create_ns me - create_ns peer + setup_ns me peer - IP="ip -netns me" + IP="ip -netns ${me}" $IP li add veth1 type veth peer name veth2 $IP li set veth1 up $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad - $IP li set veth2 netns peer up - ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad + $IP li set veth2 netns ${peer} up + ip -netns ${peer} -6 addr add $V6_ADDR1/64 dev veth2 nodad - ip netns exec me sysctl -w $1 >/dev/null 2>&1 + ip netns exec ${me} sysctl -w $1 >/dev/null 2>&1 # Establish an ND cache entry - ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 + ip netns exec ${me} ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 # Should have the veth1 entry in ND table - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? 
-ne 0 ]; then cleanup_v6 echo "failed" @@ -66,11 +53,11 @@ setup_v6() { fi # Set veth2 down, which will put veth1 in NOCARRIER state - ip netns exec peer ip link set veth2 down + ip netns exec ${peer} ip link set veth2 down } setup_v4() { - ip netns add "${PEER_NS}" + setup_ns PEER_NS ip link add name veth0 type veth peer name veth1 ip link set dev veth0 up ip link set dev veth1 netns "${PEER_NS}" @@ -99,8 +86,7 @@ setup_v4() { cleanup_v4() { ip neigh flush dev veth0 ip link del veth0 - local -r ns="$(ip netns list|grep $PEER_NS)" - [ -n "$ns" ] && ip netns del $ns 2>/dev/null + cleanup_ns $PEER_NS sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1 sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1 @@ -163,7 +149,7 @@ run_ndisc_evict_nocarrier_enabled() { setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "failed" @@ -180,7 +166,7 @@ run_ndisc_evict_nocarrier_disabled() { setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "ok" @@ -197,7 +183,7 @@ run_ndisc_evict_nocarrier_disabled_all() { setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "ok" -- cgit v1.2.3-70-g09d2 From 7f770d28f2e5abfd442ad689ba1129dd66593529 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:00:59 +0800 Subject: selftests/net: specify the interface when do arping When do arping, the interface need to be specified. Or we will get error: Interface "lo" is not ARPable. And the test failed. 
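For clarity, the essence of the fix: the gratuitous ARP has to go out of an ARPable device, so arping must be bound to the host-side veth with -I instead of letting it default to "lo". A minimal sketch using the test script's own variables (the diff further below makes the equivalent change to the existing invocation):

    # bind arping to the host-side veth so it does not fall back to "lo"
    ip netns exec "${HOST_NS}" arping -A -I "${HOST_INTF}" -U "${HOST_ADDR}" -c1 >/dev/null 2>&1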
]# ./arp_ndisc_untracked_subnets.sh TEST: test_arp: accept_arp=0 [ OK ] TEST: test_arp: accept_arp=1 [FAIL] TEST: test_arp: accept_arp=2 same_subnet=0 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=1 [FAIL] After fix: ]# ./arp_ndisc_untracked_subnets.sh TEST: test_arp: accept_arp=0 [ OK ] TEST: test_arp: accept_arp=1 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=0 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=1 [ OK ] Fixes: 0ea7b0a454ca ("selftests: net: arp_ndisc_untracked_subnets: test for arp_accept and accept_untracked_na") Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh index c899b446acb6..327427ec10f5 100755 --- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -150,7 +150,7 @@ arp_test_gratuitous() { fi # Supply arp_accept option to set up which sets it in sysctl setup ${arp_accept} - ip netns exec ${HOST_NS} arping -A -U ${HOST_ADDR} -c1 2>&1 >/dev/null + ip netns exec ${HOST_NS} arping -A -I ${HOST_INTF} -U ${HOST_ADDR} -c1 2>&1 >/dev/null if verify_arp $1 $2; then printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" -- cgit v1.2.3-70-g09d2 From 3a0f3367006f7cb4203e8eecc77ce7d63190a1df Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:00 +0800 Subject: selftests/net: convert arp_ndisc_untracked_subnets.sh to run it in unique namespace Here is the test result after conversion. 2 tests also failed without this patch ]# ./arp_ndisc_untracked_subnets.sh TEST: test_arp: accept_arp=0 [ OK ] TEST: test_arp: accept_arp=1 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=0 [ OK ] TEST: test_arp: accept_arp=2 same_subnet=1 [ OK ] TEST: test_ndisc: accept_untracked_na=0 [ OK ] TEST: test_ndisc: accept_untracked_na=1 [ OK ] TEST: test_ndisc: accept_untracked_na=2 same_subnet=0 [ OK ] TEST: test_ndisc: accept_untracked_na=2 same_subnet=1 [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- .../selftests/net/arp_ndisc_untracked_subnets.sh | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh index 327427ec10f5..a40c0e9bd023 100755 --- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -5,16 +5,14 @@ # garp to the router. Router accepts or ignores based on its arp_accept # or accept_untracked_na configuration. 
+source lib.sh + TESTS="arp ndisc" -ROUTER_NS="ns-router" -ROUTER_NS_V6="ns-router-v6" ROUTER_INTF="veth-router" ROUTER_ADDR="10.0.10.1" ROUTER_ADDR_V6="2001:db8:abcd:0012::1" -HOST_NS="ns-host" -HOST_NS_V6="ns-host-v6" HOST_INTF="veth-host" HOST_ADDR="10.0.10.2" HOST_ADDR_V6="2001:db8:abcd:0012::2" @@ -23,13 +21,11 @@ SUBNET_WIDTH=24 PREFIX_WIDTH_V6=64 cleanup() { - ip netns del ${HOST_NS} - ip netns del ${ROUTER_NS} + cleanup_ns ${HOST_NS} ${ROUTER_NS} } cleanup_v6() { - ip netns del ${HOST_NS_V6} - ip netns del ${ROUTER_NS_V6} + cleanup_ns ${HOST_NS_V6} ${ROUTER_NS_V6} } setup() { @@ -37,8 +33,7 @@ setup() { local arp_accept=$1 # Set up two namespaces - ip netns add ${ROUTER_NS} - ip netns add ${HOST_NS} + setup_ns HOST_NS ROUTER_NS # Set up interfaces veth0 and veth1, which are pairs in separate # namespaces. veth0 is veth-router, veth1 is veth-host. @@ -72,8 +67,7 @@ setup_v6() { local accept_untracked_na=$1 # Set up two namespaces - ip netns add ${ROUTER_NS_V6} - ip netns add ${HOST_NS_V6} + setup_ns HOST_NS_V6 ROUTER_NS_V6 # Set up interfaces veth0 and veth1, which are pairs in separate # namespaces. veth0 is veth-router, veth1 is veth-host. -- cgit v1.2.3-70-g09d2 From 7c16d485fec5d0010b992e75ad996e7382950879 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:01 +0800 Subject: selftests/net: convert cmsg tests to make them run in unique namespace Here is the test result after conversion. ]# ./cmsg_ipv6.sh OK ]# ./cmsg_so_mark.sh OK ]# ./cmsg_time.sh OK Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/cmsg_ipv6.sh | 10 ++++------ tools/testing/selftests/net/cmsg_so_mark.sh | 7 ++++--- tools/testing/selftests/net/cmsg_time.sh | 7 ++++--- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index 330d0b1ceced..f30bd57d5e38 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -1,9 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ksft_skip=4 +source lib.sh -NS=ns IP6=2001:db8:1::1/64 TGT6=2001:db8:1::2 TMPF=$(mktemp --suffix ".pcap") @@ -11,13 +10,11 @@ TMPF=$(mktemp --suffix ".pcap") cleanup() { rm -f $TMPF - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT -NSEXE="ip netns exec $NS" - tcpdump -h | grep immediate-mode >> /dev/null if [ $? -ne 0 ]; then echo "SKIP - tcpdump with --immediate-mode option required" @@ -25,7 +22,8 @@ if [ $? 
-ne 0 ]; then fi # Namespaces -ip netns add $NS +setup_ns NS +NSEXE="ip netns exec $NS" $NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh index 1650b8622f2f..772ad0cc2630 100755 --- a/tools/testing/selftests/net/cmsg_so_mark.sh +++ b/tools/testing/selftests/net/cmsg_so_mark.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -NS=ns +source lib.sh + IP4=172.16.0.1/24 TGT4=172.16.0.2 IP6=2001:db8:1::1/64 @@ -10,13 +11,13 @@ MARK=1000 cleanup() { - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT # Namespaces -ip netns add $NS +setup_ns NS ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index 91161e1da734..af85267ad1e3 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -NS=ns +source lib.sh + IP4=172.16.0.1/24 TGT4=172.16.0.2 IP6=2001:db8:1::1/64 @@ -9,13 +10,13 @@ TGT6=2001:db8:1::2 cleanup() { - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT # Namespaces -ip netns add $NS +setup_ns NS ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null -- cgit v1.2.3-70-g09d2 From 0d8b488792e4f2e26f54248f8a0380b73e1ff992 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:02 +0800 Subject: selftests/net: convert drop_monitor_tests.sh to run it in unique namespace Here is the test result after conversion. ]# ./drop_monitor_tests.sh Software drops test TEST: Capturing active software drops [ OK ] TEST: Capturing inactive software drops [ OK ] Hardware drops test TEST: Capturing active hardware drops [ OK ] TEST: Capturing inactive hardware drops [ OK ] Tests passed: 4 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/drop_monitor_tests.sh | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index b7650e30d18b..7c4818c971fc 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -2,10 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # This test is for checking drop monitor functionality. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS=" @@ -13,10 +11,6 @@ TESTS=" hw_drops " -IP="ip -netns ns1" -TC="tc -netns ns1" -DEVLINK="devlink -N ns1" -NS_EXEC="ip netns exec ns1" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} @@ -43,7 +37,7 @@ setup() modprobe netdevsim &> /dev/null set -e - ip netns add ns1 + setup_ns NS1 $IP link add dummy10 up type dummy $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device @@ -57,7 +51,7 @@ setup() cleanup() { $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device - ip netns del ns1 + cleanup_ns ${NS1} } sw_drops_test() @@ -194,8 +188,15 @@ if [ $? 
-ne 0 ]; then exit $ksft_skip fi -# start clean +# create netns first so we can get the namespace name +setup_ns NS1 cleanup &> /dev/null +trap cleanup EXIT + +IP="ip -netns ${NS1}" +TC="tc -netns ${NS1}" +DEVLINK="devlink -N ${NS1}" +NS_EXEC="ip netns exec ${NS1}" for t in $TESTS do -- cgit v1.2.3-70-g09d2 From baf37f213c88ae9e620195f34509a9a4be7ffccd Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:03 +0800 Subject: selftests/net: convert traceroute.sh to run it in unique namespace Here is the test result after conversion. ]# ./traceroute.sh TEST: IPV6 traceroute [ OK ] TEST: IPV4 traceroute [ OK ] Tests passed: 2 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/traceroute.sh | 82 ++++++++++++++----------------- 1 file changed, 36 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index de9ca97abc30..282f14760940 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -4,6 +4,7 @@ # Run traceroute/traceroute6 tests # +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -69,9 +70,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip netns exec ${ns} ip link set lo up if [ "${addr}" != "-" ]; then ip netns exec ${ns} ip addr add dev lo ${addr} fi @@ -160,12 +158,7 @@ connect_ns() cleanup_traceroute6() { - local ns - - for ns in host-1 host-2 router-1 router-2 - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_traceroute6() @@ -176,33 +169,34 @@ setup_traceroute6() cleanup_traceroute6 set -e - create_ns host-1 - create_ns host-2 - create_ns router-1 - create_ns router-2 + setup_ns h1 h2 r1 r2 + create_ns $h1 + create_ns $h2 + create_ns $r1 + create_ns $r2 # Setup N3 - connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64 - ip netns exec host-2 ip route add default via 2000:103::2 + connect_ns $r2 eth3 - 2000:103::2/64 $h2 eth3 - 2000:103::4/64 + ip netns exec $h2 ip route add default via 2000:103::2 # Setup N2 - connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64 - ip netns exec router-1 ip route add default via 2000:102::2 + connect_ns $r1 eth2 - 2000:102::1/64 $r2 eth2 - 2000:102::2/64 + ip netns exec $r1 ip route add default via 2000:102::2 # Setup N1. host-1 and router-2 connect to a bridge in router-1. 
- ip netns exec router-1 ip link add name ${brdev} type bridge - ip netns exec router-1 ip link set ${brdev} up - ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev} + ip netns exec $r1 ip link add name ${brdev} type bridge + ip netns exec $r1 ip link set ${brdev} up + ip netns exec $r1 ip addr add 2000:101::1/64 dev ${brdev} - connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - - - ip netns exec router-1 ip link set dev eth0 master ${brdev} - ip netns exec host-1 ip route add default via 2000:101::1 + connect_ns $h1 eth0 - 2000:101::3/64 $r1 eth0 - - + ip netns exec $r1 ip link set dev eth0 master ${brdev} + ip netns exec $h1 ip route add default via 2000:101::1 - connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - - - ip netns exec router-1 ip link set dev eth1 master ${brdev} + connect_ns $r2 eth1 - 2000:101::2/64 $r1 eth1 - - + ip netns exec $r1 ip link set dev eth1 master ${brdev} # Prime the network - ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1 + ip netns exec $h1 ping6 -c5 2000:103::4 >/dev/null 2>&1 set +e } @@ -217,7 +211,7 @@ run_traceroute6() setup_traceroute6 # traceroute6 host-2 from host-1 (expects 2000:102::2) - run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2" + run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" log_test $? 0 "IPV6 traceroute" cleanup_traceroute6 @@ -240,12 +234,7 @@ run_traceroute6() cleanup_traceroute() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $router } setup_traceroute() @@ -254,24 +243,25 @@ setup_traceroute() cleanup_traceroute set -e - create_ns host-1 - create_ns host-2 - create_ns router + setup_ns h1 h2 router + create_ns $h1 + create_ns $h2 + create_ns $router - connect_ns host-1 eth0 1.0.1.3/24 - \ - router eth1 1.0.3.1/24 - - ip netns exec host-1 ip route add default via 1.0.1.1 + connect_ns $h1 eth0 1.0.1.3/24 - \ + $router eth1 1.0.3.1/24 - + ip netns exec $h1 ip route add default via 1.0.1.1 - ip netns exec router ip addr add 1.0.1.1/24 dev eth1 - ip netns exec router sysctl -qw \ + ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 + ip netns exec $router sysctl -qw \ net.ipv4.icmp_errors_use_inbound_ifaddr=1 - connect_ns host-2 eth0 1.0.2.4/24 - \ - router eth2 1.0.2.1/24 - - ip netns exec host-2 ip route add default via 1.0.2.1 + connect_ns $h2 eth0 1.0.2.4/24 - \ + $router eth2 1.0.2.1/24 - + ip netns exec $h2 ip route add default via 1.0.2.1 # Prime the network - ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1 + ip netns exec $h1 ping -c5 1.0.2.4 >/dev/null 2>&1 set +e } @@ -286,7 +276,7 @@ run_traceroute() setup_traceroute # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" + run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" log_test $? 0 "IPV4 traceroute" cleanup_traceroute -- cgit v1.2.3-70-g09d2 From c1516b3563aca82a35277dc8999370868cf20175 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:04 +0800 Subject: selftests/net: convert icmp_redirect.sh to run it in unique namespace Here is the test result after conversion. # ./icmp_redirect.sh ########################################################################### Legacy routing ########################################################################### TEST: IPv4: redirect exception [ OK ] ... 
TEST: IPv4: mtu exception plus redirect [ OK ] TEST: IPv6: mtu exception plus redirect [ OK ] Tests passed: 40 Tests failed: 0 Tests xfailed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/icmp_redirect.sh | 182 +++++++++++++-------------- 1 file changed, 88 insertions(+), 94 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index 7b9d6e31b8e7..d6f0e449c029 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -19,6 +19,7 @@ # Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent # from r1 to h1 telling h1 to use r2 when talking to h2. +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -140,11 +141,7 @@ get_linklocal() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } create_vrf() @@ -171,102 +168,99 @@ setup() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns li set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 - - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 + + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 + fi done # # create interconnects # - ip -netns h1 li add eth0 type veth peer name r1h1 - ip -netns h1 li set r1h1 netns r1 name eth0 up + ip -netns $h1 li add eth0 type veth peer name r1h1 + ip -netns $h1 li set r1h1 netns $r1 name eth0 up - ip -netns h1 li add eth1 type veth peer name r2h1 - ip -netns h1 li set r2h1 netns r2 name eth0 up + ip -netns $h1 li add eth1 type veth peer name r2h1 + ip -netns $h1 li set r2h1 netns $r2 name eth0 up - ip -netns h2 li add eth0 type veth peer name r2h2 - ip -netns h2 li set eth0 up - ip -netns h2 li set r2h2 netns r2 name eth2 up + ip -netns $h2 li add eth0 type veth peer name r2h2 + ip -netns $h2 li set eth0 up + ip -netns $h2 li set r2h2 netns $r2 name eth2 up - ip -netns r1 li add eth1 type veth peer name r2r1 - ip -netns r1 li set eth1 up - ip -netns r1 li set r2r1 netns r2 name eth1 up + ip -netns $r1 li add eth1 type veth peer name r2r1 + ip -netns $r1 li set eth1 up + ip -netns $r1 li set r2r1 netns $r2 
name eth1 up # # h1 # if [ "${WITH_VRF}" = "yes" ]; then - create_vrf "h1" + create_vrf "$h1" H1_VRF_ARG="vrf ${VRF}" H1_PING_ARG="-I ${VRF}" else H1_VRF_ARG= H1_PING_ARG= fi - ip -netns h1 li add br0 type bridge + ip -netns $h1 li add br0 type bridge if [ "${WITH_VRF}" = "yes" ]; then - ip -netns h1 li set br0 vrf ${VRF} up + ip -netns $h1 li set br0 vrf ${VRF} up else - ip -netns h1 li set br0 up + ip -netns $h1 li set br0 up fi - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 li set eth0 master br0 up - ip -netns h1 li set eth1 master br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 li set eth0 master br0 up + ip -netns $h1 li set eth1 master br0 up # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 ro add default via ${R2_N2_IP} dev eth0 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0 + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 ro add default via ${R2_N2_IP} dev eth0 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 -6 ro add default via ${R2_N2_IP6} dev eth0 # # r1 # - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30 - ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_R2_N1_IP}/30 + ip -netns $r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30 - ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad - ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_R1_N1_IP}/30 + ip -netns $r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad + ip -netns $r2 addr add dev eth2 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad sleep 2 - R1_LLADDR=$(get_linklocal r1 eth0) + R1_LLADDR=$(get_linklocal $r1 eth0) if [ $? -ne 0 ]; then echo "Error: Failed to get link-local address of r1's eth0" exit 1 fi log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}" - R2_LLADDR=$(get_linklocal r2 eth0) + R2_LLADDR=$(get_linklocal $r2 eth0) if [ $? 
-ne 0 ]; then echo "Error: Failed to get link-local address of r2's eth0" exit 1 @@ -278,8 +272,8 @@ change_h2_mtu() { local mtu=$1 - run_cmd ip -netns h2 li set eth0 mtu ${mtu} - run_cmd ip -netns r2 li set eth2 mtu ${mtu} + run_cmd ip -netns $h2 li set eth0 mtu ${mtu} + run_cmd ip -netns $r2 li set eth2 mtu ${mtu} } check_exception() @@ -291,40 +285,40 @@ check_exception() # From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102) if [ "$VERBOSE" = "1" ]; then echo "Commands to check for exception:" - run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} - run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} + run_cmd ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} + run_cmd ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} fi if [ -n "${mtu}" ]; then mtu=" mtu ${mtu}" fi if [ "$with_redirect" = "yes" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache expires [0-9]*sec${mtu}" elif [ -n "${mtu}" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache expires [0-9]*sec${mtu}" else # want to verify that neither mtu nor redirected appears in # the route get output. The -v will wipe out the cache line # if either are set so the last grep -q will not find a match - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -E -v 'mtu|redirected' | grep -q "cache" fi log_test $? 0 "IPv4: ${desc}" 0 # No PMTU info for test "redirect" and "mtu exception plus redirect" if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0" elif [ -n "${mtu}" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" else # IPv6 is a bit harder. First strip out the match if it # contains an mtu exception and then look for the first # gateway - R1's lladdr - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${R1_LLADDR}" fi log_test $? 
0 "IPv6: ${desc}" 1 @@ -334,21 +328,21 @@ run_ping() { local sz=$1 - run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} - run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} } replace_route_new() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 } reset_route_new() { - run_cmd ip -netns r1 nexthop flush - run_cmd ip -netns h1 nexthop flush + run_cmd ip -netns $r1 nexthop flush + run_cmd ip -netns $h1 nexthop flush initial_route_new } @@ -356,34 +350,34 @@ reset_route_new() initial_route_new() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 ro add ${H2_N2} nhid 1 + run_cmd ip -netns $r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} nhid 1 - run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2 + run_cmd ip -netns $r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} nhid 2 # h1 to h2 via r1 - run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 + run_cmd ip -netns $h1 nexthop add id 1 via ${R1_N1_IP} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 - run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 + run_cmd ip -netns $h1 nexthop add id 2 via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 } replace_route_legacy() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 } reset_route_legacy() { - run_cmd ip -netns r1 ro del ${H2_N2} - run_cmd ip -netns r1 -6 ro del ${H2_N2_6} + run_cmd ip -netns $r1 ro del ${H2_N2} + run_cmd ip -netns $r1 -6 ro del ${H2_N2_6} - run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2} - run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} + run_cmd ip -netns $h1 ro del ${H1_VRF_ARG} ${H2_N2} + run_cmd ip -netns $h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} initial_route_legacy } @@ -391,22 +385,22 @@ reset_route_legacy() initial_route_legacy() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 # h1 to h2 via r1 # - IPv6 redirect only works if gateway is the LLA - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 + run_cmd 
ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 } check_connectivity() { local rc - run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} rc=$? - run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} [ $? -ne 0 ] && rc=$? return $rc -- cgit v1.2.3-70-g09d2 From 80b74bd33421ddde1b716563e2e6e0da5edc5d9b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:05 +0800 Subject: sleftests/net: convert icmp.sh to run it in unique namespace Here is the test result after conversion. ]# ./icmp.sh OK Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/icmp.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh index e4b04cd1644a..824cb0e35eff 100755 --- a/tools/testing/selftests/net/icmp.sh +++ b/tools/testing/selftests/net/icmp.sh @@ -18,8 +18,8 @@ # that address space, so the kernel should substitute the dummy address # 192.0.0.8 defined in RFC7600. -NS1=ns1 -NS2=ns2 +source lib.sh + H1_IP=172.16.0.1/32 H1_IP6=2001:db8:1::1 RT1=172.16.1.0/24 @@ -32,15 +32,13 @@ TMPFILE=$(mktemp) cleanup() { rm -f "$TMPFILE" - ip netns del $NS1 - ip netns del $NS2 + cleanup_ns $NS1 $NS2 } trap cleanup EXIT # Namespaces -ip netns add $NS1 -ip netns add $NS2 +setup_ns NS1 NS2 # Connectivity ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2 -- cgit v1.2.3-70-g09d2 From 2ab1ee827e976d411fd140225016c2e532e4df12 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:06 +0800 Subject: selftests/net: convert ioam6.sh to run it in unique namespace Here is the test result after conversion. ]# ./ioam6.sh -------------------------------------------------------------------------- OUTPUT tests -------------------------------------------------------------------------- TEST: Unknown IOAM namespace (inline mode) [ OK ] TEST: Unknown IOAM namespace (encap mode) [ OK ] TEST: Missing trace room (inline mode) [ OK ] TEST: Missing trace room (encap mode) [ OK ] TEST: Trace type with bit 0 only (inline mode) [ OK ] ... TEST: Full supported trace (encap mode) [ OK ] -------------------------------------------------------------------------- GLOBAL tests -------------------------------------------------------------------------- TEST: Forward - Full supported trace (inline mode) [ OK ] TEST: Forward - Full supported trace (encap mode) [ OK ] - Tests passed: 88 - Tests failed: 0 Acked-by: David Ahern Reviewed-by: Justin Iurman Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/ioam6.sh | 247 +++++++++++++++++------------------ 1 file changed, 121 insertions(+), 126 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 4ceb401da1bf..fe59ca3e5596 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -117,8 +117,7 @@ # | Schema Data | | # +-----------------------------------------------------------+ -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 +source lib.sh ################################################################################ # # @@ -195,32 +194,32 @@ TESTS_GLOBAL=" check_kernel_compatibility() { - ip netns add ioam-tmp-node - ip link add name veth0 netns ioam-tmp-node type veth \ - peer name veth1 netns ioam-tmp-node + setup_ns ioam_tmp_node + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node - ip -netns ioam-tmp-node link set veth0 up - ip -netns ioam-tmp-node link set veth1 up + ip -netns $ioam_tmp_node link set veth0 up + ip -netns $ioam_tmp_node link set veth1 up - ip -netns ioam-tmp-node ioam namespace add 0 + ip -netns $ioam_tmp_node ioam namespace add 0 ns_ad=$? - ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0" + ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" ns_sh=$? if [[ $ns_ad != 0 || $ns_sh != 0 ]] then echo "SKIP: kernel version probably too old, missing ioam support" ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true exit $ksft_skip fi - ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ + ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 tr_ad=$? - ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6" + ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" tr_sh=$? if [[ $tr_ad != 0 || $tr_sh != 0 ]] @@ -228,12 +227,12 @@ check_kernel_compatibility() echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ "without CONFIG_IPV6_IOAM6_LWTUNNEL?" ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true exit $ksft_skip fi ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true lsmod | grep -q "ip6_tunnel" ip6tnl_loaded=$? 
@@ -265,9 +264,7 @@ cleanup() ip link del ioam-veth-alpha 2>/dev/null || true ip link del ioam-veth-gamma 2>/dev/null || true - ip netns del ioam-node-alpha || true - ip netns del ioam-node-beta || true - ip netns del ioam-node-gamma || true + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true if [ $ip6tnl_loaded != 0 ] then @@ -277,69 +274,67 @@ cleanup() setup() { - ip netns add ioam-node-alpha - ip netns add ioam-node-beta - ip netns add ioam-node-gamma - - ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \ - peer name ioam-veth-betaL netns ioam-node-beta - ip link add name ioam-veth-betaR netns ioam-node-beta type veth \ - peer name ioam-veth-gamma netns ioam-node-gamma - - ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0 - ip -netns ioam-node-beta link set ioam-veth-betaL name veth0 - ip -netns ioam-node-beta link set ioam-veth-betaR name veth1 - ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0 - - ip -netns ioam-node-alpha addr add db01::2/64 dev veth0 - ip -netns ioam-node-alpha link set veth0 up - ip -netns ioam-node-alpha link set lo up - ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0 - ip -netns ioam-node-alpha route del db01::/64 - ip -netns ioam-node-alpha route add db01::/64 dev veth0 - - ip -netns ioam-node-beta addr add db01::1/64 dev veth0 - ip -netns ioam-node-beta addr add db02::1/64 dev veth1 - ip -netns ioam-node-beta link set veth0 up - ip -netns ioam-node-beta link set veth1 up - ip -netns ioam-node-beta link set lo up - - ip -netns ioam-node-gamma addr add db02::2/64 dev veth0 - ip -netns ioam-node-gamma link set veth0 up - ip -netns ioam-node-gamma link set lo up - ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0 + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + + ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ + peer name ioam-veth-betaL netns $ioam_node_beta + ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ + peer name ioam-veth-gamma netns $ioam_node_gamma + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 + + ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 + ip -netns $ioam_node_alpha link set veth0 up + ip -netns $ioam_node_alpha link set lo up + ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 + ip -netns $ioam_node_alpha route del db01::/64 + ip -netns $ioam_node_alpha route add db01::/64 dev veth0 + + ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 + ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 + ip -netns $ioam_node_beta link set veth0 up + ip -netns $ioam_node_beta link set veth1 up + ip -netns $ioam_node_beta link set lo up + + ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 + ip -netns $ioam_node_gamma link set veth0 up + ip -netns $ioam_node_gamma link set lo up + ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 # - IOAM config - - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide 
${ALPHA[7]} - ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} - - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1 - ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} - ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}" - ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]} - - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} + ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} + ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} + ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" + ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} + + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} + ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} sleep 1 - ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 
&>/dev/null if [ $? != 0 ] then echo "Setup FAILED" @@ -412,7 +407,7 @@ run() echo # set OUTPUT settings - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 for t in $TESTS_OUTPUT do @@ -421,8 +416,8 @@ run() done # clean OUTPUT settings - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip -netns ioam-node-alpha route change db01::/64 dev veth0 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 echo @@ -433,7 +428,7 @@ run() echo # set INPUT settings - ip -netns ioam-node-alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 for t in $TESTS_INPUT do @@ -442,10 +437,10 @@ run() done # clean INPUT settings - ip -netns ioam-node-alpha ioam namespace add 123 \ + ip -netns $ioam_node_alpha ioam namespace add 123 \ data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} - ip -netns ioam-node-alpha route change db01::/64 dev veth0 + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 echo printf "%0.s-" {1..74} @@ -488,15 +483,15 @@ out_undef_ns() local desc="Unknown IOAM namespace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0x800000 0 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } out_no_room() @@ -508,15 +503,15 @@ out_no_room() local desc="Missing trace room" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xc00000 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } out_bits() @@ -532,11 +527,11 @@ out_bits() bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up for i in {0..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ dev veth0 &>/dev/null @@ -554,12 +549,12 @@ out_bits() 
log_test_failed "$descr" fi else - run_test "out_bit$i" "$descr ($1 mode)" ioam-node-alpha \ - ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 fi done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down bit2size[22]=$tmp } @@ -573,15 +568,15 @@ out_full_supp_trace() local desc="Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 100 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xfff002 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -603,15 +598,15 @@ in_undef_ns() local desc="Unknown IOAM namespace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0x800000 0 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } in_no_room() @@ -623,15 +618,15 @@ in_no_room() local desc="Missing trace room" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xc00000 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } in_bits() @@ -647,19 +642,19 @@ in_bits() bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up for i in {0..11} {22..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ dev veth0 - run_test "in_bit$i" "${desc//$i} ($1 mode)" ioam-node-alpha \ - 
ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "in_bit$i" "${desc//$i} ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down bit2size[22]=$tmp } @@ -675,22 +670,22 @@ in_oflag() # Exception: # Here, we need the sender to set the Overflow flag. For that, we will add # back the IOAM namespace that was previously configured on the sender. - ip -netns ioam-node-alpha ioam namespace add 123 + ip -netns $ioam_node_alpha ioam namespace add 123 [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xc00000 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down # And we clean the exception for this test to get things back to normal for # other INPUT tests - ip -netns ioam-node-alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 } in_full_supp_trace() @@ -702,15 +697,15 @@ in_full_supp_trace() local desc="Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 80 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ db01::2 db01::1 veth0 0xfff002 123 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -730,15 +725,15 @@ fwd_full_supp_trace() local desc="Forward - Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up - ip -netns ioam-node-alpha route change db02::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-gamma \ + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ db01::2 db02::2 veth0 0xfff002 123 - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down } -- cgit v1.2.3-70-g09d2 From 4affb17c0d0e3708cd0088e81564af51e7e4d693 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:07 +0800 Subject: selftests/net: convert l2tp.sh to run it in unique namespace Here is the test result after conversion. 
]# ./l2tp.sh TEST: IPv4 basic L2TP tunnel [ OK ] TEST: IPv4 route through L2TP tunnel [ OK ] TEST: IPv6 basic L2TP tunnel [ OK ] TEST: IPv6 route through L2TP tunnel [ OK ] TEST: IPv4 basic L2TP tunnel - with IPsec [ OK ] TEST: IPv4 route through L2TP tunnel - with IPsec [ OK ] TEST: IPv6 basic L2TP tunnel - with IPsec [ OK ] TEST: IPv6 route through L2TP tunnel - with IPsec [ OK ] TEST: IPv4 basic L2TP tunnel [ OK ] TEST: IPv4 route through L2TP tunnel [ OK ] TEST: IPv6 basic L2TP tunnel - with IPsec [ OK ] TEST: IPv6 route through L2TP tunnel - with IPsec [ OK ] TEST: IPv4 basic L2TP tunnel - after IPsec teardown [ OK ] TEST: IPv4 route through L2TP tunnel - after IPsec teardown [ OK ] TEST: IPv6 basic L2TP tunnel - after IPsec teardown [ OK ] TEST: IPv6 route through L2TP tunnel - after IPsec teardown [ OK ] Tests passed: 16 Tests failed: 0 Acked-by: David Ahern Reviewed-by: James Chapman Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/l2tp.sh | 130 +++++++++++++++++------------------- 1 file changed, 62 insertions(+), 68 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh index 5782433886fc..88de7166c8ae 100755 --- a/tools/testing/selftests/net/l2tp.sh +++ b/tools/testing/selftests/net/l2tp.sh @@ -13,6 +13,7 @@ # 10.1.1.1 | | 10.1.2.1 # 2001:db8:1::1 | | 2001:db8:2::1 +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -80,9 +81,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -133,12 +131,7 @@ connect_ns() cleanup() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $host_1 $host_2 $router } setup_l2tp_ipv4() @@ -146,28 +139,28 @@ setup_l2tp_ipv4() # # configure l2tpv3 tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ + ip -netns $host_1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ encap ip local 10.1.1.1 remote 10.1.2.1 - ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \ + ip -netns $host_1 l2tp add session name l2tp4 tunnel_id 1041 \ session_id 1041 peer_session_id 1042 - ip -netns host-1 link set dev l2tp4 up - ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 + ip -netns $host_1 link set dev l2tp4 up + ip -netns $host_1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ encap ip local 10.1.2.1 remote 10.1.1.1 - ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \ + ip -netns $host_2 l2tp add session name l2tp4 tunnel_id 1042 \ session_id 1042 peer_session_id 1041 - ip -netns host-2 link set dev l2tp4 up - ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 + ip -netns $host_2 link set dev l2tp4 up + ip -netns $host_2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 # # add routes to loopback addresses # - ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2 - ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1 + ip -netns $host_1 ro add 172.16.101.2/32 via 172.16.1.2 + ip -netns $host_2 ro add 172.16.101.1/32 via 172.16.1.1 } setup_l2tp_ipv6() @@ -175,28 +168,28 @@ setup_l2tp_ipv6() # # configure l2tpv3 tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ + ip 
-netns $host_1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ encap ip local 2001:db8:1::1 remote 2001:db8:2::1 - ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \ + ip -netns $host_1 l2tp add session name l2tp6 tunnel_id 1061 \ session_id 1061 peer_session_id 1062 - ip -netns host-1 link set dev l2tp6 up - ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 + ip -netns $host_1 link set dev l2tp6 up + ip -netns $host_1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ encap ip local 2001:db8:2::1 remote 2001:db8:1::1 - ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \ + ip -netns $host_2 l2tp add session name l2tp6 tunnel_id 1062 \ session_id 1062 peer_session_id 1061 - ip -netns host-2 link set dev l2tp6 up - ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 + ip -netns $host_2 link set dev l2tp6 up + ip -netns $host_2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 # # add routes to loopback addresses # - ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2 - ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1 + ip -netns $host_1 -6 ro add fc00:101::2/128 via fc00:1::2 + ip -netns $host_2 -6 ro add fc00:101::1/128 via fc00:1::1 } setup() @@ -205,21 +198,22 @@ setup() cleanup set -e - create_ns host-1 172.16.101.1/32 fc00:101::1/128 - create_ns host-2 172.16.101.2/32 fc00:101::2/128 - create_ns router + setup_ns host_1 host_2 router + create_ns $host_1 172.16.101.1/32 fc00:101::1/128 + create_ns $host_2 172.16.101.2/32 fc00:101::2/128 + create_ns $router - connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ - router eth1 10.1.1.2/24 2001:db8:1::2/64 + connect_ns $host_1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ + $router eth1 10.1.1.2/24 2001:db8:1::2/64 - connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ - router eth2 10.1.2.2/24 2001:db8:2::2/64 + connect_ns $host_2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ + $router eth2 10.1.2.2/24 2001:db8:2::2/64 - ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2 - ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 + ip -netns $host_1 ro add 10.1.2.0/24 via 10.1.1.2 + ip -netns $host_1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 - ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2 - ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 + ip -netns $host_2 ro add 10.1.1.0/24 via 10.1.2.2 + ip -netns $host_2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 setup_l2tp_ipv4 setup_l2tp_ipv6 @@ -231,38 +225,38 @@ setup_ipsec() # # IPv4 # - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir out \ tmpl proto esp mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto 
esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip -netns $host_2 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip -netns $host_2 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -270,38 +264,38 @@ setup_ipsec() # # IPV6 # - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir out \ tmpl proto esp mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -309,10 +303,10 @@ setup_ipsec() teardown_ipsec() { - run_cmd host-1 ip xfrm state flush - run_cmd host-1 ip xfrm policy flush - run_cmd host-2 ip xfrm state flush - run_cmd host-2 ip xfrm policy flush + run_cmd $host_1 ip xfrm state flush + run_cmd $host_1 ip xfrm policy flush + run_cmd $host_2 ip xfrm state flush + run_cmd $host_2 ip xfrm policy flush } ################################################################################ @@ -322,16 +316,16 @@ run_ping() { local desc="$1" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 0 "IPv6 route through L2TP tunnel ${desc}" } @@ -344,16 +338,16 @@ run_tests() setup_ipsec run_ping "- with IPsec" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 
0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec" teardown_ipsec -- cgit v1.2.3-70-g09d2 From 3e05fc0c56bbbd1470a80f63b156be52f1101c64 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:08 +0800 Subject: selftests/net: convert ndisc_unsolicited_na_test.sh to run it in unique namespace Here is the test result after conversion. ]# ./ndisc_unsolicited_na_test.sh TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=1 forwarding=1 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=0 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=0 forwarding=1 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=0 accept_untracked_na=1 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=0 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=0 forwarding=1 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=1 forwarding=0 [ OK ] TEST: test_unsolicited_na: drop_unsolicited_na=1 accept_untracked_na=1 forwarding=1 [ OK ] Tests passed: 8 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- .../selftests/net/ndisc_unsolicited_na_test.sh | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh index 86e621b7b9c7..5db69dad0cfc 100755 --- a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -10,16 +10,12 @@ # 0 1 0 Don't update NC # 0 1 1 Add a STALE NC entry +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 PAUSE_ON_FAIL=no PAUSE=no -HOST_NS="ns-host" -ROUTER_NS="ns-router" - HOST_INTF="veth-host" ROUTER_INTF="veth-router" @@ -29,11 +25,6 @@ SUBNET_WIDTH=64 ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}" HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}" -IP_HOST="ip -6 -netns ${HOST_NS}" -IP_HOST_EXEC="ip netns exec ${HOST_NS}" -IP_ROUTER="ip -6 -netns ${ROUTER_NS}" -IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" - tcpdump_stdout= tcpdump_stderr= @@ -76,8 +67,12 @@ setup() # Setup two namespaces and a veth tunnel across them. # On end of the tunnel is a router and the other end is a host. - ip netns add ${HOST_NS} - ip netns add ${ROUTER_NS} + setup_ns HOST_NS ROUTER_NS + IP_HOST="ip -6 -netns ${HOST_NS}" + IP_HOST_EXEC="ip netns exec ${HOST_NS}" + IP_ROUTER="ip -6 -netns ${ROUTER_NS}" + IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" + ${IP_ROUTER} link add ${ROUTER_INTF} type veth \ peer name ${HOST_INTF} netns ${HOST_NS} -- cgit v1.2.3-70-g09d2 From 90e271f65ee428ae5a75e783f5ba50a10dece09d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:09 +0800 Subject: selftests/net: convert sctp_vrf.sh to run it in unique namespace Here is the test result after conversion. ]# ./sctp_vrf.sh Testing For SCTP VRF: TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y [PASS] ... 
TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N [PASS] ***v6 Tests Done*** Acked-by: David Ahern Reviewed-by: Xin Long Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/sctp_vrf.sh | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh index c721e952e5f3..c854034b6aa1 100755 --- a/tools/testing/selftests/net/sctp_vrf.sh +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -6,13 +6,11 @@ # SERVER_NS # CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 -CLIENT_NS1="client-ns1" -CLIENT_NS2="client-ns2" +source lib.sh CLIENT_IP4="10.0.0.1" CLIENT_IP6="2000::1" CLIENT_PORT=1234 -SERVER_NS="server-ns" SERVER_IP4="10.0.0.2" SERVER_IP6="2000::2" SERVER_PORT=1234 @@ -20,9 +18,7 @@ SERVER_PORT=1234 setup() { modprobe sctp modprobe sctp_diag - ip netns add $CLIENT_NS1 - ip netns add $CLIENT_NS2 - ip netns add $SERVER_NS + setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null @@ -67,9 +63,7 @@ setup() { cleanup() { ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns del "$CLIENT_NS1" - ip netns del "$CLIENT_NS2" - ip netns del "$SERVER_NS" + cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS } wait_server() { -- cgit v1.2.3-70-g09d2 From 0f4765d0b48d90ede9788c7edb2e072eee20f88e Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 2 Dec 2023 10:01:10 +0800 Subject: selftests/net: convert unicast_extensions.sh to run it in unique namespace Here is the test result after conversion. # ./unicast_extensions.sh /usr/bin/which: no nettest in (/root/.local/bin:/root/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin) ########################################################################### Unicast address extensions tests (behavior of reserved IPv4 addresses) ########################################################################### TEST: assign and ping within 240/4 (1 of 2) (is allowed) [ OK ] TEST: assign and ping within 240/4 (2 of 2) (is allowed) [ OK ] TEST: assign and ping within 0/8 (1 of 2) (is allowed) [ OK ] ... TEST: assign and ping class D address (is forbidden) [ OK ] TEST: routing using class D (is forbidden) [ OK ] TEST: routing using 127/8 (is forbidden) [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/unicast_extensions.sh | 99 +++++++++++------------ 1 file changed, 46 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index 2d10ccac898a..b7a2cb9e7477 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -28,8 +28,7 @@ # These tests provide an easy way to flip the expected result of any # of these behaviors for testing kernel patches that change them. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source ./lib.sh # nettest can be run from PATH or from same directory as this selftest if ! 
which nettest >/dev/null; then @@ -61,20 +60,20 @@ _do_segmenttest(){ # foo --- bar # Arguments: ip_a ip_b prefix_length test_description # - # Caller must set up foo-ns and bar-ns namespaces + # Caller must set up $foo_ns and $bar_ns namespaces # containing linked veth devices foo and bar, # respectively. - ip -n foo-ns address add $1/$3 dev foo || return 1 - ip -n foo-ns link set foo up || return 1 - ip -n bar-ns address add $2/$3 dev bar || return 1 - ip -n bar-ns link set bar up || return 1 + ip -n $foo_ns address add $1/$3 dev foo || return 1 + ip -n $foo_ns link set foo up || return 1 + ip -n $bar_ns address add $2/$3 dev bar || return 1 + ip -n $bar_ns link set bar up || return 1 - ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 - ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1 - nettest -B -N bar-ns -O foo-ns -r $1 || return 1 - nettest -B -N foo-ns -O bar-ns -r $2 || return 1 + nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1 + nettest -B -N $foo_ns -O $bar_ns -r $2 || return 1 return 0 } @@ -88,31 +87,31 @@ _do_route_test(){ # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description # Displays test result and returns success or failure. - # Caller must set up foo-ns, bar-ns, and router-ns + # Caller must set up $foo_ns, $bar_ns, and $router_ns # containing linked veth devices foo-foo1, bar1-bar - # (foo in foo-ns, foo1 and bar1 in router-ns, and - # bar in bar-ns). - - ip -n foo-ns address add $1/$5 dev foo || return 1 - ip -n foo-ns link set foo up || return 1 - ip -n foo-ns route add default via $2 || return 1 - ip -n bar-ns address add $4/$5 dev bar || return 1 - ip -n bar-ns link set bar up || return 1 - ip -n bar-ns route add default via $3 || return 1 - ip -n router-ns address add $2/$5 dev foo1 || return 1 - ip -n router-ns link set foo1 up || return 1 - ip -n router-ns address add $3/$5 dev bar1 || return 1 - ip -n router-ns link set bar1 up || return 1 - - echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward - - ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 - ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1 - ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1 - ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 - - nettest -B -N bar-ns -O foo-ns -r $1 || return 1 - nettest -B -N foo-ns -O bar-ns -r $4 || return 1 + # (foo in $foo_ns, foo1 and bar1 in $router_ns, and + # bar in $bar_ns). 
+ + ip -n $foo_ns address add $1/$5 dev foo || return 1 + ip -n $foo_ns link set foo up || return 1 + ip -n $foo_ns route add default via $2 || return 1 + ip -n $bar_ns address add $4/$5 dev bar || return 1 + ip -n $bar_ns link set bar up || return 1 + ip -n $bar_ns route add default via $3 || return 1 + ip -n $router_ns address add $2/$5 dev foo1 || return 1 + ip -n $router_ns link set foo1 up || return 1 + ip -n $router_ns address add $3/$5 dev bar1 || return 1 + ip -n $router_ns link set bar1 up || return 1 + + echo 1 | ip netns exec $router_ns tee /proc/sys/net/ipv4/ip_forward + + ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec $foo_ns timeout 2 ping -c 1 $4 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $3 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1 + nettest -B -N $foo_ns -O $bar_ns -r $4 || return 1 return 0 } @@ -121,17 +120,15 @@ segmenttest(){ # Sets up veth link and tries to connect over it. # Arguments: ip_a ip_b prefix_len test_description hide_output - ip netns add foo-ns - ip netns add bar-ns - ip link add foo netns foo-ns type veth peer name bar netns bar-ns + setup_ns foo_ns bar_ns + ip link add foo netns $foo_ns type veth peer name bar netns $bar_ns test_result=0 _do_segmenttest "$@" || test_result=1 - ip netns pids foo-ns | xargs -r kill -9 - ip netns pids bar-ns | xargs -r kill -9 - ip netns del foo-ns - ip netns del bar-ns + ip netns pids $foo_ns | xargs -r kill -9 + ip netns pids $bar_ns | xargs -r kill -9 + cleanup_ns $foo_ns $bar_ns show_output # inverted tests will expect failure instead of success @@ -147,21 +144,17 @@ route_test(){ # Returns success or failure. hide_output - ip netns add foo-ns - ip netns add bar-ns - ip netns add router-ns - ip link add foo netns foo-ns type veth peer name foo1 netns router-ns - ip link add bar netns bar-ns type veth peer name bar1 netns router-ns + setup_ns foo_ns bar_ns router_ns + ip link add foo netns $foo_ns type veth peer name foo1 netns $router_ns + ip link add bar netns $bar_ns type veth peer name bar1 netns $router_ns test_result=0 _do_route_test "$@" || test_result=1 - ip netns pids foo-ns | xargs -r kill -9 - ip netns pids bar-ns | xargs -r kill -9 - ip netns pids router-ns | xargs -r kill -9 - ip netns del foo-ns - ip netns del bar-ns - ip netns del router-ns + ip netns pids $foo_ns | xargs -r kill -9 + ip netns pids $bar_ns | xargs -r kill -9 + ip netns pids $router_ns | xargs -r kill -9 + cleanup_ns $foo_ns $bar_ns $router_ns show_output -- cgit v1.2.3-70-g09d2 From 1b4c7e20bfd6cfe0efbc51756d930a9406d41ea7 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 4 Dec 2023 13:17:22 -0800 Subject: selftests/bpf: Test bpf_kptr_xchg stashing of bpf_rb_root There was some confusion amongst Meta sched_ext folks regarding whether stashing bpf_rb_root - the tree itself, rather than a single node - was supported. This patch adds a small test which demonstrates this functionality: a local kptr with rb_root is created, a node is created and added to the tree, then the tree is kptr_xchg'd into a mapval. 
Signed-off-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231204211722.571346-1-davemarchevsky@fb.com --- .../selftests/bpf/prog_tests/local_kptr_stash.c | 23 ++++++++++ .../testing/selftests/bpf/progs/local_kptr_stash.c | 53 ++++++++++++++++++++++ 2 files changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index e6e50a394472..827e713f6cf1 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -48,6 +48,27 @@ static void test_local_kptr_stash_plain(void) local_kptr_stash__destroy(skel); } +static void test_local_kptr_stash_local_with_root(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_local_with_root), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_local_with_root run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_local_with_root retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_unstash(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -115,6 +136,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_simple(); if (test__start_subtest("local_kptr_stash_plain")) test_local_kptr_stash_plain(); + if (test__start_subtest("local_kptr_stash_local_with_root")) + test_local_kptr_stash_local_with_root(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); if (test__start_subtest("refcount_acquire_without_unstash")) diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index 1769fdff6aea..75043ffc5dad 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -37,11 +37,18 @@ struct plain_local { long data; }; +struct local_with_root { + long key; + struct bpf_spin_lock l; + struct bpf_rb_root r __contains(node_data, node); +}; + struct map_value { struct prog_test_ref_kfunc *not_kptr; struct prog_test_ref_kfunc __kptr *val; struct node_data __kptr *node; struct plain_local __kptr *plain; + struct local_with_root __kptr *local_root; }; /* This is necessary so that LLVM generates BTF for node_data struct @@ -65,6 +72,17 @@ struct { __uint(max_entries, 2); } some_nodes SEC(".maps"); +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + + return node_a->key < node_b->key; +} + static int create_and_stash(int idx, int val) { struct map_value *mapval; @@ -113,6 +131,41 @@ long stash_plain(void *ctx) return 0; } +SEC("tc") +long stash_local_with_root(void *ctx) +{ + struct local_with_root *res; + struct map_value *mapval; + struct node_data *n; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 2; + res->key = 41; + + n = bpf_obj_new(typeof(*n)); + if (!n) { + bpf_obj_drop(res); + return 3; + } + + bpf_spin_lock(&res->l); + bpf_rbtree_add(&res->r, 
&n->node, less); + bpf_spin_unlock(&res->l); + + res = bpf_kptr_xchg(&mapval->local_root, res); + if (res) { + bpf_obj_drop(res); + return 4; + } + return 0; +} + SEC("tc") long unstash_rb_node(void *ctx) { -- cgit v1.2.3-70-g09d2 From 5ffb260f754bf838507fe0c23d05254b33e2bf3d Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 4 Dec 2023 09:44:23 -0800 Subject: selftests/bpf: Make sure we trigger metadata kfuncs for dst 8080 xdp_metadata test is flaky sometimes: verify_xsk_metadata:FAIL:rx_hash_type unexpected rx_hash_type: actual 8 != expected 0 Where 8 means XDP_RSS_TYPE_L4_ANY and is exported from veth driver only when 'skb->l4_hash' condition is met. This makes me think that the program is triggering again for some other packet. Let's have a filter, similar to xdp_hw_metadata, where we trigger XDP kfuncs only for UDP packets destined to port 8080. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231204174423.3460052-1-sdf@google.com --- tools/testing/selftests/bpf/progs/xdp_metadata.c | 31 +++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index d151d406a123..5d6c1245c310 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -27,11 +27,40 @@ extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, SEC("xdp") int rx(struct xdp_md *ctx) { - void *data, *data_meta; + void *data, *data_meta, *data_end; + struct ipv6hdr *ip6h = NULL; + struct ethhdr *eth = NULL; + struct udphdr *udp = NULL; + struct iphdr *iph = NULL; struct xdp_meta *meta; u64 timestamp = -1; int ret; + data = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + eth = data; + if (eth + 1 < data_end) { + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP) + udp = (void *)(iph + 1); + } + if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP) + udp = (void *)(ip6h + 1); + } + if (udp && udp + 1 > data_end) + udp = NULL; + } + + if (!udp) + return XDP_PASS; + + /* Forwarding UDP:8080 to AF_XDP */ + if (udp->dest != bpf_htons(8080)) + return XDP_PASS; + /* Reserve enough for all custom metadata. */ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); -- cgit v1.2.3-70-g09d2 From f7c0e362a25f99fafa73d62a2e8c3da00cf1fc0e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:17:59 -0800 Subject: tools: ynl: remove generated user space code from git The ynl-generated user space C code is already above 25kLoC and is growing. The initial reason to commit these files was to make reviewing changes to the generator easier. Unfortunately, it has the opposite effect on reviewing changes to specs, and we get far more changes to specs than to the generator. Uncommit those fails, as they are generated on the fly as needed. 
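For context, the dropped files can be reproduced locally from the YAML specs with the ynl-gen-c.py generator shipped under tools/net/ynl. The sketch below is illustrative only (paths and flag spellings follow the in-tree generated/ Makefile at the time of this series and may differ in other kernel versions); run it from the top of the kernel source tree.

    # Regenerate one user-space header/source pair from its spec
    # (netdev.yaml used here purely as an example).
    ./tools/net/ynl/ynl-gen-c.py --mode user --header \
            --spec Documentation/netlink/specs/netdev.yaml \
            -o tools/net/ynl/generated/netdev-user.h
    ./tools/net/ynl/ynl-gen-c.py --mode user --source \
            --spec Documentation/netlink/specs/netdev.yaml \
            -o tools/net/ynl/generated/netdev-user.c

    # Or let the Makefile in the generated/ directory rebuild all of them.
    make -C tools/net/ynl/generated
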
netdev patchwork now runs a script on each series to create a diff of generated code on the fly, for the rare cases when looking at it is helpful: https://github.com/kuba-moo/nipa/blob/master/tests/series/ynl/ynl.sh Suggested-by: Paolo Abeni Acked-by: Chuck Lever Signed-off-by: Jakub Kicinski --- tools/net/ynl/generated/.gitignore | 2 + tools/net/ynl/generated/devlink-user.c | 6864 ------------------------------ tools/net/ynl/generated/devlink-user.h | 5255 ----------------------- tools/net/ynl/generated/ethtool-user.c | 6370 --------------------------- tools/net/ynl/generated/ethtool-user.h | 5535 ------------------------ tools/net/ynl/generated/fou-user.c | 330 -- tools/net/ynl/generated/fou-user.h | 343 -- tools/net/ynl/generated/handshake-user.c | 332 -- tools/net/ynl/generated/handshake-user.h | 145 - tools/net/ynl/generated/netdev-user.c | 952 ----- tools/net/ynl/generated/netdev-user.h | 442 -- tools/net/ynl/generated/nfsd-user.c | 203 - tools/net/ynl/generated/nfsd-user.h | 67 - 13 files changed, 2 insertions(+), 26838 deletions(-) create mode 100644 tools/net/ynl/generated/.gitignore delete mode 100644 tools/net/ynl/generated/devlink-user.c delete mode 100644 tools/net/ynl/generated/devlink-user.h delete mode 100644 tools/net/ynl/generated/ethtool-user.c delete mode 100644 tools/net/ynl/generated/ethtool-user.h delete mode 100644 tools/net/ynl/generated/fou-user.c delete mode 100644 tools/net/ynl/generated/fou-user.h delete mode 100644 tools/net/ynl/generated/handshake-user.c delete mode 100644 tools/net/ynl/generated/handshake-user.h delete mode 100644 tools/net/ynl/generated/netdev-user.c delete mode 100644 tools/net/ynl/generated/netdev-user.h delete mode 100644 tools/net/ynl/generated/nfsd-user.c delete mode 100644 tools/net/ynl/generated/nfsd-user.h (limited to 'tools') diff --git a/tools/net/ynl/generated/.gitignore b/tools/net/ynl/generated/.gitignore new file mode 100644 index 000000000000..ade488626d26 --- /dev/null +++ b/tools/net/ynl/generated/.gitignore @@ -0,0 +1,2 @@ +*-user.c +*-user.h diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c deleted file mode 100644 index 8e757e249dab..000000000000 --- a/tools/net/ynl/generated/devlink-user.c +++ /dev/null @@ -1,6864 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/devlink.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "devlink-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const devlink_op_strmap[] = { - [3] = "get", - // skip "port-get", duplicate reply value - [DEVLINK_CMD_PORT_NEW] = "port-new", - [13] = "sb-get", - [17] = "sb-pool-get", - [21] = "sb-port-pool-get", - [25] = "sb-tc-pool-bind-get", - [DEVLINK_CMD_ESWITCH_GET] = "eswitch-get", - [DEVLINK_CMD_DPIPE_TABLE_GET] = "dpipe-table-get", - [DEVLINK_CMD_DPIPE_ENTRIES_GET] = "dpipe-entries-get", - [DEVLINK_CMD_DPIPE_HEADERS_GET] = "dpipe-headers-get", - [DEVLINK_CMD_RESOURCE_DUMP] = "resource-dump", - [DEVLINK_CMD_RELOAD] = "reload", - [DEVLINK_CMD_PARAM_GET] = "param-get", - [DEVLINK_CMD_REGION_GET] = "region-get", - [DEVLINK_CMD_REGION_NEW] = "region-new", - [DEVLINK_CMD_REGION_READ] = "region-read", - [DEVLINK_CMD_PORT_PARAM_GET] = "port-param-get", - [DEVLINK_CMD_INFO_GET] = "info-get", - [DEVLINK_CMD_HEALTH_REPORTER_GET] = "health-reporter-get", - [DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET] = "health-reporter-dump-get", - [63] = 
"trap-get", - [67] = "trap-group-get", - [71] = "trap-policer-get", - [76] = "rate-get", - [80] = "linecard-get", - [DEVLINK_CMD_SELFTESTS_GET] = "selftests-get", -}; - -const char *devlink_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(devlink_op_strmap)) - return NULL; - return devlink_op_strmap[op]; -} - -static const char * const devlink_sb_pool_type_strmap[] = { - [0] = "ingress", - [1] = "egress", -}; - -const char *devlink_sb_pool_type_str(enum devlink_sb_pool_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_sb_pool_type_strmap)) - return NULL; - return devlink_sb_pool_type_strmap[value]; -} - -static const char * const devlink_port_type_strmap[] = { - [0] = "notset", - [1] = "auto", - [2] = "eth", - [3] = "ib", -}; - -const char *devlink_port_type_str(enum devlink_port_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_type_strmap)) - return NULL; - return devlink_port_type_strmap[value]; -} - -static const char * const devlink_port_flavour_strmap[] = { - [0] = "physical", - [1] = "cpu", - [2] = "dsa", - [3] = "pci_pf", - [4] = "pci_vf", - [5] = "virtual", - [6] = "unused", - [7] = "pci_sf", -}; - -const char *devlink_port_flavour_str(enum devlink_port_flavour value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_flavour_strmap)) - return NULL; - return devlink_port_flavour_strmap[value]; -} - -static const char * const devlink_port_fn_state_strmap[] = { - [0] = "inactive", - [1] = "active", -}; - -const char *devlink_port_fn_state_str(enum devlink_port_fn_state value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_state_strmap)) - return NULL; - return devlink_port_fn_state_strmap[value]; -} - -static const char * const devlink_port_fn_opstate_strmap[] = { - [0] = "detached", - [1] = "attached", -}; - -const char *devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_opstate_strmap)) - return NULL; - return devlink_port_fn_opstate_strmap[value]; -} - -static const char * const devlink_port_fn_attr_cap_strmap[] = { - [0] = "roce-bit", - [1] = "migratable-bit", - [2] = "ipsec-crypto-bit", - [3] = "ipsec-packet-bit", -}; - -const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_port_fn_attr_cap_strmap)) - return NULL; - return devlink_port_fn_attr_cap_strmap[value]; -} - -static const char * const devlink_sb_threshold_type_strmap[] = { - [0] = "static", - [1] = "dynamic", -}; - -const char *devlink_sb_threshold_type_str(enum devlink_sb_threshold_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_sb_threshold_type_strmap)) - return NULL; - return devlink_sb_threshold_type_strmap[value]; -} - -static const char * const devlink_eswitch_mode_strmap[] = { - [0] = "legacy", - [1] = "switchdev", -}; - -const char *devlink_eswitch_mode_str(enum devlink_eswitch_mode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_mode_strmap)) - return NULL; - return devlink_eswitch_mode_strmap[value]; -} - -static const char * const devlink_eswitch_inline_mode_strmap[] = { - [0] = "none", - [1] = "link", - [2] = "network", - [3] = "transport", -}; - -const char * -devlink_eswitch_inline_mode_str(enum devlink_eswitch_inline_mode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_inline_mode_strmap)) - return NULL; - return devlink_eswitch_inline_mode_strmap[value]; -} - -static const char * 
const devlink_eswitch_encap_mode_strmap[] = { - [0] = "none", - [1] = "basic", -}; - -const char * -devlink_eswitch_encap_mode_str(enum devlink_eswitch_encap_mode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_eswitch_encap_mode_strmap)) - return NULL; - return devlink_eswitch_encap_mode_strmap[value]; -} - -static const char * const devlink_dpipe_match_type_strmap[] = { - [0] = "field-exact", -}; - -const char *devlink_dpipe_match_type_str(enum devlink_dpipe_match_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_match_type_strmap)) - return NULL; - return devlink_dpipe_match_type_strmap[value]; -} - -static const char * const devlink_dpipe_action_type_strmap[] = { - [0] = "field-modify", -}; - -const char *devlink_dpipe_action_type_str(enum devlink_dpipe_action_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_action_type_strmap)) - return NULL; - return devlink_dpipe_action_type_strmap[value]; -} - -static const char * const devlink_dpipe_field_mapping_type_strmap[] = { - [0] = "none", - [1] = "ifindex", -}; - -const char * -devlink_dpipe_field_mapping_type_str(enum devlink_dpipe_field_mapping_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_dpipe_field_mapping_type_strmap)) - return NULL; - return devlink_dpipe_field_mapping_type_strmap[value]; -} - -static const char * const devlink_resource_unit_strmap[] = { - [0] = "entry", -}; - -const char *devlink_resource_unit_str(enum devlink_resource_unit value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_resource_unit_strmap)) - return NULL; - return devlink_resource_unit_strmap[value]; -} - -static const char * const devlink_reload_action_strmap[] = { - [1] = "driver-reinit", - [2] = "fw-activate", -}; - -const char *devlink_reload_action_str(enum devlink_reload_action value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_reload_action_strmap)) - return NULL; - return devlink_reload_action_strmap[value]; -} - -static const char * const devlink_param_cmode_strmap[] = { - [0] = "runtime", - [1] = "driverinit", - [2] = "permanent", -}; - -const char *devlink_param_cmode_str(enum devlink_param_cmode value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_param_cmode_strmap)) - return NULL; - return devlink_param_cmode_strmap[value]; -} - -static const char * const devlink_flash_overwrite_strmap[] = { - [0] = "settings-bit", - [1] = "identifiers-bit", -}; - -const char *devlink_flash_overwrite_str(enum devlink_flash_overwrite value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_flash_overwrite_strmap)) - return NULL; - return devlink_flash_overwrite_strmap[value]; -} - -static const char * const devlink_trap_action_strmap[] = { - [0] = "drop", - [1] = "trap", - [2] = "mirror", -}; - -const char *devlink_trap_action_str(enum devlink_trap_action value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(devlink_trap_action_strmap)) - return NULL; - return devlink_trap_action_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr devlink_dl_dpipe_match_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH_TYPE] = { .name = "dpipe-match-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", 
.type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_match_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_match_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_match_value_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, - [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_match_value_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_match_value_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_action_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION_TYPE] = { .name = "dpipe-action-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_action_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_action_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_action_value_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, - [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_action_value_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_action_value_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_field_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_FIELD_NAME] = { .name = "dpipe-field-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = { .name = "dpipe-field-bitwidth", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = { .name = "dpipe-field-mapping-type", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_field_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_field_policy, -}; - -struct ynl_policy_attr devlink_dl_resource_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RESOURCE_NAME] = { .name = "resource-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RESOURCE_ID] = { .name = "resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE] = { .name = "resource-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_NEW] = { .name = "resource-size-new", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_VALID] = { .name = "resource-size-valid", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MIN] = { .name = "resource-size-min", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MAX] = { .name = "resource-size-max", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_GRAN] = { .name = "resource-size-gran", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_UNIT] = { .name = "resource-unit", .type = 
YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_OCC] = { .name = "resource-occ", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest devlink_dl_resource_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_resource_policy, -}; - -struct ynl_policy_attr devlink_dl_info_version_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest devlink_dl_info_version_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_info_version_policy, -}; - -struct ynl_policy_attr devlink_dl_fmsg_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_FMSG_OBJ_NEST_START] = { .name = "fmsg-obj-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_PAIR_NEST_START] = { .name = "fmsg-pair-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_ARR_NEST_START] = { .name = "fmsg-arr-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_NEST_END] = { .name = "fmsg-nest-end", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_OBJ_NAME] = { .name = "fmsg-obj-name", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest devlink_dl_fmsg_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_fmsg_policy, -}; - -struct ynl_policy_attr devlink_dl_port_function_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { - [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .name = "hw-addr", .type = YNL_PT_BINARY,}, - [DEVLINK_PORT_FN_ATTR_STATE] = { .name = "state", .type = YNL_PT_U8, }, - [DEVLINK_PORT_FN_ATTR_OPSTATE] = { .name = "opstate", .type = YNL_PT_U8, }, - [DEVLINK_PORT_FN_ATTR_CAPS] = { .name = "caps", .type = YNL_PT_BITFIELD32, }, -}; - -struct ynl_policy_nest devlink_dl_port_function_nest = { - .max_attr = DEVLINK_PORT_FUNCTION_ATTR_MAX, - .table = devlink_dl_port_function_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_stats_entry_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest devlink_dl_reload_stats_entry_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_stats_entry_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_act_stats_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, }, -}; - -struct ynl_policy_nest devlink_dl_reload_act_stats_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_act_stats_policy, -}; - -struct ynl_policy_attr devlink_dl_selftest_id_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { - [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .name = "flash", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest devlink_dl_selftest_id_nest = { - .max_attr = DEVLINK_ATTR_SELFTEST_ID_MAX, - .table = devlink_dl_selftest_id_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_table_matches_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_table_matches_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_table_matches_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_table_actions_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = 
&devlink_dl_dpipe_action_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_table_actions_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_table_actions_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entry_match_values_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_MATCH_VALUE] = { .name = "dpipe-match-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_value_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entry_match_values_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entry_match_values_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entry_action_values_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ACTION_VALUE] = { .name = "dpipe-action-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_value_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entry_action_values_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entry_action_values_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_header_fields_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_FIELD] = { .name = "dpipe-field", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_field_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_header_fields_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_header_fields_policy, -}; - -struct ynl_policy_attr devlink_dl_resource_list_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RESOURCE] = { .name = "resource", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_nest, }, -}; - -struct ynl_policy_nest devlink_dl_resource_list_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_resource_list_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_act_info_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, -}; - -struct ynl_policy_nest devlink_dl_reload_act_info_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_act_info_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_table_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .name = "dpipe-table-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_TABLE_SIZE] = { .name = "dpipe-table-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_MATCHES] = { .name = "dpipe-table-matches", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_matches_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_ACTIONS] = { .name = "dpipe-table-actions", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_actions_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .name = "dpipe-table-counters-enabled", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID] = { .name = "dpipe-table-resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS] = { .name = "dpipe-table-resource-units", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_table_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_table_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entry_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ENTRY_INDEX] = { .name = "dpipe-entry-index", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES] = { .name = "dpipe-entry-match-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_match_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES] = { .name = 
"dpipe-entry-action-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_action_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_COUNTER] = { .name = "dpipe-entry-counter", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entry_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entry_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_header_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_HEADER_NAME] = { .name = "dpipe-header-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_FIELDS] = { .name = "dpipe-header-fields", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_fields_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_header_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_header_policy, -}; - -struct ynl_policy_attr devlink_dl_reload_stats_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, -}; - -struct ynl_policy_nest devlink_dl_reload_stats_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_reload_stats_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_tables_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_TABLE] = { .name = "dpipe-table", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_tables_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_tables_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_entries_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_ENTRY] = { .name = "dpipe-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_entries_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_entries_policy, -}; - -struct ynl_policy_attr devlink_dl_dpipe_headers_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_DPIPE_HEADER] = { .name = "dpipe-header", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dpipe_headers_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dpipe_headers_policy, -}; - -struct ynl_policy_attr devlink_dl_dev_stats_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, - [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, -}; - -struct ynl_policy_nest devlink_dl_dev_stats_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_dl_dev_stats_policy, -}; - -struct ynl_policy_attr devlink_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_BUS_NAME] = { .name = "bus-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_PORT_INDEX] = { .name = "port-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_PORT_TYPE] = { .name = "port-type", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .name = "port-split-count", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_INDEX] = { .name = "sb-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_POOL_INDEX] = { .name = "sb-pool-index", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_SB_POOL_TYPE] = { .name = "sb-pool-type", .type = YNL_PT_U8, }, - 
[DEVLINK_ATTR_SB_POOL_SIZE] = { .name = "sb-pool-size", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .name = "sb-pool-threshold-type", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_SB_THRESHOLD] = { .name = "sb-threshold", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_SB_TC_INDEX] = { .name = "sb-tc-index", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_ESWITCH_MODE] = { .name = "eswitch-mode", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .name = "eswitch-inline-mode", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_DPIPE_TABLES] = { .name = "dpipe-tables", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_tables_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE] = { .name = "dpipe-table", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .name = "dpipe-table-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_TABLE_SIZE] = { .name = "dpipe-table-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_MATCHES] = { .name = "dpipe-table-matches", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_matches_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_ACTIONS] = { .name = "dpipe-table-actions", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_table_actions_nest, }, - [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .name = "dpipe-table-counters-enabled", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_ENTRIES] = { .name = "dpipe-entries", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entries_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY] = { .name = "dpipe-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_INDEX] = { .name = "dpipe-entry-index", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES] = { .name = "dpipe-entry-match-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_match_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES] = { .name = "dpipe-entry-action-values", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_entry_action_values_nest, }, - [DEVLINK_ATTR_DPIPE_ENTRY_COUNTER] = { .name = "dpipe-entry-counter", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_MATCH] = { .name = "dpipe-match", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_nest, }, - [DEVLINK_ATTR_DPIPE_MATCH_VALUE] = { .name = "dpipe-match-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_match_value_nest, }, - [DEVLINK_ATTR_DPIPE_MATCH_TYPE] = { .name = "dpipe-match-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_ACTION] = { .name = "dpipe-action", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_nest, }, - [DEVLINK_ATTR_DPIPE_ACTION_VALUE] = { .name = "dpipe-action-value", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_action_value_nest, }, - [DEVLINK_ATTR_DPIPE_ACTION_TYPE] = { .name = "dpipe-action-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_VALUE] = { .name = "dpipe-value", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MASK] = { .name = "dpipe-value-mask", .type = YNL_PT_BINARY,}, - [DEVLINK_ATTR_DPIPE_VALUE_MAPPING] = { .name = "dpipe-value-mapping", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADERS] = { .name = "dpipe-headers", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_headers_nest, }, - [DEVLINK_ATTR_DPIPE_HEADER] = { .name = "dpipe-header", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_nest, }, - [DEVLINK_ATTR_DPIPE_HEADER_NAME] = { .name = "dpipe-header-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_HEADER_ID] = { .name = "dpipe-header-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_HEADER_FIELDS] = { .name = "dpipe-header-fields", .type = 
YNL_PT_NEST, .nest = &devlink_dl_dpipe_header_fields_nest, }, - [DEVLINK_ATTR_DPIPE_HEADER_GLOBAL] = { .name = "dpipe-header-global", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_DPIPE_HEADER_INDEX] = { .name = "dpipe-header-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD] = { .name = "dpipe-field", .type = YNL_PT_NEST, .nest = &devlink_dl_dpipe_field_nest, }, - [DEVLINK_ATTR_DPIPE_FIELD_NAME] = { .name = "dpipe-field-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_DPIPE_FIELD_ID] = { .name = "dpipe-field-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH] = { .name = "dpipe-field-bitwidth", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE] = { .name = "dpipe-field-mapping-type", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .name = "eswitch-encap-mode", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_LIST] = { .name = "resource-list", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_list_nest, }, - [DEVLINK_ATTR_RESOURCE] = { .name = "resource", .type = YNL_PT_NEST, .nest = &devlink_dl_resource_nest, }, - [DEVLINK_ATTR_RESOURCE_NAME] = { .name = "resource-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RESOURCE_ID] = { .name = "resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE] = { .name = "resource-size", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_NEW] = { .name = "resource-size-new", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_VALID] = { .name = "resource-size-valid", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MIN] = { .name = "resource-size-min", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_MAX] = { .name = "resource-size-max", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_SIZE_GRAN] = { .name = "resource-size-gran", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RESOURCE_UNIT] = { .name = "resource-unit", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RESOURCE_OCC] = { .name = "resource-occ", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID] = { .name = "dpipe-table-resource-id", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS] = { .name = "dpipe-table-resource-units", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_PORT_FLAVOUR] = { .name = "port-flavour", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_PARAM_NAME] = { .name = "param-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_PARAM_TYPE] = { .name = "param-type", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .name = "param-value-cmode", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_REGION_NAME] = { .name = "region-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .name = "region-snapshot-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .name = "region-chunk-addr", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .name = "region-chunk-len", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_INFO_DRIVER_NAME] = { .name = "info-driver-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_SERIAL_NUMBER] = { .name = "info-serial-number", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_VERSION_FIXED] = { .name = "info-version-fixed", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, - [DEVLINK_ATTR_INFO_VERSION_RUNNING] = { .name = "info-version-running", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, - [DEVLINK_ATTR_INFO_VERSION_STORED] = { .name = "info-version-stored", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, - [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = 
"info-version-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_FMSG] = { .name = "fmsg", .type = YNL_PT_NEST, .nest = &devlink_dl_fmsg_nest, }, - [DEVLINK_ATTR_FMSG_OBJ_NEST_START] = { .name = "fmsg-obj-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_PAIR_NEST_START] = { .name = "fmsg-pair-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_ARR_NEST_START] = { .name = "fmsg-arr-nest-start", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_NEST_END] = { .name = "fmsg-nest-end", .type = YNL_PT_FLAG, }, - [DEVLINK_ATTR_FMSG_OBJ_NAME] = { .name = "fmsg-obj-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .name = "health-reporter-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .name = "health-reporter-graceful-period", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .name = "health-reporter-auto-recover", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .name = "flash-update-file-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .name = "flash-update-component", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .name = "port-pci-pf-number", .type = YNL_PT_U16, }, - [DEVLINK_ATTR_TRAP_NAME] = { .name = "trap-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_TRAP_ACTION] = { .name = "trap-action", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .name = "trap-group-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RELOAD_FAILED] = { .name = "reload-failed", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_NETNS_FD] = { .name = "netns-fd", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_NETNS_PID] = { .name = "netns-pid", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_NETNS_ID] = { .name = "netns-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .name = "health-reporter-auto-dump", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_TRAP_POLICER_ID] = { .name = "trap-policer-id", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .name = "trap-policer-rate", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .name = "trap-policer-burst", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_PORT_FUNCTION] = { .name = "port-function", .type = YNL_PT_NEST, .nest = &devlink_dl_port_function_nest, }, - [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .name = "port-controller-number", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = { .name = "flash-update-overwrite-mask", .type = YNL_PT_BITFIELD32, }, - [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED] = { .name = "reload-actions-performed", .type = YNL_PT_BITFIELD32, }, - [DEVLINK_ATTR_RELOAD_LIMITS] = { .name = "reload-limits", .type = YNL_PT_BITFIELD32, }, - [DEVLINK_ATTR_DEV_STATS] = { .name = "dev-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_dev_stats_nest, }, - [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, - [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, }, - [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, }, - [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = 
&devlink_dl_reload_stats_nest, }, - [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, - [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, - [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .name = "port-pci-sf-number", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_RATE_TX_SHARE] = { .name = "rate-tx-share", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RATE_TX_MAX] = { .name = "rate-tx-max", .type = YNL_PT_U64, }, - [DEVLINK_ATTR_RATE_NODE_NAME] = { .name = "rate-node-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .name = "rate-parent-node-name", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_LINECARD_INDEX] = { .name = "linecard-index", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_LINECARD_TYPE] = { .name = "linecard-type", .type = YNL_PT_NUL_STR, }, - [DEVLINK_ATTR_SELFTESTS] = { .name = "selftests", .type = YNL_PT_NEST, .nest = &devlink_dl_selftest_id_nest, }, - [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .name = "rate-tx-priority", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .name = "rate-tx-weight", .type = YNL_PT_U32, }, - [DEVLINK_ATTR_REGION_DIRECT] = { .name = "region-direct", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest devlink_nest = { - .max_attr = DEVLINK_ATTR_MAX, - .table = devlink_policy, -}; - -/* Common nested types */ -void devlink_dl_dpipe_match_free(struct devlink_dl_dpipe_match *obj) -{ -} - -int devlink_dl_dpipe_match_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_match *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_match_type = 1; - dst->dpipe_match_type = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_id = 1; - dst->dpipe_header_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_global = 1; - dst->dpipe_header_global = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_index = 1; - dst->dpipe_header_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_id = 1; - dst->dpipe_field_id = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void -devlink_dl_dpipe_match_value_free(struct devlink_dl_dpipe_match_value *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_match; i++) - devlink_dl_dpipe_match_free(&obj->dpipe_match[i]); - free(obj->dpipe_match); - free(obj->dpipe_value); - free(obj->dpipe_value_mask); -} - -int devlink_dl_dpipe_match_value_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_match_value *dst = yarg->data; - unsigned int n_dpipe_match = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_match) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-match-value.dpipe-match)"); - - mnl_attr_for_each_nested(attr, 
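
As an illustration (not part of the diff): the generated parse helpers such as devlink_dl_dpipe_match_parse() above all follow the same contract. Every optional attribute in a result struct has a matching bit in its _present member (string/binary attributes carry a *_len instead), and a consumer is expected to test that bit before touching the value. A minimal, hypothetical consumer of the devlink_dl_dpipe_match struct parsed above could look like this (assumes the generated devlink-user.h):

#include <stdio.h>
#include "devlink-user.h"

static void print_dpipe_match(const struct devlink_dl_dpipe_match *m)
{
        /* only fields whose _present bit was set were actually received */
        if (m->_present.dpipe_match_type)
                printf("match type: %u\n", m->dpipe_match_type);
        if (m->_present.dpipe_header_id)
                printf("header id:  %u\n", m->dpipe_header_id);
        if (m->_present.dpipe_field_id)
                printf("field id:   %u\n", m->dpipe_field_id);
}
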
nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH) { - n_dpipe_match++; - } else if (type == DEVLINK_ATTR_DPIPE_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_len = len; - dst->dpipe_value = malloc(len); - memcpy(dst->dpipe_value, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MASK) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_mask_len = len; - dst->dpipe_value_mask = malloc(len); - memcpy(dst->dpipe_value_mask, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MAPPING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_value_mapping = 1; - dst->dpipe_value_mapping = mnl_attr_get_u32(attr); - } - } - - if (n_dpipe_match) { - dst->dpipe_match = calloc(n_dpipe_match, sizeof(*dst->dpipe_match)); - dst->n_dpipe_match = n_dpipe_match; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_match_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH) { - parg.data = &dst->dpipe_match[i]; - if (devlink_dl_dpipe_match_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_action_free(struct devlink_dl_dpipe_action *obj) -{ -} - -int devlink_dl_dpipe_action_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_action *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_action_type = 1; - dst->dpipe_action_type = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_id = 1; - dst->dpipe_header_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_global = 1; - dst->dpipe_header_global = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_index = 1; - dst->dpipe_header_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_id = 1; - dst->dpipe_field_id = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void -devlink_dl_dpipe_action_value_free(struct devlink_dl_dpipe_action_value *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_action; i++) - devlink_dl_dpipe_action_free(&obj->dpipe_action[i]); - free(obj->dpipe_action); - free(obj->dpipe_value); - free(obj->dpipe_value_mask); -} - -int devlink_dl_dpipe_action_value_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_action_value *dst = yarg->data; - unsigned int n_dpipe_action = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_action) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-action-value.dpipe-action)"); - - mnl_attr_for_each_nested(attr, 
nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION) { - n_dpipe_action++; - } else if (type == DEVLINK_ATTR_DPIPE_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_len = len; - dst->dpipe_value = malloc(len); - memcpy(dst->dpipe_value, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MASK) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.dpipe_value_mask_len = len; - dst->dpipe_value_mask = malloc(len); - memcpy(dst->dpipe_value_mask, mnl_attr_get_payload(attr), len); - } else if (type == DEVLINK_ATTR_DPIPE_VALUE_MAPPING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_value_mapping = 1; - dst->dpipe_value_mapping = mnl_attr_get_u32(attr); - } - } - - if (n_dpipe_action) { - dst->dpipe_action = calloc(n_dpipe_action, sizeof(*dst->dpipe_action)); - dst->n_dpipe_action = n_dpipe_action; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_action_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION) { - parg.data = &dst->dpipe_action[i]; - if (devlink_dl_dpipe_action_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_field_free(struct devlink_dl_dpipe_field *obj) -{ - free(obj->dpipe_field_name); -} - -int devlink_dl_dpipe_field_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_field *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_FIELD_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dpipe_field_name_len = len; - dst->dpipe_field_name = malloc(len + 1); - memcpy(dst->dpipe_field_name, mnl_attr_get_str(attr), len); - dst->dpipe_field_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_id = 1; - dst->dpipe_field_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_BITWIDTH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_bitwidth = 1; - dst->dpipe_field_bitwidth = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_FIELD_MAPPING_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_field_mapping_type = 1; - dst->dpipe_field_mapping_type = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void devlink_dl_resource_free(struct devlink_dl_resource *obj) -{ - free(obj->resource_name); -} - -int devlink_dl_resource_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_resource *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RESOURCE_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.resource_name_len = len; - dst->resource_name = malloc(len + 1); - memcpy(dst->resource_name, 
mnl_attr_get_str(attr), len); - dst->resource_name[len] = 0; - } else if (type == DEVLINK_ATTR_RESOURCE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_id = 1; - dst->resource_id = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size = 1; - dst->resource_size = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_NEW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_new = 1; - dst->resource_size_new = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_VALID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_valid = 1; - dst->resource_size_valid = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_MIN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_min = 1; - dst->resource_size_min = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_max = 1; - dst->resource_size_max = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_SIZE_GRAN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_size_gran = 1; - dst->resource_size_gran = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_UNIT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_unit = 1; - dst->resource_unit = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RESOURCE_OCC) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_occ = 1; - dst->resource_occ = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void devlink_dl_info_version_free(struct devlink_dl_info_version *obj) -{ - free(obj->info_version_name); - free(obj->info_version_value); -} - -int devlink_dl_info_version_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_info_version *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_INFO_VERSION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_version_name_len = len; - dst->info_version_name = malloc(len + 1); - memcpy(dst->info_version_name, mnl_attr_get_str(attr), len); - dst->info_version_name[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_VERSION_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_version_value_len = len; - dst->info_version_value = malloc(len + 1); - memcpy(dst->info_version_value, mnl_attr_get_str(attr), len); - dst->info_version_value[len] = 0; - } - } - - return 0; -} - -void devlink_dl_fmsg_free(struct devlink_dl_fmsg *obj) -{ - free(obj->fmsg_obj_name); -} - -int devlink_dl_fmsg_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_fmsg *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_FMSG_OBJ_NEST_START) { - if (ynl_attr_validate(yarg, 
attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_obj_nest_start = 1; - } else if (type == DEVLINK_ATTR_FMSG_PAIR_NEST_START) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_pair_nest_start = 1; - } else if (type == DEVLINK_ATTR_FMSG_ARR_NEST_START) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_arr_nest_start = 1; - } else if (type == DEVLINK_ATTR_FMSG_NEST_END) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg_nest_end = 1; - } else if (type == DEVLINK_ATTR_FMSG_OBJ_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.fmsg_obj_name_len = len; - dst->fmsg_obj_name = malloc(len + 1); - memcpy(dst->fmsg_obj_name, mnl_attr_get_str(attr), len); - dst->fmsg_obj_name[len] = 0; - } - } - - return 0; -} - -void devlink_dl_port_function_free(struct devlink_dl_port_function *obj) -{ - free(obj->hw_addr); -} - -int devlink_dl_port_function_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct devlink_dl_port_function *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.hw_addr_len) - mnl_attr_put(nlh, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, obj->_present.hw_addr_len, obj->hw_addr); - if (obj->_present.state) - mnl_attr_put_u8(nlh, DEVLINK_PORT_FN_ATTR_STATE, obj->state); - if (obj->_present.opstate) - mnl_attr_put_u8(nlh, DEVLINK_PORT_FN_ATTR_OPSTATE, obj->opstate); - if (obj->_present.caps) - mnl_attr_put(nlh, DEVLINK_PORT_FN_ATTR_CAPS, sizeof(struct nla_bitfield32), &obj->caps); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -void -devlink_dl_reload_stats_entry_free(struct devlink_dl_reload_stats_entry *obj) -{ -} - -int devlink_dl_reload_stats_entry_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_stats_entry *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_STATS_LIMIT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_stats_limit = 1; - dst->reload_stats_limit = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RELOAD_STATS_VALUE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_stats_value = 1; - dst->reload_stats_value = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void devlink_dl_reload_act_stats_free(struct devlink_dl_reload_act_stats *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_reload_stats_entry; i++) - devlink_dl_reload_stats_entry_free(&obj->reload_stats_entry[i]); - free(obj->reload_stats_entry); -} - -int devlink_dl_reload_act_stats_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_act_stats *dst = yarg->data; - unsigned int n_reload_stats_entry = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->reload_stats_entry) - return ynl_error_parse(yarg, "attribute already present (dl-reload-act-stats.reload-stats-entry)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { - n_reload_stats_entry++; - } - } - - if (n_reload_stats_entry) { - dst->reload_stats_entry = calloc(n_reload_stats_entry, sizeof(*dst->reload_stats_entry)); - dst->n_reload_stats_entry = 
n_reload_stats_entry; - i = 0; - parg.rsp_policy = &devlink_dl_reload_stats_entry_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { - parg.data = &dst->reload_stats_entry[i]; - if (devlink_dl_reload_stats_entry_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_selftest_id_free(struct devlink_dl_selftest_id *obj) -{ -} - -int devlink_dl_selftest_id_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct devlink_dl_selftest_id *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.flash) - mnl_attr_put(nlh, DEVLINK_ATTR_SELFTEST_ID_FLASH, 0, NULL); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -void -devlink_dl_dpipe_table_matches_free(struct devlink_dl_dpipe_table_matches *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_match; i++) - devlink_dl_dpipe_match_free(&obj->dpipe_match[i]); - free(obj->dpipe_match); -} - -int devlink_dl_dpipe_table_matches_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_table_matches *dst = yarg->data; - unsigned int n_dpipe_match = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_match) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-table-matches.dpipe-match)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH) { - n_dpipe_match++; - } - } - - if (n_dpipe_match) { - dst->dpipe_match = calloc(n_dpipe_match, sizeof(*dst->dpipe_match)); - dst->n_dpipe_match = n_dpipe_match; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_match_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH) { - parg.data = &dst->dpipe_match[i]; - if (devlink_dl_dpipe_match_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_table_actions_free(struct devlink_dl_dpipe_table_actions *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_action; i++) - devlink_dl_dpipe_action_free(&obj->dpipe_action[i]); - free(obj->dpipe_action); -} - -int devlink_dl_dpipe_table_actions_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_table_actions *dst = yarg->data; - unsigned int n_dpipe_action = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_action) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-table-actions.dpipe-action)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION) { - n_dpipe_action++; - } - } - - if (n_dpipe_action) { - dst->dpipe_action = calloc(n_dpipe_action, sizeof(*dst->dpipe_action)); - dst->n_dpipe_action = n_dpipe_action; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_action_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION) { - parg.data = &dst->dpipe_action[i]; - if (devlink_dl_dpipe_action_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_entry_match_values_free(struct devlink_dl_dpipe_entry_match_values *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_match_value; i++) - devlink_dl_dpipe_match_value_free(&obj->dpipe_match_value[i]); - 
free(obj->dpipe_match_value); -} - -int devlink_dl_dpipe_entry_match_values_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entry_match_values *dst = yarg->data; - unsigned int n_dpipe_match_value = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_match_value) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entry-match-values.dpipe-match-value)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_MATCH_VALUE) { - n_dpipe_match_value++; - } - } - - if (n_dpipe_match_value) { - dst->dpipe_match_value = calloc(n_dpipe_match_value, sizeof(*dst->dpipe_match_value)); - dst->n_dpipe_match_value = n_dpipe_match_value; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_match_value_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_MATCH_VALUE) { - parg.data = &dst->dpipe_match_value[i]; - if (devlink_dl_dpipe_match_value_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_entry_action_values_free(struct devlink_dl_dpipe_entry_action_values *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_action_value; i++) - devlink_dl_dpipe_action_value_free(&obj->dpipe_action_value[i]); - free(obj->dpipe_action_value); -} - -int devlink_dl_dpipe_entry_action_values_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entry_action_values *dst = yarg->data; - unsigned int n_dpipe_action_value = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_action_value) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entry-action-values.dpipe-action-value)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ACTION_VALUE) { - n_dpipe_action_value++; - } - } - - if (n_dpipe_action_value) { - dst->dpipe_action_value = calloc(n_dpipe_action_value, sizeof(*dst->dpipe_action_value)); - dst->n_dpipe_action_value = n_dpipe_action_value; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_action_value_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ACTION_VALUE) { - parg.data = &dst->dpipe_action_value[i]; - if (devlink_dl_dpipe_action_value_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void -devlink_dl_dpipe_header_fields_free(struct devlink_dl_dpipe_header_fields *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_field; i++) - devlink_dl_dpipe_field_free(&obj->dpipe_field[i]); - free(obj->dpipe_field); -} - -int devlink_dl_dpipe_header_fields_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_header_fields *dst = yarg->data; - unsigned int n_dpipe_field = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_field) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-header-fields.dpipe-field)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_FIELD) { - n_dpipe_field++; - } - } - - if (n_dpipe_field) { - dst->dpipe_field = calloc(n_dpipe_field, sizeof(*dst->dpipe_field)); - dst->n_dpipe_field = n_dpipe_field; - i = 0; - parg.rsp_policy = 
&devlink_dl_dpipe_field_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_FIELD) { - parg.data = &dst->dpipe_field[i]; - if (devlink_dl_dpipe_field_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_resource_list_free(struct devlink_dl_resource_list *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_resource; i++) - devlink_dl_resource_free(&obj->resource[i]); - free(obj->resource); -} - -int devlink_dl_resource_list_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_resource_list *dst = yarg->data; - unsigned int n_resource = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->resource) - return ynl_error_parse(yarg, "attribute already present (dl-resource-list.resource)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RESOURCE) { - n_resource++; - } - } - - if (n_resource) { - dst->resource = calloc(n_resource, sizeof(*dst->resource)); - dst->n_resource = n_resource; - i = 0; - parg.rsp_policy = &devlink_dl_resource_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RESOURCE) { - parg.data = &dst->resource[i]; - if (devlink_dl_resource_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_reload_act_info_free(struct devlink_dl_reload_act_info *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_reload_action_stats; i++) - devlink_dl_reload_act_stats_free(&obj->reload_action_stats[i]); - free(obj->reload_action_stats); -} - -int devlink_dl_reload_act_info_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_act_info *dst = yarg->data; - unsigned int n_reload_action_stats = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->reload_action_stats) - return ynl_error_parse(yarg, "attribute already present (dl-reload-act-info.reload-action-stats)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_ACTION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_action = 1; - dst->reload_action = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_RELOAD_ACTION_STATS) { - n_reload_action_stats++; - } - } - - if (n_reload_action_stats) { - dst->reload_action_stats = calloc(n_reload_action_stats, sizeof(*dst->reload_action_stats)); - dst->n_reload_action_stats = n_reload_action_stats; - i = 0; - parg.rsp_policy = &devlink_dl_reload_act_stats_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_STATS) { - parg.data = &dst->reload_action_stats[i]; - if (devlink_dl_reload_act_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_table_free(struct devlink_dl_dpipe_table *obj) -{ - free(obj->dpipe_table_name); - devlink_dl_dpipe_table_matches_free(&obj->dpipe_table_matches); - devlink_dl_dpipe_table_actions_free(&obj->dpipe_table_actions); -} - -int devlink_dl_dpipe_table_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_table *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = 
mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_TABLE_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dpipe_table_name_len = len; - dst->dpipe_table_name = malloc(len + 1); - memcpy(dst->dpipe_table_name, mnl_attr_get_str(attr), len); - dst->dpipe_table_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_size = 1; - dst->dpipe_table_size = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_MATCHES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_matches = 1; - - parg.rsp_policy = &devlink_dl_dpipe_table_matches_nest; - parg.data = &dst->dpipe_table_matches; - if (devlink_dl_dpipe_table_matches_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_ACTIONS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_actions = 1; - - parg.rsp_policy = &devlink_dl_dpipe_table_actions_nest; - parg.data = &dst->dpipe_table_actions; - if (devlink_dl_dpipe_table_actions_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_counters_enabled = 1; - dst->dpipe_table_counters_enabled = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_resource_id = 1; - dst->dpipe_table_resource_id = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_DPIPE_TABLE_RESOURCE_UNITS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_table_resource_units = 1; - dst->dpipe_table_resource_units = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void devlink_dl_dpipe_entry_free(struct devlink_dl_dpipe_entry *obj) -{ - devlink_dl_dpipe_entry_match_values_free(&obj->dpipe_entry_match_values); - devlink_dl_dpipe_entry_action_values_free(&obj->dpipe_entry_action_values); -} - -int devlink_dl_dpipe_entry_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entry *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ENTRY_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_index = 1; - dst->dpipe_entry_index = mnl_attr_get_u64(attr); - } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_MATCH_VALUES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_match_values = 1; - - parg.rsp_policy = &devlink_dl_dpipe_entry_match_values_nest; - parg.data = &dst->dpipe_entry_match_values; - if (devlink_dl_dpipe_entry_match_values_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_ACTION_VALUES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_action_values = 1; - - parg.rsp_policy = &devlink_dl_dpipe_entry_action_values_nest; - parg.data = &dst->dpipe_entry_action_values; - if (devlink_dl_dpipe_entry_action_values_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_DPIPE_ENTRY_COUNTER) 
{ - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entry_counter = 1; - dst->dpipe_entry_counter = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void devlink_dl_dpipe_header_free(struct devlink_dl_dpipe_header *obj) -{ - free(obj->dpipe_header_name); - devlink_dl_dpipe_header_fields_free(&obj->dpipe_header_fields); -} - -int devlink_dl_dpipe_header_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_header *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_HEADER_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dpipe_header_name_len = len; - dst->dpipe_header_name = malloc(len + 1); - memcpy(dst->dpipe_header_name, mnl_attr_get_str(attr), len); - dst->dpipe_header_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_id = 1; - dst->dpipe_header_id = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_GLOBAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_global = 1; - dst->dpipe_header_global = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DPIPE_HEADER_FIELDS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_header_fields = 1; - - parg.rsp_policy = &devlink_dl_dpipe_header_fields_nest; - parg.data = &dst->dpipe_header_fields; - if (devlink_dl_dpipe_header_fields_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -void devlink_dl_reload_stats_free(struct devlink_dl_reload_stats *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_reload_action_info; i++) - devlink_dl_reload_act_info_free(&obj->reload_action_info[i]); - free(obj->reload_action_info); -} - -int devlink_dl_reload_stats_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_reload_stats *dst = yarg->data; - unsigned int n_reload_action_info = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->reload_action_info) - return ynl_error_parse(yarg, "attribute already present (dl-reload-stats.reload-action-info)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_ACTION_INFO) { - n_reload_action_info++; - } - } - - if (n_reload_action_info) { - dst->reload_action_info = calloc(n_reload_action_info, sizeof(*dst->reload_action_info)); - dst->n_reload_action_info = n_reload_action_info; - i = 0; - parg.rsp_policy = &devlink_dl_reload_act_info_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_INFO) { - parg.data = &dst->reload_action_info[i]; - if (devlink_dl_reload_act_info_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_tables_free(struct devlink_dl_dpipe_tables *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_table; i++) - devlink_dl_dpipe_table_free(&obj->dpipe_table[i]); - free(obj->dpipe_table); -} - -int devlink_dl_dpipe_tables_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_tables *dst = 
yarg->data; - unsigned int n_dpipe_table = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_table) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-tables.dpipe-table)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_TABLE) { - n_dpipe_table++; - } - } - - if (n_dpipe_table) { - dst->dpipe_table = calloc(n_dpipe_table, sizeof(*dst->dpipe_table)); - dst->n_dpipe_table = n_dpipe_table; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_table_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_TABLE) { - parg.data = &dst->dpipe_table[i]; - if (devlink_dl_dpipe_table_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_entries_free(struct devlink_dl_dpipe_entries *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_entry; i++) - devlink_dl_dpipe_entry_free(&obj->dpipe_entry[i]); - free(obj->dpipe_entry); -} - -int devlink_dl_dpipe_entries_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_entries *dst = yarg->data; - unsigned int n_dpipe_entry = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_entry) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-entries.dpipe-entry)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_ENTRY) { - n_dpipe_entry++; - } - } - - if (n_dpipe_entry) { - dst->dpipe_entry = calloc(n_dpipe_entry, sizeof(*dst->dpipe_entry)); - dst->n_dpipe_entry = n_dpipe_entry; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_entry_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_ENTRY) { - parg.data = &dst->dpipe_entry[i]; - if (devlink_dl_dpipe_entry_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dpipe_headers_free(struct devlink_dl_dpipe_headers *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_dpipe_header; i++) - devlink_dl_dpipe_header_free(&obj->dpipe_header[i]); - free(obj->dpipe_header); -} - -int devlink_dl_dpipe_headers_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dpipe_headers *dst = yarg->data; - unsigned int n_dpipe_header = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->dpipe_header) - return ynl_error_parse(yarg, "attribute already present (dl-dpipe-headers.dpipe-header)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_DPIPE_HEADER) { - n_dpipe_header++; - } - } - - if (n_dpipe_header) { - dst->dpipe_header = calloc(n_dpipe_header, sizeof(*dst->dpipe_header)); - dst->n_dpipe_header = n_dpipe_header; - i = 0; - parg.rsp_policy = &devlink_dl_dpipe_header_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_DPIPE_HEADER) { - parg.data = &dst->dpipe_header[i]; - if (devlink_dl_dpipe_header_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void devlink_dl_dev_stats_free(struct devlink_dl_dev_stats *obj) -{ - devlink_dl_reload_stats_free(&obj->reload_stats); - devlink_dl_reload_stats_free(&obj->remote_reload_stats); -} - -int devlink_dl_dev_stats_parse(struct 
ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct devlink_dl_dev_stats *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_RELOAD_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_stats = 1; - - parg.rsp_policy = &devlink_dl_reload_stats_nest; - parg.data = &dst->reload_stats; - if (devlink_dl_reload_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == DEVLINK_ATTR_REMOTE_RELOAD_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.remote_reload_stats = 1; - - parg.rsp_policy = &devlink_dl_reload_stats_nest; - parg.data = &dst->remote_reload_stats; - if (devlink_dl_reload_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -/* ============== DEVLINK_CMD_GET ============== */ -/* DEVLINK_CMD_GET - do */ -void devlink_get_req_free(struct devlink_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_get_rsp_free(struct devlink_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_dev_stats_free(&rsp->dev_stats); - free(rsp); -} - -int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_RELOAD_FAILED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_failed = 1; - dst->reload_failed = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_DEV_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dev_stats = 1; - - parg.rsp_policy = &devlink_dl_dev_stats_nest; - parg.data = &dst->dev_stats; - if (devlink_dl_dev_stats_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_get_rsp * -devlink_get(struct ynl_sock *ys, struct devlink_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_get_rsp_parse; - yrs.rsp_cmd = 3; - - err = 
ynl_exec(ys, nlh, &yrs);
-        if (err < 0)
-                goto err_free;
-
-        return rsp;
-
-err_free:
-        devlink_get_rsp_free(rsp);
-        return NULL;
-}
-
-/* DEVLINK_CMD_GET - dump */
-void devlink_get_list_free(struct devlink_get_list *rsp)
-{
-        struct devlink_get_list *next = rsp;
-
-        while ((void *)next != YNL_LIST_END) {
-                rsp = next;
-                next = rsp->next;
-
-                free(rsp->obj.bus_name);
-                free(rsp->obj.dev_name);
-                devlink_dl_dev_stats_free(&rsp->obj.dev_stats);
-                free(rsp);
-        }
-}
-
-struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys)
-{
-        struct ynl_dump_state yds = {};
-        struct nlmsghdr *nlh;
-        int err;
-
-        yds.ys = ys;
-        yds.alloc_sz = sizeof(struct devlink_get_list);
-        yds.cb = devlink_get_rsp_parse;
-        yds.rsp_cmd = 3;
-        yds.rsp_policy = &devlink_nest;
-
-        nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_GET, 1);
-
-        err = ynl_exec_dump(ys, nlh, &yds);
-        if (err < 0)
-                goto free_list;
-
-        return yds.first;
-
-free_list:
-        devlink_get_list_free(yds.first);
-        return NULL;
-}
-
-/* ============== DEVLINK_CMD_PORT_GET ============== */
-/* DEVLINK_CMD_PORT_GET - do */
-void devlink_port_get_req_free(struct devlink_port_get_req *req)
-{
-        free(req->bus_name);
-        free(req->dev_name);
-        free(req);
-}
-
-void devlink_port_get_rsp_free(struct devlink_port_get_rsp *rsp)
-{
-        free(rsp->bus_name);
-        free(rsp->dev_name);
-        free(rsp);
-}
-
-int devlink_port_get_rsp_parse(const struct nlmsghdr *nlh, void *data)
-{
-        struct ynl_parse_arg *yarg = data;
-        struct devlink_port_get_rsp *dst;
-        const struct nlattr *attr;
-
-        dst = yarg->data;
-
-        mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) {
-                unsigned int type = mnl_attr_get_type(attr);
-
-                if (type == DEVLINK_ATTR_BUS_NAME) {
-                        unsigned int len;
-
-                        if (ynl_attr_validate(yarg, attr))
-                                return MNL_CB_ERROR;
-
-                        len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
-                        dst->_present.bus_name_len = len;
-                        dst->bus_name = malloc(len + 1);
-                        memcpy(dst->bus_name, mnl_attr_get_str(attr), len);
-                        dst->bus_name[len] = 0;
-                } else if (type == DEVLINK_ATTR_DEV_NAME) {
-                        unsigned int len;
-
-                        if (ynl_attr_validate(yarg, attr))
-                                return MNL_CB_ERROR;
-
-                        len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr));
-                        dst->_present.dev_name_len = len;
-                        dst->dev_name = malloc(len + 1);
-                        memcpy(dst->dev_name, mnl_attr_get_str(attr), len);
-                        dst->dev_name[len] = 0;
-                } else if (type == DEVLINK_ATTR_PORT_INDEX) {
-                        if (ynl_attr_validate(yarg, attr))
-                                return MNL_CB_ERROR;
-                        dst->_present.port_index = 1;
-                        dst->port_index = mnl_attr_get_u32(attr);
-                }
-        }
-
-        return MNL_CB_OK;
-}
-
-struct devlink_port_get_rsp *
-devlink_port_get(struct ynl_sock *ys, struct devlink_port_get_req *req)
-{
-        struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };
-        struct devlink_port_get_rsp *rsp;
-        struct nlmsghdr *nlh;
-        int err;
-
-        nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1);
-        ys->req_policy = &devlink_nest;
-        yrs.yarg.rsp_policy = &devlink_nest;
-
-        if (req->_present.bus_name_len)
-                mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name);
-        if (req->_present.dev_name_len)
-                mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name);
-        if (req->_present.port_index)
-                mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index);
-
-        rsp = calloc(1, sizeof(*rsp));
-        yrs.yarg.data = rsp;
-        yrs.cb = devlink_port_get_rsp_parse;
-        yrs.rsp_cmd = 7;
-
-        err = ynl_exec(ys, nlh, &yrs);
-        if (err < 0)
-                goto err_free;
-
-        return rsp;
-
-err_free:
-        devlink_port_get_rsp_free(rsp);
-        return NULL;
-}
-
-/* DEVLINK_CMD_PORT_GET
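
To illustrate how these generated entry points are driven (an illustration only, not part of this patch): a dump caller opens a YNL socket, asks for the dump, walks the returned list and frees it. ynl_sock_create()/ynl_sock_destroy() and the ynl_devlink_family object live in ynl.h and the generated devlink-user.h, which are outside this hunk, so the exact spellings below are assumptions.

#include <stdio.h>
#include "devlink-user.h"

int main(void)
{
        struct devlink_get_list *devs, *d;
        struct ynl_error yerr;
        struct ynl_sock *ys;

        /* family object and socket helpers assumed from ynl.h / devlink-user.h */
        ys = ynl_sock_create(&ynl_devlink_family, &yerr);
        if (!ys)
                return 1;

        devs = devlink_get_dump(ys);    /* DEVLINK_CMD_GET - dump */
        if (devs) {
                /* same YNL_LIST_END-terminated walk as devlink_get_list_free() */
                for (d = devs; (void *)d != YNL_LIST_END; d = d->next)
                        if (d->obj._present.bus_name_len &&
                            d->obj._present.dev_name_len)
                                printf("%s/%s\n", d->obj.bus_name, d->obj.dev_name);
                devlink_get_list_free(devs);
        }

        ynl_sock_destroy(ys);
        return 0;
}

The "do" path is analogous: fill a devlink_get_req with bus/dev name (and the matching _present bits), call devlink_get(), then free the response with devlink_get_rsp_free().
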
- dump */ -int devlink_port_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_port_get_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -void devlink_port_get_rsp_list_free(struct devlink_port_get_rsp_list *rsp) -{ - struct devlink_port_get_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_port_get_rsp_list * -devlink_port_get_dump(struct ynl_sock *ys, - struct devlink_port_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_port_get_rsp_list); - yds.cb = devlink_port_get_rsp_dump_parse; - yds.rsp_cmd = 7; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PORT_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_port_get_rsp_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_SET ============== */ -/* DEVLINK_CMD_PORT_SET - do */ -void devlink_port_set_req_free(struct devlink_port_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - devlink_dl_port_function_free(&req->port_function); - free(req); -} - -int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.port_type) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_TYPE, req->port_type); - if (req->_present.port_function) - devlink_dl_port_function_put(nlh, DEVLINK_ATTR_PORT_FUNCTION, &req->port_function); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== 
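
For the nested port-function attribute built by devlink_dl_port_function_put() earlier in the file, the caller-side view of devlink_port_set() above looks roughly like this (an illustration; the request setters from the generated header are assumed, so fields are assigned directly):

#include <stdlib.h>
#include <string.h>
#include "devlink-user.h"

/* Hypothetical helper: the caller has already filled bus_name, dev_name and
 * port_index (plus their _present bits) in *req so the message targets a port.
 */
static int set_port_hw_addr(struct ynl_sock *ys,
                            struct devlink_port_set_req *req,
                            const unsigned char *mac, unsigned int len)
{
        req->port_function.hw_addr = malloc(len);
        if (!req->port_function.hw_addr)
                return -1;
        memcpy(req->port_function.hw_addr, mac, len);
        req->port_function._present.hw_addr_len = len;
        req->_present.port_function = 1;

        /* devlink_port_set() nests these via devlink_dl_port_function_put() */
        return devlink_port_set(ys, req);
}
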
DEVLINK_CMD_PORT_NEW ============== */ -/* DEVLINK_CMD_PORT_NEW - do */ -void devlink_port_new_req_free(struct devlink_port_new_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_port_new_rsp_free(struct devlink_port_new_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_port_new_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_port_new_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_port_new_rsp * -devlink_port_new(struct ynl_sock *ys, struct devlink_port_new_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_port_new_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_NEW, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.port_flavour) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_FLAVOUR, req->port_flavour); - if (req->_present.port_pci_pf_number) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, req->port_pci_pf_number); - if (req->_present.port_pci_sf_number) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_PCI_SF_NUMBER, req->port_pci_sf_number); - if (req->_present.port_controller_number) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, req->port_controller_number); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_port_new_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_PORT_NEW; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_port_new_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_DEL ============== */ -/* DEVLINK_CMD_PORT_DEL - do */ -void devlink_port_del_req_free(struct devlink_port_del_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_DEL, 1); - ys->req_policy = &devlink_nest; - - if 
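
A caller-side sketch for devlink_port_new() above (illustration only; the bus/dev names and SF number are example values, DEVLINK_PORT_FLAVOUR_PCI_SF comes from the uapi <linux/devlink.h>, and the request is stack-allocated here, so the generated heap-owning devlink_port_new_req_free() is deliberately not used):

#include <string.h>
#include <linux/devlink.h>
#include "devlink-user.h"

static int add_sf_port(struct ynl_sock *ys, char *bus, char *dev)
{
        struct devlink_port_new_req req = {};
        struct devlink_port_new_rsp *rsp;
        int index = -1;

        req.bus_name = bus;
        req._present.bus_name_len = strlen(bus);
        req.dev_name = dev;
        req._present.dev_name_len = strlen(dev);
        req.port_flavour = DEVLINK_PORT_FLAVOUR_PCI_SF;
        req._present.port_flavour = 1;
        req.port_pci_pf_number = 0;             /* example PF */
        req._present.port_pci_pf_number = 1;
        req.port_pci_sf_number = 44;            /* example SF number */
        req._present.port_pci_sf_number = 1;

        rsp = devlink_port_new(ys, &req);
        if (rsp && rsp->_present.port_index)
                index = rsp->port_index;
        if (rsp)
                devlink_port_new_rsp_free(rsp);
        return index;                           /* new port index, or -1 */
}

The returned port index is what a later devlink_port_del_req would carry to remove the port again.
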
(req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_PORT_SPLIT ============== */ -/* DEVLINK_CMD_PORT_SPLIT - do */ -void devlink_port_split_req_free(struct devlink_port_split_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_SPLIT, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.port_split_count) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_SPLIT_COUNT, req->port_split_count); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_PORT_UNSPLIT ============== */ -/* DEVLINK_CMD_PORT_UNSPLIT - do */ -void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_port_unsplit(struct ynl_sock *ys, - struct devlink_port_unsplit_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_UNSPLIT, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_GET ============== */ -/* DEVLINK_CMD_SB_GET - do */ -void devlink_sb_get_req_free(struct devlink_sb_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_sb_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name 
= malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_get_rsp * -devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_sb_get_rsp_parse; - yrs.rsp_cmd = 13; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_GET - dump */ -void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp) -{ - struct devlink_sb_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_get_list * -devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_get_list); - yds.cb = devlink_sb_get_rsp_parse; - yds.rsp_cmd = 13; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ -/* DEVLINK_CMD_SB_POOL_GET - do */ -void devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_sb_pool_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = 
strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_pool_index = 1; - dst->sb_pool_index = mnl_attr_get_u16(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_pool_get_rsp * -devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_pool_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_sb_pool_get_rsp_parse; - yrs.rsp_cmd = 17; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_pool_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_POOL_GET - dump */ -void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp) -{ - struct devlink_sb_pool_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_pool_get_list * -devlink_sb_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_pool_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); - yds.cb = devlink_sb_pool_get_rsp_parse; - yds.rsp_cmd = 17; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_pool_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_POOL_SET ============== */ -/* DEVLINK_CMD_SB_POOL_SET - do */ -void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_pool_set(struct ynl_sock *ys, - struct devlink_sb_pool_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_POOL_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, 
DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - if (req->_present.sb_pool_threshold_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, req->sb_pool_threshold_type); - if (req->_present.sb_pool_size) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_POOL_SIZE, req->sb_pool_size); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -void -devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_port_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_sb_port_pool_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_pool_index = 1; - dst->sb_pool_index = mnl_attr_get_u16(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_port_pool_get_rsp * -devlink_sb_port_pool_get(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_port_pool_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - - rsp = calloc(1, sizeof(*rsp)); - 
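/* The response struct allocated above is wired into ynl_req_state below:
 * .data is the buffer the parse callback fills, .cb is that callback and
 * .rsp_cmd is the genetlink command expected in the kernel's reply.  The
 * literal value 21 is DEVLINK_CMD_SB_PORT_POOL_NEW -- devlink answers the
 * *_GET request with the corresponding *_NEW message (likewise 13, 17 and
 * 25 for the other sb getters in this file).  A rough caller-side sketch,
 * with illustrative bus/dev names and an already-open struct ynl_sock *ys:
 *
 *	struct devlink_sb_port_pool_get_req *req;
 *	struct devlink_sb_port_pool_get_rsp *rsp;
 *
 *	req = calloc(1, sizeof(*req));
 *	req->bus_name = strdup("pci");
 *	req->_present.bus_name_len = strlen(req->bus_name);
 *	req->dev_name = strdup("0000:01:00.0");
 *	req->_present.dev_name_len = strlen(req->dev_name);
 *	req->port_index = 0;
 *	req->_present.port_index = 1;
 *	req->sb_index = 0;
 *	req->_present.sb_index = 1;
 *	req->sb_pool_index = 0;
 *	req->_present.sb_pool_index = 1;
 *
 *	rsp = devlink_sb_port_pool_get(ys, req);
 *	devlink_sb_port_pool_get_req_free(req);
 *	if (rsp)
 *		devlink_sb_port_pool_get_rsp_free(rsp);
 */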
yrs.yarg.data = rsp; - yrs.cb = devlink_sb_port_pool_get_rsp_parse; - yrs.rsp_cmd = 21; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_port_pool_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -void -devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp) -{ - struct devlink_sb_port_pool_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_port_pool_get_list * -devlink_sb_port_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); - yds.cb = devlink_sb_port_pool_get_rsp_parse; - yds.rsp_cmd = 21; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_port_pool_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_PORT_POOL_SET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ -void -devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_port_pool_set(struct ynl_sock *ys, - struct devlink_sb_port_pool_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - if (req->_present.sb_threshold) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -void -devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_sb_tc_pool_bind_get_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct devlink_sb_tc_pool_bind_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if 
(ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_index = 1; - dst->sb_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_SB_POOL_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_pool_type = 1; - dst->sb_pool_type = mnl_attr_get_u8(attr); - } else if (type == DEVLINK_ATTR_SB_TC_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sb_tc_index = 1; - dst->sb_tc_index = mnl_attr_get_u16(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_sb_tc_pool_bind_get_rsp * -devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_sb_tc_pool_bind_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_TYPE, req->sb_pool_type); - if (req->_present.sb_tc_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_TC_INDEX, req->sb_tc_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yrs.rsp_cmd = 25; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_sb_tc_pool_bind_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -void -devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp) -{ - struct devlink_sb_tc_pool_bind_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_sb_tc_pool_bind_get_list * -devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); - yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yds.rsp_cmd = 25; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, 
DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_sb_tc_pool_bind_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_SET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ -void -devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - if (req->_present.sb_pool_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_POOL_INDEX, req->sb_pool_index); - if (req->_present.sb_pool_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_SB_POOL_TYPE, req->sb_pool_type); - if (req->_present.sb_tc_index) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_SB_TC_INDEX, req->sb_tc_index); - if (req->_present.sb_threshold) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_THRESHOLD, req->sb_threshold); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_OCC_SNAPSHOT ============== */ -/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ -void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_occ_snapshot(struct ynl_sock *ys, - struct devlink_sb_occ_snapshot_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_OCC_SNAPSHOT, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SB_OCC_MAX_CLEAR ============== */ -/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ -void -devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_sb_occ_max_clear(struct ynl_sock *ys, - struct devlink_sb_occ_max_clear_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SB_OCC_MAX_CLEAR, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - 
mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.sb_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_SB_INDEX, req->sb_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_ESWITCH_GET ============== */ -/* DEVLINK_CMD_ESWITCH_GET - do */ -void devlink_eswitch_get_req_free(struct devlink_eswitch_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_eswitch_get_rsp_free(struct devlink_eswitch_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_eswitch_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_eswitch_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_ESWITCH_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.eswitch_mode = 1; - dst->eswitch_mode = mnl_attr_get_u16(attr); - } else if (type == DEVLINK_ATTR_ESWITCH_INLINE_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.eswitch_inline_mode = 1; - dst->eswitch_inline_mode = mnl_attr_get_u16(attr); - } else if (type == DEVLINK_ATTR_ESWITCH_ENCAP_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.eswitch_encap_mode = 1; - dst->eswitch_encap_mode = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_eswitch_get_rsp * -devlink_eswitch_get(struct ynl_sock *ys, struct devlink_eswitch_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_eswitch_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_ESWITCH_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_eswitch_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_ESWITCH_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_eswitch_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_ESWITCH_SET ============== */ -/* DEVLINK_CMD_ESWITCH_SET - do */ -void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_eswitch_set(struct ynl_sock *ys, - struct devlink_eswitch_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct 
nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_ESWITCH_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.eswitch_mode) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_ESWITCH_MODE, req->eswitch_mode); - if (req->_present.eswitch_inline_mode) - mnl_attr_put_u16(nlh, DEVLINK_ATTR_ESWITCH_INLINE_MODE, req->eswitch_inline_mode); - if (req->_present.eswitch_encap_mode) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, req->eswitch_encap_mode); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_DPIPE_TABLE_GET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ -void devlink_dpipe_table_get_req_free(struct devlink_dpipe_table_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->dpipe_table_name); - free(req); -} - -void devlink_dpipe_table_get_rsp_free(struct devlink_dpipe_table_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_dpipe_tables_free(&rsp->dpipe_tables); - free(rsp); -} - -int devlink_dpipe_table_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_dpipe_table_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_TABLES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_tables = 1; - - parg.rsp_policy = &devlink_dl_dpipe_tables_nest; - parg.data = &dst->dpipe_tables; - if (devlink_dl_dpipe_tables_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_dpipe_table_get_rsp * -devlink_dpipe_table_get(struct ynl_sock *ys, - struct devlink_dpipe_table_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_dpipe_table_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_TABLE_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.dpipe_table_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_dpipe_table_get_rsp_parse; - yrs.rsp_cmd = 
DEVLINK_CMD_DPIPE_TABLE_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_dpipe_table_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_DPIPE_ENTRIES_GET ============== */ -/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ -void -devlink_dpipe_entries_get_req_free(struct devlink_dpipe_entries_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->dpipe_table_name); - free(req); -} - -void -devlink_dpipe_entries_get_rsp_free(struct devlink_dpipe_entries_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_dpipe_entries_free(&rsp->dpipe_entries); - free(rsp); -} - -int devlink_dpipe_entries_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_dpipe_entries_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_ENTRIES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_entries = 1; - - parg.rsp_policy = &devlink_dl_dpipe_entries_nest; - parg.data = &dst->dpipe_entries; - if (devlink_dl_dpipe_entries_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_dpipe_entries_get_rsp * -devlink_dpipe_entries_get(struct ynl_sock *ys, - struct devlink_dpipe_entries_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_dpipe_entries_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_ENTRIES_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.dpipe_table_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_dpipe_entries_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_dpipe_entries_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_DPIPE_HEADERS_GET ============== */ -/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ -void -devlink_dpipe_headers_get_req_free(struct devlink_dpipe_headers_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_dpipe_headers_get_rsp_free(struct devlink_dpipe_headers_get_rsp *rsp) -{ - free(rsp->bus_name); - 
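/* The generated *_rsp_free() helpers release every heap-allocated member
 * before freeing the struct itself; nested attribute sets such as
 * dpipe_headers are released through their own generated
 * devlink_dl_*_free() helpers in the sequence below. */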
free(rsp->dev_name); - devlink_dl_dpipe_headers_free(&rsp->dpipe_headers); - free(rsp); -} - -int devlink_dpipe_headers_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_dpipe_headers_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_DPIPE_HEADERS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dpipe_headers = 1; - - parg.rsp_policy = &devlink_dl_dpipe_headers_nest; - parg.data = &dst->dpipe_headers; - if (devlink_dl_dpipe_headers_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_dpipe_headers_get_rsp * -devlink_dpipe_headers_get(struct ynl_sock *ys, - struct devlink_dpipe_headers_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_dpipe_headers_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_HEADERS_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_dpipe_headers_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_DPIPE_HEADERS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_dpipe_headers_get_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ -void -devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->dpipe_table_name); - free(req); -} - -int devlink_dpipe_table_counters_set(struct ynl_sock *ys, - struct devlink_dpipe_table_counters_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.dpipe_table_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DPIPE_TABLE_NAME, req->dpipe_table_name); - if (req->_present.dpipe_table_counters_enabled) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, 
req->dpipe_table_counters_enabled); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RESOURCE_SET ============== */ -/* DEVLINK_CMD_RESOURCE_SET - do */ -void devlink_resource_set_req_free(struct devlink_resource_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_resource_set(struct ynl_sock *ys, - struct devlink_resource_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RESOURCE_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.resource_id) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID, req->resource_id); - if (req->_present.resource_size) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_SIZE, req->resource_size); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RESOURCE_DUMP ============== */ -/* DEVLINK_CMD_RESOURCE_DUMP - do */ -void devlink_resource_dump_req_free(struct devlink_resource_dump_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_resource_dump_rsp_free(struct devlink_resource_dump_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - devlink_dl_resource_list_free(&rsp->resource_list); - free(rsp); -} - -int devlink_resource_dump_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_resource_dump_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_RESOURCE_LIST) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.resource_list = 1; - - parg.rsp_policy = &devlink_dl_resource_list_nest; - parg.data = &dst->resource_list; - if (devlink_dl_resource_list_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct devlink_resource_dump_rsp * -devlink_resource_dump(struct ynl_sock *ys, - struct devlink_resource_dump_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_resource_dump_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RESOURCE_DUMP, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - 
mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_resource_dump_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RESOURCE_DUMP; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_resource_dump_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_RELOAD ============== */ -/* DEVLINK_CMD_RELOAD - do */ -void devlink_reload_req_free(struct devlink_reload_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_reload_rsp_free(struct devlink_reload_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_reload_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_reload_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reload_actions_performed = 1; - memcpy(&dst->reload_actions_performed, mnl_attr_get_payload(attr), sizeof(struct nla_bitfield32)); - } - } - - return MNL_CB_OK; -} - -struct devlink_reload_rsp * -devlink_reload(struct ynl_sock *ys, struct devlink_reload_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_reload_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RELOAD, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.reload_action) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_RELOAD_ACTION, req->reload_action); - if (req->_present.reload_limits) - mnl_attr_put(nlh, DEVLINK_ATTR_RELOAD_LIMITS, sizeof(struct nla_bitfield32), &req->reload_limits); - if (req->_present.netns_pid) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_PID, req->netns_pid); - if (req->_present.netns_fd) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_FD, req->netns_fd); - if (req->_present.netns_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_NETNS_ID, req->netns_id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_reload_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RELOAD; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_reload_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_PARAM_GET ============== */ -/* DEVLINK_CMD_PARAM_GET - do */ -void devlink_param_get_req_free(struct devlink_param_get_req *req) 
-{ - free(req->bus_name); - free(req->dev_name); - free(req->param_name); - free(req); -} - -void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->param_name); - free(rsp); -} - -int devlink_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_param_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PARAM_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.param_name_len = len; - dst->param_name = malloc(len + 1); - memcpy(dst->param_name, mnl_attr_get_str(attr), len); - dst->param_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_param_get_rsp * -devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_param_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.param_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, req->param_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_param_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_PARAM_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_param_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_PARAM_GET - dump */ -void devlink_param_get_list_free(struct devlink_param_get_list *rsp) -{ - struct devlink_param_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.param_name); - free(rsp); - } -} - -struct devlink_param_get_list * -devlink_param_get_dump(struct ynl_sock *ys, - struct devlink_param_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_param_get_list); - yds.cb = devlink_param_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_PARAM_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PARAM_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, 
req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_param_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PARAM_SET ============== */ -/* DEVLINK_CMD_PARAM_SET - do */ -void devlink_param_set_req_free(struct devlink_param_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->param_name); - free(req); -} - -int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PARAM_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.param_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_PARAM_NAME, req->param_name); - if (req->_present.param_type) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_TYPE, req->param_type); - if (req->_present.param_value_cmode) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_PARAM_VALUE_CMODE, req->param_value_cmode); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_REGION_GET ============== */ -/* DEVLINK_CMD_REGION_GET - do */ -void devlink_region_get_req_free(struct devlink_region_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->region_name); - free(req); -} - -void devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->region_name); - free(rsp); -} - -int devlink_region_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_region_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_REGION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.region_name_len = len; - dst->region_name = malloc(len + 1); - memcpy(dst->region_name, mnl_attr_get_str(attr), len); - dst->region_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_region_get_rsp * -devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req 
*req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_region_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_region_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_REGION_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_region_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_REGION_GET - dump */ -void devlink_region_get_list_free(struct devlink_region_get_list *rsp) -{ - struct devlink_region_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.region_name); - free(rsp); - } -} - -struct devlink_region_get_list * -devlink_region_get_dump(struct ynl_sock *ys, - struct devlink_region_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_region_get_list); - yds.cb = devlink_region_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_REGION_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_REGION_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_region_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_REGION_NEW ============== */ -/* DEVLINK_CMD_REGION_NEW - do */ -void devlink_region_new_req_free(struct devlink_region_new_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->region_name); - free(req); -} - -void devlink_region_new_rsp_free(struct devlink_region_new_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->region_name); - free(rsp); -} - -int devlink_region_new_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_region_new_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - 
memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_REGION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.region_name_len = len; - dst->region_name = malloc(len + 1); - memcpy(dst->region_name, mnl_attr_get_str(attr), len); - dst->region_name[len] = 0; - } else if (type == DEVLINK_ATTR_REGION_SNAPSHOT_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.region_snapshot_id = 1; - dst->region_snapshot_id = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_region_new_rsp * -devlink_region_new(struct ynl_sock *ys, struct devlink_region_new_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_region_new_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_NEW, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - if (req->_present.region_snapshot_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_region_new_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_REGION_NEW; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_region_new_rsp_free(rsp); - return NULL; -} - -/* ============== DEVLINK_CMD_REGION_DEL ============== */ -/* DEVLINK_CMD_REGION_DEL - do */ -void devlink_region_del_req_free(struct devlink_region_del_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->region_name); - free(req); -} - -int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_REGION_DEL, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - if (req->_present.region_snapshot_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_REGION_READ ============== */ -/* DEVLINK_CMD_REGION_READ - dump */ -int devlink_region_read_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_region_read_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - 
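/* Each generated *_rsp_parse() callback follows the same pattern seen
 * below: walk every attribute after the genlmsghdr with
 * mnl_attr_for_each(), validate it against the family policy via
 * ynl_attr_validate(), then record it in the response struct while
 * setting the matching _present flag.  Scalar attributes are read with
 * mnl_attr_get_u8/u16/u32/u64(); string attributes are bounded with
 * strnlen() against the payload length, copied into a malloc'ed buffer
 * and explicitly NUL-terminated, with the length stored in
 * _present.<name>_len. */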
- mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_REGION_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.region_name_len = len; - dst->region_name = malloc(len + 1); - memcpy(dst->region_name, mnl_attr_get_str(attr), len); - dst->region_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -void -devlink_region_read_rsp_list_free(struct devlink_region_read_rsp_list *rsp) -{ - struct devlink_region_read_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.region_name); - free(rsp); - } -} - -struct devlink_region_read_rsp_list * -devlink_region_read_dump(struct ynl_sock *ys, - struct devlink_region_read_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_region_read_rsp_list); - yds.cb = devlink_region_read_rsp_dump_parse; - yds.rsp_cmd = DEVLINK_CMD_REGION_READ; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_REGION_READ, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.region_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_REGION_NAME, req->region_name); - if (req->_present.region_snapshot_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_REGION_SNAPSHOT_ID, req->region_snapshot_id); - if (req->_present.region_direct) - mnl_attr_put(nlh, DEVLINK_ATTR_REGION_DIRECT, 0, NULL); - if (req->_present.region_chunk_addr) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_REGION_CHUNK_ADDR, req->region_chunk_addr); - if (req->_present.region_chunk_len) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_REGION_CHUNK_LEN, req->region_chunk_len); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_region_read_rsp_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_PARAM_GET ============== */ -/* DEVLINK_CMD_PORT_PARAM_GET - do */ -void devlink_port_param_get_req_free(struct devlink_port_param_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void 
devlink_port_param_get_rsp_free(struct devlink_port_param_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_port_param_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_port_param_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_port_param_get_rsp * -devlink_port_param_get(struct ynl_sock *ys, - struct devlink_port_param_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_port_param_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_port_param_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_PORT_PARAM_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_port_param_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_PORT_PARAM_GET - dump */ -void devlink_port_param_get_list_free(struct devlink_port_param_get_list *rsp) -{ - struct devlink_port_param_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_port_param_get_list * -devlink_port_param_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_port_param_get_list); - yds.cb = devlink_port_param_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_PORT_PARAM_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_port_param_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_PORT_PARAM_SET ============== */ -/* DEVLINK_CMD_PORT_PARAM_SET - do */ -void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req) -{ - free(req->bus_name); - 
free(req->dev_name); - free(req); -} - -int devlink_port_param_set(struct ynl_sock *ys, - struct devlink_port_param_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_PORT_PARAM_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_INFO_GET ============== */ -/* DEVLINK_CMD_INFO_GET - do */ -void devlink_info_get_req_free(struct devlink_info_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp) -{ - unsigned int i; - - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->info_driver_name); - free(rsp->info_serial_number); - for (i = 0; i < rsp->n_info_version_fixed; i++) - devlink_dl_info_version_free(&rsp->info_version_fixed[i]); - free(rsp->info_version_fixed); - for (i = 0; i < rsp->n_info_version_running; i++) - devlink_dl_info_version_free(&rsp->info_version_running[i]); - free(rsp->info_version_running); - for (i = 0; i < rsp->n_info_version_stored; i++) - devlink_dl_info_version_free(&rsp->info_version_stored[i]); - free(rsp->info_version_stored); - free(rsp); -} - -int devlink_info_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - unsigned int n_info_version_running = 0; - unsigned int n_info_version_stored = 0; - unsigned int n_info_version_fixed = 0; - struct ynl_parse_arg *yarg = data; - struct devlink_info_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - dst = yarg->data; - parg.ys = yarg->ys; - - if (dst->info_version_fixed) - return ynl_error_parse(yarg, "attribute already present (devlink.info-version-fixed)"); - if (dst->info_version_running) - return ynl_error_parse(yarg, "attribute already present (devlink.info-version-running)"); - if (dst->info_version_stored) - return ynl_error_parse(yarg, "attribute already present (devlink.info-version-stored)"); - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_DRIVER_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_driver_name_len = len; - dst->info_driver_name = malloc(len + 1); - memcpy(dst->info_driver_name, mnl_attr_get_str(attr), len); - 
dst->info_driver_name[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_SERIAL_NUMBER) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.info_serial_number_len = len; - dst->info_serial_number = malloc(len + 1); - memcpy(dst->info_serial_number, mnl_attr_get_str(attr), len); - dst->info_serial_number[len] = 0; - } else if (type == DEVLINK_ATTR_INFO_VERSION_FIXED) { - n_info_version_fixed++; - } else if (type == DEVLINK_ATTR_INFO_VERSION_RUNNING) { - n_info_version_running++; - } else if (type == DEVLINK_ATTR_INFO_VERSION_STORED) { - n_info_version_stored++; - } - } - - if (n_info_version_fixed) { - dst->info_version_fixed = calloc(n_info_version_fixed, sizeof(*dst->info_version_fixed)); - dst->n_info_version_fixed = n_info_version_fixed; - i = 0; - parg.rsp_policy = &devlink_dl_info_version_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_FIXED) { - parg.data = &dst->info_version_fixed[i]; - if (devlink_dl_info_version_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - if (n_info_version_running) { - dst->info_version_running = calloc(n_info_version_running, sizeof(*dst->info_version_running)); - dst->n_info_version_running = n_info_version_running; - i = 0; - parg.rsp_policy = &devlink_dl_info_version_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_RUNNING) { - parg.data = &dst->info_version_running[i]; - if (devlink_dl_info_version_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - if (n_info_version_stored) { - dst->info_version_stored = calloc(n_info_version_stored, sizeof(*dst->info_version_stored)); - dst->n_info_version_stored = n_info_version_stored; - i = 0; - parg.rsp_policy = &devlink_dl_info_version_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_STORED) { - parg.data = &dst->info_version_stored[i]; - if (devlink_dl_info_version_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return MNL_CB_OK; -} - -struct devlink_info_get_rsp * -devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_info_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_INFO_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_info_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_INFO_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_info_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_INFO_GET - dump */ -void devlink_info_get_list_free(struct devlink_info_get_list *rsp) -{ - struct devlink_info_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - unsigned int i; - - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.info_driver_name); - free(rsp->obj.info_serial_number); - for (i = 0; i < rsp->obj.n_info_version_fixed; i++) - 
devlink_dl_info_version_free(&rsp->obj.info_version_fixed[i]); - free(rsp->obj.info_version_fixed); - for (i = 0; i < rsp->obj.n_info_version_running; i++) - devlink_dl_info_version_free(&rsp->obj.info_version_running[i]); - free(rsp->obj.info_version_running); - for (i = 0; i < rsp->obj.n_info_version_stored; i++) - devlink_dl_info_version_free(&rsp->obj.info_version_stored[i]); - free(rsp->obj.info_version_stored); - free(rsp); - } -} - -struct devlink_info_get_list *devlink_info_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_info_get_list); - yds.cb = devlink_info_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_INFO_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_INFO_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_info_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ -void -devlink_health_reporter_get_req_free(struct devlink_health_reporter_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -void -devlink_health_reporter_get_rsp_free(struct devlink_health_reporter_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->health_reporter_name); - free(rsp); -} - -int devlink_health_reporter_get_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct devlink_health_reporter_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_HEALTH_REPORTER_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.health_reporter_name_len = len; - dst->health_reporter_name = malloc(len + 1); - memcpy(dst->health_reporter_name, mnl_attr_get_str(attr), len); - dst->health_reporter_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_health_reporter_get_rsp * -devlink_health_reporter_get(struct ynl_sock *ys, - struct devlink_health_reporter_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_health_reporter_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_GET, 1); - 
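/*
 * Illustrative sketch (not part of the generated file): querying device
 * info through the generated devlink_info_get() above. Only functions and
 * struct members defined in this file are used; ynl_sock_create() and
 * struct ynl_error come from the YNL C library (tools/net/ynl/lib), which
 * is assumed to be on the include path.
 */
#include <stdio.h>
#include <string.h>
#include "devlink-user.h"
#include "ynl.h"

static void show_dev_info(const char *bus, const char *dev)
{
	struct devlink_info_get_req req = {};
	struct devlink_info_get_rsp *rsp;
	struct ynl_error yerr;
	struct ynl_sock *ys;

	ys = ynl_sock_create(&ynl_devlink_family, &yerr);
	if (!ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return;
	}

	/* stack-built request, so devlink_info_get_req_free() is not called */
	req.bus_name = (char *)bus;
	req._present.bus_name_len = strlen(bus);
	req.dev_name = (char *)dev;
	req._present.dev_name_len = strlen(dev);

	rsp = devlink_info_get(ys, &req);
	if (rsp) {
		if (rsp->_present.info_driver_name_len)
			printf("driver: %s\n", rsp->info_driver_name);
		if (rsp->_present.info_serial_number_len)
			printf("serial: %s\n", rsp->info_serial_number);
		printf("running versions: %u\n", rsp->n_info_version_running);
		devlink_info_get_rsp_free(rsp);
	}
	ynl_sock_destroy(ys);
}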
ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_health_reporter_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_health_reporter_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ -void -devlink_health_reporter_get_list_free(struct devlink_health_reporter_get_list *rsp) -{ - struct devlink_health_reporter_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.health_reporter_name); - free(rsp); - } -} - -struct devlink_health_reporter_get_list * -devlink_health_reporter_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_health_reporter_get_list); - yds.cb = devlink_health_reporter_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_health_reporter_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_SET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ -void -devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_set(struct ynl_sock *ys, - struct devlink_health_reporter_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - if (req->_present.health_reporter_graceful_period) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, req->health_reporter_graceful_period); - if (req->_present.health_reporter_auto_recover) - mnl_attr_put_u8(nlh, 
DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, req->health_reporter_auto_recover); - if (req->_present.health_reporter_auto_dump) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, req->health_reporter_auto_dump); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_RECOVER ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ -void -devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_recover(struct ynl_sock *ys, - struct devlink_health_reporter_recover_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_RECOVER, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ -void -devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnose_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_diagnose(struct ynl_sock *ys, - struct devlink_health_reporter_diagnose_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ -int devlink_health_reporter_dump_get_rsp_dump_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct devlink_health_reporter_dump_get_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_FMSG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fmsg = 1; - - parg.rsp_policy = &devlink_dl_fmsg_nest; - parg.data = &dst->fmsg; - if (devlink_dl_fmsg_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -void -devlink_health_reporter_dump_get_rsp_list_free(struct 
devlink_health_reporter_dump_get_rsp_list *rsp) -{ - struct devlink_health_reporter_dump_get_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - devlink_dl_fmsg_free(&rsp->obj.fmsg); - free(rsp); - } -} - -struct devlink_health_reporter_dump_get_rsp_list * -devlink_health_reporter_dump_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_dump_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_health_reporter_dump_get_rsp_list); - yds.cb = devlink_health_reporter_dump_get_rsp_dump_parse; - yds.rsp_cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_health_reporter_dump_get_rsp_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ -void -devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_clear_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_dump_clear(struct ynl_sock *ys, - struct devlink_health_reporter_dump_clear_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_FLASH_UPDATE ============== */ -/* DEVLINK_CMD_FLASH_UPDATE - do */ -void devlink_flash_update_req_free(struct devlink_flash_update_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->flash_update_file_name); - free(req->flash_update_component); - free(req); -} - -int devlink_flash_update(struct ynl_sock *ys, - struct devlink_flash_update_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_FLASH_UPDATE, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.flash_update_file_name_len) - mnl_attr_put_strz(nlh, 
DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME, req->flash_update_file_name); - if (req->_present.flash_update_component_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT, req->flash_update_component); - if (req->_present.flash_update_overwrite_mask) - mnl_attr_put(nlh, DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, sizeof(struct nla_bitfield32), &req->flash_update_overwrite_mask); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_TRAP_GET ============== */ -/* DEVLINK_CMD_TRAP_GET - do */ -void devlink_trap_get_req_free(struct devlink_trap_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_name); - free(req); -} - -void devlink_trap_get_rsp_free(struct devlink_trap_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->trap_name); - free(rsp); -} - -int devlink_trap_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_trap_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_TRAP_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.trap_name_len = len; - dst->trap_name = malloc(len + 1); - memcpy(dst->trap_name, mnl_attr_get_str(attr), len); - dst->trap_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_trap_get_rsp * -devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_trap_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_NAME, req->trap_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_trap_get_rsp_parse; - yrs.rsp_cmd = 63; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_trap_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_TRAP_GET - dump */ -void devlink_trap_get_list_free(struct devlink_trap_get_list *rsp) -{ - struct devlink_trap_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.trap_name); - free(rsp); - } 
-} - -struct devlink_trap_get_list * -devlink_trap_get_dump(struct ynl_sock *ys, - struct devlink_trap_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_trap_get_list); - yds.cb = devlink_trap_get_rsp_parse; - yds.rsp_cmd = 63; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_trap_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_TRAP_SET ============== */ -/* DEVLINK_CMD_TRAP_SET - do */ -void devlink_trap_set_req_free(struct devlink_trap_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_name); - free(req); -} - -int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_NAME, req->trap_name); - if (req->_present.trap_action) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_TRAP_GROUP_GET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_GET - do */ -void devlink_trap_group_get_req_free(struct devlink_trap_group_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_group_name); - free(req); -} - -void devlink_trap_group_get_rsp_free(struct devlink_trap_group_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->trap_group_name); - free(rsp); -} - -int devlink_trap_group_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_trap_group_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_TRAP_GROUP_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - 
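/*
 * Illustrative sketch (not part of the generated file): "do" style
 * commands such as devlink_trap_set() above return 0 on success and -1 on
 * error. DEVLINK_TRAP_ACTION_TRAP is the uapi enum from <linux/devlink.h>;
 * the socket is assumed to have been created as in the earlier sketches.
 */
#include <string.h>
#include "devlink-user.h"

static int trap_to_cpu(struct ynl_sock *ys, const char *bus, const char *dev,
		       const char *trap)
{
	struct devlink_trap_set_req req = {};

	req.bus_name = (char *)bus;
	req._present.bus_name_len = strlen(bus);
	req.dev_name = (char *)dev;
	req._present.dev_name_len = strlen(dev);
	req.trap_name = (char *)trap;
	req._present.trap_name_len = strlen(trap);
	req.trap_action = DEVLINK_TRAP_ACTION_TRAP;
	req._present.trap_action = 1;

	return devlink_trap_set(ys, &req);
}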
dst->_present.trap_group_name_len = len; - dst->trap_group_name = malloc(len + 1); - memcpy(dst->trap_group_name, mnl_attr_get_str(attr), len); - dst->trap_group_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_trap_group_get_rsp * -devlink_trap_group_get(struct ynl_sock *ys, - struct devlink_trap_group_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_trap_group_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_group_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_GROUP_NAME, req->trap_group_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_trap_group_get_rsp_parse; - yrs.rsp_cmd = 67; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_trap_group_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_TRAP_GROUP_GET - dump */ -void devlink_trap_group_get_list_free(struct devlink_trap_group_get_list *rsp) -{ - struct devlink_trap_group_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.trap_group_name); - free(rsp); - } -} - -struct devlink_trap_group_get_list * -devlink_trap_group_get_dump(struct ynl_sock *ys, - struct devlink_trap_group_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_trap_group_get_list); - yds.cb = devlink_trap_group_get_rsp_parse; - yds.rsp_cmd = 67; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_trap_group_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_TRAP_GROUP_SET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_SET - do */ -void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->trap_group_name); - free(req); -} - -int devlink_trap_group_set(struct ynl_sock *ys, - struct devlink_trap_group_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_group_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_TRAP_GROUP_NAME, req->trap_group_name); - if (req->_present.trap_action) - mnl_attr_put_u8(nlh, DEVLINK_ATTR_TRAP_ACTION, req->trap_action); - if (req->_present.trap_policer_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, 
req->trap_policer_id); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_TRAP_POLICER_GET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_GET - do */ -void -devlink_trap_policer_get_req_free(struct devlink_trap_policer_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void -devlink_trap_policer_get_rsp_free(struct devlink_trap_policer_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_trap_policer_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_trap_policer_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_TRAP_POLICER_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.trap_policer_id = 1; - dst->trap_policer_id = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_trap_policer_get_rsp * -devlink_trap_policer_get(struct ynl_sock *ys, - struct devlink_trap_policer_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_trap_policer_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_policer_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_trap_policer_get_rsp_parse; - yrs.rsp_cmd = 71; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_trap_policer_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_TRAP_POLICER_GET - dump */ -void -devlink_trap_policer_get_list_free(struct devlink_trap_policer_get_list *rsp) -{ - struct devlink_trap_policer_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_trap_policer_get_list * -devlink_trap_policer_get_dump(struct ynl_sock *ys, - struct devlink_trap_policer_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_trap_policer_get_list); - yds.cb = devlink_trap_policer_get_rsp_parse; - yds.rsp_cmd = 71; - yds.rsp_policy = &devlink_nest; - - nlh = 
ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_trap_policer_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_TRAP_POLICER_SET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_SET - do */ -void -devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -int devlink_trap_policer_set(struct ynl_sock *ys, - struct devlink_trap_policer_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.trap_policer_id) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_TRAP_POLICER_ID, req->trap_policer_id); - if (req->_present.trap_policer_rate) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_RATE, req->trap_policer_rate); - if (req->_present.trap_policer_burst) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_TRAP_POLICER_BURST, req->trap_policer_burst); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_TEST ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ -void -devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->health_reporter_name); - free(req); -} - -int devlink_health_reporter_test(struct ynl_sock *ys, - struct devlink_health_reporter_test_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_HEALTH_REPORTER_TEST, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.health_reporter_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME, req->health_reporter_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RATE_GET ============== */ -/* DEVLINK_CMD_RATE_GET - do */ -void devlink_rate_get_req_free(struct devlink_rate_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req); -} - -void devlink_rate_get_rsp_free(struct devlink_rate_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp->rate_node_name); - free(rsp); -} - -int devlink_rate_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct devlink_rate_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if 
(type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_PORT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port_index = 1; - dst->port_index = mnl_attr_get_u32(attr); - } else if (type == DEVLINK_ATTR_RATE_NODE_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.rate_node_name_len = len; - dst->rate_node_name = malloc(len + 1); - memcpy(dst->rate_node_name, mnl_attr_get_str(attr), len); - dst->rate_node_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_rate_get_rsp * -devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_rate_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.port_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_PORT_INDEX, req->port_index); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_rate_get_rsp_parse; - yrs.rsp_cmd = 76; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_rate_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_RATE_GET - dump */ -void devlink_rate_get_list_free(struct devlink_rate_get_list *rsp) -{ - struct devlink_rate_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp->obj.rate_node_name); - free(rsp); - } -} - -struct devlink_rate_get_list * -devlink_rate_get_dump(struct ynl_sock *ys, - struct devlink_rate_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_rate_get_list); - yds.cb = devlink_rate_get_rsp_parse; - yds.rsp_cmd = 76; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_rate_get_list_free(yds.first); - return NULL; -} - -/* ============== 
DEVLINK_CMD_RATE_SET ============== */ -/* DEVLINK_CMD_RATE_SET - do */ -void devlink_rate_set_req_free(struct devlink_rate_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req->rate_parent_node_name); - free(req); -} - -int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - if (req->_present.rate_tx_share) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_SHARE, req->rate_tx_share); - if (req->_present.rate_tx_max) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_MAX, req->rate_tx_max); - if (req->_present.rate_tx_priority) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_PRIORITY, req->rate_tx_priority); - if (req->_present.rate_tx_weight) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_WEIGHT, req->rate_tx_weight); - if (req->_present.rate_parent_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RATE_NEW ============== */ -/* DEVLINK_CMD_RATE_NEW - do */ -void devlink_rate_new_req_free(struct devlink_rate_new_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req->rate_parent_node_name); - free(req); -} - -int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_RATE_NEW, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - if (req->_present.rate_tx_share) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_SHARE, req->rate_tx_share); - if (req->_present.rate_tx_max) - mnl_attr_put_u64(nlh, DEVLINK_ATTR_RATE_TX_MAX, req->rate_tx_max); - if (req->_present.rate_tx_priority) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_PRIORITY, req->rate_tx_priority); - if (req->_present.rate_tx_weight) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_RATE_TX_WEIGHT, req->rate_tx_weight); - if (req->_present.rate_parent_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_PARENT_NODE_NAME, req->rate_parent_node_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_RATE_DEL ============== */ -/* DEVLINK_CMD_RATE_DEL - do */ -void devlink_rate_del_req_free(struct devlink_rate_del_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->rate_node_name); - free(req); -} - -int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, 
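/*
 * Illustrative sketch (not part of the generated file): capping a named
 * rate node via devlink_rate_set() above. rate_tx_max is carried as a u64
 * DEVLINK_ATTR_RATE_TX_MAX attribute; interpretation of the value is left
 * to the devlink uapi and the driver. Socket setup as in the earlier sketches.
 */
#include <string.h>
#include "devlink-user.h"

static int cap_rate_node(struct ynl_sock *ys, const char *bus,
			 const char *dev, const char *node, __u64 tx_max)
{
	struct devlink_rate_set_req req = {};

	req.bus_name = (char *)bus;
	req._present.bus_name_len = strlen(bus);
	req.dev_name = (char *)dev;
	req._present.dev_name_len = strlen(dev);
	req.rate_node_name = (char *)node;
	req._present.rate_node_name_len = strlen(node);
	req.rate_tx_max = tx_max;
	req._present.rate_tx_max = 1;

	return devlink_rate_set(ys, &req);
}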
DEVLINK_CMD_RATE_DEL, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.rate_node_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_RATE_NODE_NAME, req->rate_node_name); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_LINECARD_GET ============== */ -/* DEVLINK_CMD_LINECARD_GET - do */ -void devlink_linecard_get_req_free(struct devlink_linecard_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_linecard_get_rsp_free(struct devlink_linecard_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_linecard_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_linecard_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == DEVLINK_ATTR_LINECARD_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.linecard_index = 1; - dst->linecard_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct devlink_linecard_get_rsp * -devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_linecard_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); - ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.linecard_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_LINECARD_INDEX, req->linecard_index); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_linecard_get_rsp_parse; - yrs.rsp_cmd = 80; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_linecard_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_LINECARD_GET - dump */ -void devlink_linecard_get_list_free(struct devlink_linecard_get_list *rsp) -{ - struct devlink_linecard_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_linecard_get_list * -devlink_linecard_get_dump(struct ynl_sock *ys, - struct devlink_linecard_get_req_dump *req) -{ - struct ynl_dump_state yds 
= {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_linecard_get_list); - yds.cb = devlink_linecard_get_rsp_parse; - yds.rsp_cmd = 80; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_linecard_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_LINECARD_SET ============== */ -/* DEVLINK_CMD_LINECARD_SET - do */ -void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req->linecard_type); - free(req); -} - -int devlink_linecard_set(struct ynl_sock *ys, - struct devlink_linecard_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_LINECARD_SET, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.linecard_index) - mnl_attr_put_u32(nlh, DEVLINK_ATTR_LINECARD_INDEX, req->linecard_index); - if (req->_present.linecard_type_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_LINECARD_TYPE, req->linecard_type); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== DEVLINK_CMD_SELFTESTS_GET ============== */ -/* DEVLINK_CMD_SELFTESTS_GET - do */ -void devlink_selftests_get_req_free(struct devlink_selftests_get_req *req) -{ - free(req->bus_name); - free(req->dev_name); - free(req); -} - -void devlink_selftests_get_rsp_free(struct devlink_selftests_get_rsp *rsp) -{ - free(rsp->bus_name); - free(rsp->dev_name); - free(rsp); -} - -int devlink_selftests_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct devlink_selftests_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == DEVLINK_ATTR_BUS_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.bus_name_len = len; - dst->bus_name = malloc(len + 1); - memcpy(dst->bus_name, mnl_attr_get_str(attr), len); - dst->bus_name[len] = 0; - } else if (type == DEVLINK_ATTR_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } - } - - return MNL_CB_OK; -} - -struct devlink_selftests_get_rsp * -devlink_selftests_get(struct ynl_sock *ys, - struct devlink_selftests_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct devlink_selftests_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SELFTESTS_GET, 1); - 
ys->req_policy = &devlink_nest; - yrs.yarg.rsp_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = devlink_selftests_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SELFTESTS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - devlink_selftests_get_rsp_free(rsp); - return NULL; -} - -/* DEVLINK_CMD_SELFTESTS_GET - dump */ -void devlink_selftests_get_list_free(struct devlink_selftests_get_list *rsp) -{ - struct devlink_selftests_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.bus_name); - free(rsp->obj.dev_name); - free(rsp); - } -} - -struct devlink_selftests_get_list * -devlink_selftests_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct devlink_selftests_get_list); - yds.cb = devlink_selftests_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SELFTESTS_GET; - yds.rsp_policy = &devlink_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SELFTESTS_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - devlink_selftests_get_list_free(yds.first); - return NULL; -} - -/* ============== DEVLINK_CMD_SELFTESTS_RUN ============== */ -/* DEVLINK_CMD_SELFTESTS_RUN - do */ -void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req) -{ - free(req->bus_name); - free(req->dev_name); - devlink_dl_selftest_id_free(&req->selftests); - free(req); -} - -int devlink_selftests_run(struct ynl_sock *ys, - struct devlink_selftests_run_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_SELFTESTS_RUN, 1); - ys->req_policy = &devlink_nest; - - if (req->_present.bus_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); - if (req->_present.dev_name_len) - mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); - if (req->_present.selftests) - devlink_dl_selftest_id_put(nlh, DEVLINK_ATTR_SELFTESTS, &req->selftests); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -const struct ynl_family ynl_devlink_family = { - .name = "devlink", -}; diff --git a/tools/net/ynl/generated/devlink-user.h b/tools/net/ynl/generated/devlink-user.h deleted file mode 100644 index 1db4edc36eaa..000000000000 --- a/tools/net/ynl/generated/devlink-user.h +++ /dev/null @@ -1,5255 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/devlink.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_DEVLINK_GEN_H -#define _LINUX_DEVLINK_GEN_H - -#include -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_devlink_family; - -/* Enums */ -const char *devlink_op_str(int op); -const char *devlink_sb_pool_type_str(enum devlink_sb_pool_type value); -const char *devlink_port_type_str(enum devlink_port_type value); -const char *devlink_port_flavour_str(enum devlink_port_flavour value); -const char *devlink_port_fn_state_str(enum devlink_port_fn_state value); -const char 
*devlink_port_fn_opstate_str(enum devlink_port_fn_opstate value); -const char *devlink_port_fn_attr_cap_str(enum devlink_port_fn_attr_cap value); -const char * -devlink_sb_threshold_type_str(enum devlink_sb_threshold_type value); -const char *devlink_eswitch_mode_str(enum devlink_eswitch_mode value); -const char * -devlink_eswitch_inline_mode_str(enum devlink_eswitch_inline_mode value); -const char * -devlink_eswitch_encap_mode_str(enum devlink_eswitch_encap_mode value); -const char *devlink_dpipe_match_type_str(enum devlink_dpipe_match_type value); -const char * -devlink_dpipe_action_type_str(enum devlink_dpipe_action_type value); -const char * -devlink_dpipe_field_mapping_type_str(enum devlink_dpipe_field_mapping_type value); -const char *devlink_resource_unit_str(enum devlink_resource_unit value); -const char *devlink_reload_action_str(enum devlink_reload_action value); -const char *devlink_param_cmode_str(enum devlink_param_cmode value); -const char *devlink_flash_overwrite_str(enum devlink_flash_overwrite value); -const char *devlink_trap_action_str(enum devlink_trap_action value); - -/* Common nested types */ -struct devlink_dl_dpipe_match { - struct { - __u32 dpipe_match_type:1; - __u32 dpipe_header_id:1; - __u32 dpipe_header_global:1; - __u32 dpipe_header_index:1; - __u32 dpipe_field_id:1; - } _present; - - enum devlink_dpipe_match_type dpipe_match_type; - __u32 dpipe_header_id; - __u8 dpipe_header_global; - __u32 dpipe_header_index; - __u32 dpipe_field_id; -}; - -struct devlink_dl_dpipe_match_value { - struct { - __u32 dpipe_value_len; - __u32 dpipe_value_mask_len; - __u32 dpipe_value_mapping:1; - } _present; - - unsigned int n_dpipe_match; - struct devlink_dl_dpipe_match *dpipe_match; - void *dpipe_value; - void *dpipe_value_mask; - __u32 dpipe_value_mapping; -}; - -struct devlink_dl_dpipe_action { - struct { - __u32 dpipe_action_type:1; - __u32 dpipe_header_id:1; - __u32 dpipe_header_global:1; - __u32 dpipe_header_index:1; - __u32 dpipe_field_id:1; - } _present; - - enum devlink_dpipe_action_type dpipe_action_type; - __u32 dpipe_header_id; - __u8 dpipe_header_global; - __u32 dpipe_header_index; - __u32 dpipe_field_id; -}; - -struct devlink_dl_dpipe_action_value { - struct { - __u32 dpipe_value_len; - __u32 dpipe_value_mask_len; - __u32 dpipe_value_mapping:1; - } _present; - - unsigned int n_dpipe_action; - struct devlink_dl_dpipe_action *dpipe_action; - void *dpipe_value; - void *dpipe_value_mask; - __u32 dpipe_value_mapping; -}; - -struct devlink_dl_dpipe_field { - struct { - __u32 dpipe_field_name_len; - __u32 dpipe_field_id:1; - __u32 dpipe_field_bitwidth:1; - __u32 dpipe_field_mapping_type:1; - } _present; - - char *dpipe_field_name; - __u32 dpipe_field_id; - __u32 dpipe_field_bitwidth; - enum devlink_dpipe_field_mapping_type dpipe_field_mapping_type; -}; - -struct devlink_dl_resource { - struct { - __u32 resource_name_len; - __u32 resource_id:1; - __u32 resource_size:1; - __u32 resource_size_new:1; - __u32 resource_size_valid:1; - __u32 resource_size_min:1; - __u32 resource_size_max:1; - __u32 resource_size_gran:1; - __u32 resource_unit:1; - __u32 resource_occ:1; - } _present; - - char *resource_name; - __u64 resource_id; - __u64 resource_size; - __u64 resource_size_new; - __u8 resource_size_valid; - __u64 resource_size_min; - __u64 resource_size_max; - __u64 resource_size_gran; - enum devlink_resource_unit resource_unit; - __u64 resource_occ; -}; - -struct devlink_dl_info_version { - struct { - __u32 info_version_name_len; - __u32 info_version_value_len; - } _present; - - 
char *info_version_name; - char *info_version_value; -}; - -struct devlink_dl_fmsg { - struct { - __u32 fmsg_obj_nest_start:1; - __u32 fmsg_pair_nest_start:1; - __u32 fmsg_arr_nest_start:1; - __u32 fmsg_nest_end:1; - __u32 fmsg_obj_name_len; - } _present; - - char *fmsg_obj_name; -}; - -struct devlink_dl_port_function { - struct { - __u32 hw_addr_len; - __u32 state:1; - __u32 opstate:1; - __u32 caps:1; - } _present; - - void *hw_addr; - enum devlink_port_fn_state state; - enum devlink_port_fn_opstate opstate; - struct nla_bitfield32 caps; -}; - -struct devlink_dl_reload_stats_entry { - struct { - __u32 reload_stats_limit:1; - __u32 reload_stats_value:1; - } _present; - - __u8 reload_stats_limit; - __u32 reload_stats_value; -}; - -struct devlink_dl_reload_act_stats { - unsigned int n_reload_stats_entry; - struct devlink_dl_reload_stats_entry *reload_stats_entry; -}; - -struct devlink_dl_selftest_id { - struct { - __u32 flash:1; - } _present; -}; - -struct devlink_dl_dpipe_table_matches { - unsigned int n_dpipe_match; - struct devlink_dl_dpipe_match *dpipe_match; -}; - -struct devlink_dl_dpipe_table_actions { - unsigned int n_dpipe_action; - struct devlink_dl_dpipe_action *dpipe_action; -}; - -struct devlink_dl_dpipe_entry_match_values { - unsigned int n_dpipe_match_value; - struct devlink_dl_dpipe_match_value *dpipe_match_value; -}; - -struct devlink_dl_dpipe_entry_action_values { - unsigned int n_dpipe_action_value; - struct devlink_dl_dpipe_action_value *dpipe_action_value; -}; - -struct devlink_dl_dpipe_header_fields { - unsigned int n_dpipe_field; - struct devlink_dl_dpipe_field *dpipe_field; -}; - -struct devlink_dl_resource_list { - unsigned int n_resource; - struct devlink_dl_resource *resource; -}; - -struct devlink_dl_reload_act_info { - struct { - __u32 reload_action:1; - } _present; - - enum devlink_reload_action reload_action; - unsigned int n_reload_action_stats; - struct devlink_dl_reload_act_stats *reload_action_stats; -}; - -struct devlink_dl_dpipe_table { - struct { - __u32 dpipe_table_name_len; - __u32 dpipe_table_size:1; - __u32 dpipe_table_matches:1; - __u32 dpipe_table_actions:1; - __u32 dpipe_table_counters_enabled:1; - __u32 dpipe_table_resource_id:1; - __u32 dpipe_table_resource_units:1; - } _present; - - char *dpipe_table_name; - __u64 dpipe_table_size; - struct devlink_dl_dpipe_table_matches dpipe_table_matches; - struct devlink_dl_dpipe_table_actions dpipe_table_actions; - __u8 dpipe_table_counters_enabled; - __u64 dpipe_table_resource_id; - __u64 dpipe_table_resource_units; -}; - -struct devlink_dl_dpipe_entry { - struct { - __u32 dpipe_entry_index:1; - __u32 dpipe_entry_match_values:1; - __u32 dpipe_entry_action_values:1; - __u32 dpipe_entry_counter:1; - } _present; - - __u64 dpipe_entry_index; - struct devlink_dl_dpipe_entry_match_values dpipe_entry_match_values; - struct devlink_dl_dpipe_entry_action_values dpipe_entry_action_values; - __u64 dpipe_entry_counter; -}; - -struct devlink_dl_dpipe_header { - struct { - __u32 dpipe_header_name_len; - __u32 dpipe_header_id:1; - __u32 dpipe_header_global:1; - __u32 dpipe_header_fields:1; - } _present; - - char *dpipe_header_name; - __u32 dpipe_header_id; - __u8 dpipe_header_global; - struct devlink_dl_dpipe_header_fields dpipe_header_fields; -}; - -struct devlink_dl_reload_stats { - unsigned int n_reload_action_info; - struct devlink_dl_reload_act_info *reload_action_info; -}; - -struct devlink_dl_dpipe_tables { - unsigned int n_dpipe_table; - struct devlink_dl_dpipe_table *dpipe_table; -}; - -struct 
devlink_dl_dpipe_entries { - unsigned int n_dpipe_entry; - struct devlink_dl_dpipe_entry *dpipe_entry; -}; - -struct devlink_dl_dpipe_headers { - unsigned int n_dpipe_header; - struct devlink_dl_dpipe_header *dpipe_header; -}; - -struct devlink_dl_dev_stats { - struct { - __u32 reload_stats:1; - __u32 remote_reload_stats:1; - } _present; - - struct devlink_dl_reload_stats reload_stats; - struct devlink_dl_reload_stats remote_reload_stats; -}; - -/* ============== DEVLINK_CMD_GET ============== */ -/* DEVLINK_CMD_GET - do */ -struct devlink_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_get_req *devlink_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_get_req)); -} -void devlink_get_req_free(struct devlink_get_req *req); - -static inline void -devlink_get_req_set_bus_name(struct devlink_get_req *req, const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_get_req_set_dev_name(struct devlink_get_req *req, const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 reload_failed:1; - __u32 dev_stats:1; - } _present; - - char *bus_name; - char *dev_name; - __u8 reload_failed; - struct devlink_dl_dev_stats dev_stats; -}; - -void devlink_get_rsp_free(struct devlink_get_rsp *rsp); - -/* - * Get devlink instances. 
- */ -struct devlink_get_rsp * -devlink_get(struct ynl_sock *ys, struct devlink_get_req *req); - -/* DEVLINK_CMD_GET - dump */ -struct devlink_get_list { - struct devlink_get_list *next; - struct devlink_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_get_list_free(struct devlink_get_list *rsp); - -struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys); - -/* ============== DEVLINK_CMD_PORT_GET ============== */ -/* DEVLINK_CMD_PORT_GET - do */ -struct devlink_port_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_get_req *devlink_port_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_get_req)); -} -void devlink_port_get_req_free(struct devlink_port_get_req *req); - -static inline void -devlink_port_get_req_set_bus_name(struct devlink_port_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_get_req_set_dev_name(struct devlink_port_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_get_req_set_port_index(struct devlink_port_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -struct devlink_port_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -void devlink_port_get_rsp_free(struct devlink_port_get_rsp *rsp); - -/* - * Get devlink port instances. 
- */ -struct devlink_port_get_rsp * -devlink_port_get(struct ynl_sock *ys, struct devlink_port_get_req *req); - -/* DEVLINK_CMD_PORT_GET - dump */ -struct devlink_port_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_port_get_req_dump * -devlink_port_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_get_req_dump)); -} -void devlink_port_get_req_dump_free(struct devlink_port_get_req_dump *req); - -static inline void -devlink_port_get_req_dump_set_bus_name(struct devlink_port_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_get_req_dump_set_dev_name(struct devlink_port_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_port_get_rsp_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -struct devlink_port_get_rsp_list { - struct devlink_port_get_rsp_list *next; - struct devlink_port_get_rsp_dump obj __attribute__((aligned(8))); -}; - -void devlink_port_get_rsp_list_free(struct devlink_port_get_rsp_list *rsp); - -struct devlink_port_get_rsp_list * -devlink_port_get_dump(struct ynl_sock *ys, - struct devlink_port_get_req_dump *req); - -/* ============== DEVLINK_CMD_PORT_SET ============== */ -/* DEVLINK_CMD_PORT_SET - do */ -struct devlink_port_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 port_type:1; - __u32 port_function:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - enum devlink_port_type port_type; - struct devlink_dl_port_function port_function; -}; - -static inline struct devlink_port_set_req *devlink_port_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_set_req)); -} -void devlink_port_set_req_free(struct devlink_port_set_req *req); - -static inline void -devlink_port_set_req_set_bus_name(struct devlink_port_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_set_req_set_dev_name(struct devlink_port_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_set_req_set_port_index(struct devlink_port_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_port_set_req_set_port_type(struct devlink_port_set_req *req, - enum devlink_port_type port_type) -{ - req->_present.port_type = 1; - req->port_type = port_type; -} -static inline void 
-devlink_port_set_req_set_port_function_hw_addr(struct devlink_port_set_req *req, - const void *hw_addr, size_t len) -{ - free(req->port_function.hw_addr); - req->port_function._present.hw_addr_len = len; - req->port_function.hw_addr = malloc(req->port_function._present.hw_addr_len); - memcpy(req->port_function.hw_addr, hw_addr, req->port_function._present.hw_addr_len); -} -static inline void -devlink_port_set_req_set_port_function_state(struct devlink_port_set_req *req, - enum devlink_port_fn_state state) -{ - req->_present.port_function = 1; - req->port_function._present.state = 1; - req->port_function.state = state; -} -static inline void -devlink_port_set_req_set_port_function_opstate(struct devlink_port_set_req *req, - enum devlink_port_fn_opstate opstate) -{ - req->_present.port_function = 1; - req->port_function._present.opstate = 1; - req->port_function.opstate = opstate; -} -static inline void -devlink_port_set_req_set_port_function_caps(struct devlink_port_set_req *req, - struct nla_bitfield32 *caps) -{ - req->_present.port_function = 1; - req->port_function._present.caps = 1; - memcpy(&req->port_function.caps, caps, sizeof(struct nla_bitfield32)); -} - -/* - * Set devlink port instances. - */ -int devlink_port_set(struct ynl_sock *ys, struct devlink_port_set_req *req); - -/* ============== DEVLINK_CMD_PORT_NEW ============== */ -/* DEVLINK_CMD_PORT_NEW - do */ -struct devlink_port_new_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 port_flavour:1; - __u32 port_pci_pf_number:1; - __u32 port_pci_sf_number:1; - __u32 port_controller_number:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - enum devlink_port_flavour port_flavour; - __u16 port_pci_pf_number; - __u32 port_pci_sf_number; - __u32 port_controller_number; -}; - -static inline struct devlink_port_new_req *devlink_port_new_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_new_req)); -} -void devlink_port_new_req_free(struct devlink_port_new_req *req); - -static inline void -devlink_port_new_req_set_bus_name(struct devlink_port_new_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_new_req_set_dev_name(struct devlink_port_new_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_new_req_set_port_index(struct devlink_port_new_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_port_new_req_set_port_flavour(struct devlink_port_new_req *req, - enum devlink_port_flavour port_flavour) -{ - req->_present.port_flavour = 1; - req->port_flavour = port_flavour; -} -static inline void -devlink_port_new_req_set_port_pci_pf_number(struct devlink_port_new_req *req, - __u16 port_pci_pf_number) -{ - req->_present.port_pci_pf_number = 1; - req->port_pci_pf_number = port_pci_pf_number; -} -static inline void -devlink_port_new_req_set_port_pci_sf_number(struct devlink_port_new_req *req, - __u32 port_pci_sf_number) -{ - req->_present.port_pci_sf_number = 1; - 
req->port_pci_sf_number = port_pci_sf_number; -} -static inline void -devlink_port_new_req_set_port_controller_number(struct devlink_port_new_req *req, - __u32 port_controller_number) -{ - req->_present.port_controller_number = 1; - req->port_controller_number = port_controller_number; -} - -struct devlink_port_new_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -void devlink_port_new_rsp_free(struct devlink_port_new_rsp *rsp); - -/* - * Create devlink port instances. - */ -struct devlink_port_new_rsp * -devlink_port_new(struct ynl_sock *ys, struct devlink_port_new_req *req); - -/* ============== DEVLINK_CMD_PORT_DEL ============== */ -/* DEVLINK_CMD_PORT_DEL - do */ -struct devlink_port_del_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_del_req *devlink_port_del_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_del_req)); -} -void devlink_port_del_req_free(struct devlink_port_del_req *req); - -static inline void -devlink_port_del_req_set_bus_name(struct devlink_port_del_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_del_req_set_dev_name(struct devlink_port_del_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_del_req_set_port_index(struct devlink_port_del_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -/* - * Delete devlink port instances. 
- */ -int devlink_port_del(struct ynl_sock *ys, struct devlink_port_del_req *req); - -/* ============== DEVLINK_CMD_PORT_SPLIT ============== */ -/* DEVLINK_CMD_PORT_SPLIT - do */ -struct devlink_port_split_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 port_split_count:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 port_split_count; -}; - -static inline struct devlink_port_split_req *devlink_port_split_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_split_req)); -} -void devlink_port_split_req_free(struct devlink_port_split_req *req); - -static inline void -devlink_port_split_req_set_bus_name(struct devlink_port_split_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_split_req_set_dev_name(struct devlink_port_split_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_split_req_set_port_index(struct devlink_port_split_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_port_split_req_set_port_split_count(struct devlink_port_split_req *req, - __u32 port_split_count) -{ - req->_present.port_split_count = 1; - req->port_split_count = port_split_count; -} - -/* - * Split devlink port instances. - */ -int devlink_port_split(struct ynl_sock *ys, struct devlink_port_split_req *req); - -/* ============== DEVLINK_CMD_PORT_UNSPLIT ============== */ -/* DEVLINK_CMD_PORT_UNSPLIT - do */ -struct devlink_port_unsplit_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_unsplit_req * -devlink_port_unsplit_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_unsplit_req)); -} -void devlink_port_unsplit_req_free(struct devlink_port_unsplit_req *req); - -static inline void -devlink_port_unsplit_req_set_bus_name(struct devlink_port_unsplit_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_unsplit_req_set_dev_name(struct devlink_port_unsplit_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_unsplit_req_set_port_index(struct devlink_port_unsplit_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -/* - * Unplit devlink port instances. 
- */ -int devlink_port_unsplit(struct ynl_sock *ys, - struct devlink_port_unsplit_req *req); - -/* ============== DEVLINK_CMD_SB_GET ============== */ -/* DEVLINK_CMD_SB_GET - do */ -struct devlink_sb_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -static inline struct devlink_sb_get_req *devlink_sb_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_get_req)); -} -void devlink_sb_get_req_free(struct devlink_sb_get_req *req); - -static inline void -devlink_sb_get_req_set_bus_name(struct devlink_sb_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_get_req_set_dev_name(struct devlink_sb_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_get_req_set_sb_index(struct devlink_sb_get_req *req, __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} - -struct devlink_sb_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -void devlink_sb_get_rsp_free(struct devlink_sb_get_rsp *rsp); - -/* - * Get shared buffer instances. - */ -struct devlink_sb_get_rsp * -devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req); - -/* DEVLINK_CMD_SB_GET - dump */ -struct devlink_sb_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_get_req_dump * -devlink_sb_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_get_req_dump)); -} -void devlink_sb_get_req_dump_free(struct devlink_sb_get_req_dump *req); - -static inline void -devlink_sb_get_req_dump_set_bus_name(struct devlink_sb_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_get_req_dump_set_dev_name(struct devlink_sb_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_get_list { - struct devlink_sb_get_list *next; - struct devlink_sb_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_sb_get_list_free(struct devlink_sb_get_list *rsp); - -struct devlink_sb_get_list * -devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_POOL_GET ============== */ -/* DEVLINK_CMD_SB_POOL_GET - do */ -struct devlink_sb_pool_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - 
char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; -}; - -static inline struct devlink_sb_pool_get_req * -devlink_sb_pool_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_pool_get_req)); -} -void devlink_sb_pool_get_req_free(struct devlink_sb_pool_get_req *req); - -static inline void -devlink_sb_pool_get_req_set_bus_name(struct devlink_sb_pool_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_pool_get_req_set_dev_name(struct devlink_sb_pool_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_pool_get_req_set_sb_index(struct devlink_sb_pool_get_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_pool_get_req_set_sb_pool_index(struct devlink_sb_pool_get_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} - -struct devlink_sb_pool_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; -}; - -void devlink_sb_pool_get_rsp_free(struct devlink_sb_pool_get_rsp *rsp); - -/* - * Get shared buffer pool instances. - */ -struct devlink_sb_pool_get_rsp * -devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req); - -/* DEVLINK_CMD_SB_POOL_GET - dump */ -struct devlink_sb_pool_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_pool_get_req_dump * -devlink_sb_pool_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_pool_get_req_dump)); -} -void -devlink_sb_pool_get_req_dump_free(struct devlink_sb_pool_get_req_dump *req); - -static inline void -devlink_sb_pool_get_req_dump_set_bus_name(struct devlink_sb_pool_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_pool_get_req_dump_set_dev_name(struct devlink_sb_pool_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_pool_get_list { - struct devlink_sb_pool_get_list *next; - struct devlink_sb_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_sb_pool_get_list_free(struct devlink_sb_pool_get_list *rsp); - -struct devlink_sb_pool_get_list * -devlink_sb_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_pool_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_POOL_SET ============== */ -/* DEVLINK_CMD_SB_POOL_SET - do */ -struct 
devlink_sb_pool_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - __u32 sb_pool_index:1; - __u32 sb_pool_threshold_type:1; - __u32 sb_pool_size:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; - __u16 sb_pool_index; - enum devlink_sb_threshold_type sb_pool_threshold_type; - __u32 sb_pool_size; -}; - -static inline struct devlink_sb_pool_set_req * -devlink_sb_pool_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_pool_set_req)); -} -void devlink_sb_pool_set_req_free(struct devlink_sb_pool_set_req *req); - -static inline void -devlink_sb_pool_set_req_set_bus_name(struct devlink_sb_pool_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_pool_set_req_set_dev_name(struct devlink_sb_pool_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_pool_set_req_set_sb_index(struct devlink_sb_pool_set_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_pool_set_req_set_sb_pool_index(struct devlink_sb_pool_set_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} -static inline void -devlink_sb_pool_set_req_set_sb_pool_threshold_type(struct devlink_sb_pool_set_req *req, - enum devlink_sb_threshold_type sb_pool_threshold_type) -{ - req->_present.sb_pool_threshold_type = 1; - req->sb_pool_threshold_type = sb_pool_threshold_type; -} -static inline void -devlink_sb_pool_set_req_set_sb_pool_size(struct devlink_sb_pool_set_req *req, - __u32 sb_pool_size) -{ - req->_present.sb_pool_size = 1; - req->sb_pool_size = sb_pool_size; -} - -/* - * Set shared buffer pool instances. 
- */ -int devlink_sb_pool_set(struct ynl_sock *ys, - struct devlink_sb_pool_set_req *req); - -/* ============== DEVLINK_CMD_SB_PORT_POOL_GET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -struct devlink_sb_port_pool_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; -}; - -static inline struct devlink_sb_port_pool_get_req * -devlink_sb_port_pool_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_port_pool_get_req)); -} -void -devlink_sb_port_pool_get_req_free(struct devlink_sb_port_pool_get_req *req); - -static inline void -devlink_sb_port_pool_get_req_set_bus_name(struct devlink_sb_port_pool_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_port_pool_get_req_set_dev_name(struct devlink_sb_port_pool_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_port_pool_get_req_set_port_index(struct devlink_sb_port_pool_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_port_pool_get_req_set_sb_index(struct devlink_sb_port_pool_get_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_port_pool_get_req_set_sb_pool_index(struct devlink_sb_port_pool_get_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} - -struct devlink_sb_port_pool_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; -}; - -void -devlink_sb_port_pool_get_rsp_free(struct devlink_sb_port_pool_get_rsp *rsp); - -/* - * Get shared buffer port-pool combinations and threshold. 
- */ -struct devlink_sb_port_pool_get_rsp * -devlink_sb_port_pool_get(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req *req); - -/* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -struct devlink_sb_port_pool_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_port_pool_get_req_dump * -devlink_sb_port_pool_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_port_pool_get_req_dump)); -} -void -devlink_sb_port_pool_get_req_dump_free(struct devlink_sb_port_pool_get_req_dump *req); - -static inline void -devlink_sb_port_pool_get_req_dump_set_bus_name(struct devlink_sb_port_pool_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_port_pool_get_req_dump_set_dev_name(struct devlink_sb_port_pool_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_port_pool_get_list { - struct devlink_sb_port_pool_get_list *next; - struct devlink_sb_port_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_sb_port_pool_get_list_free(struct devlink_sb_port_pool_get_list *rsp); - -struct devlink_sb_port_pool_get_list * -devlink_sb_port_pool_get_dump(struct ynl_sock *ys, - struct devlink_sb_port_pool_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_PORT_POOL_SET ============== */ -/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ -struct devlink_sb_port_pool_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - __u32 sb_threshold:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; - __u32 sb_threshold; -}; - -static inline struct devlink_sb_port_pool_set_req * -devlink_sb_port_pool_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_port_pool_set_req)); -} -void -devlink_sb_port_pool_set_req_free(struct devlink_sb_port_pool_set_req *req); - -static inline void -devlink_sb_port_pool_set_req_set_bus_name(struct devlink_sb_port_pool_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_port_pool_set_req_set_dev_name(struct devlink_sb_port_pool_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_port_pool_set_req_set_port_index(struct devlink_sb_port_pool_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_port_pool_set_req_set_sb_index(struct devlink_sb_port_pool_set_req *req, - __u32 sb_index) -{ - 
req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_port_pool_set_req_set_sb_pool_index(struct devlink_sb_port_pool_set_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} -static inline void -devlink_sb_port_pool_set_req_set_sb_threshold(struct devlink_sb_port_pool_set_req *req, - __u32 sb_threshold) -{ - req->_present.sb_threshold = 1; - req->sb_threshold = sb_threshold; -} - -/* - * Set shared buffer port-pool combinations and threshold. - */ -int devlink_sb_port_pool_set(struct ynl_sock *ys, - struct devlink_sb_port_pool_set_req *req); - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_GET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -struct devlink_sb_tc_pool_bind_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; -}; - -static inline struct devlink_sb_tc_pool_bind_get_req * -devlink_sb_tc_pool_bind_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req)); -} -void -devlink_sb_tc_pool_bind_get_req_free(struct devlink_sb_tc_pool_bind_get_req *req); - -static inline void -devlink_sb_tc_pool_bind_get_req_set_bus_name(struct devlink_sb_tc_pool_bind_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_dev_name(struct devlink_sb_tc_pool_bind_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_port_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_pool_type(struct devlink_sb_tc_pool_bind_get_req *req, - enum devlink_sb_pool_type sb_pool_type) -{ - req->_present.sb_pool_type = 1; - req->sb_pool_type = sb_pool_type; -} -static inline void -devlink_sb_tc_pool_bind_get_req_set_sb_tc_index(struct devlink_sb_tc_pool_bind_get_req *req, - __u16 sb_tc_index) -{ - req->_present.sb_tc_index = 1; - req->sb_tc_index = sb_tc_index; -} - -struct devlink_sb_tc_pool_bind_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; -}; - -void -devlink_sb_tc_pool_bind_get_rsp_free(struct devlink_sb_tc_pool_bind_get_rsp *rsp); - -/* - * Get shared buffer port-TC to pool bindings and threshold. 
- */ -struct devlink_sb_tc_pool_bind_get_rsp * -devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req *req); - -/* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -struct devlink_sb_tc_pool_bind_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_sb_tc_pool_bind_get_req_dump * -devlink_sb_tc_pool_bind_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_get_req_dump)); -} -void -devlink_sb_tc_pool_bind_get_req_dump_free(struct devlink_sb_tc_pool_bind_get_req_dump *req); - -static inline void -devlink_sb_tc_pool_bind_get_req_dump_set_bus_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_get_req_dump_set_dev_name(struct devlink_sb_tc_pool_bind_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_sb_tc_pool_bind_get_list { - struct devlink_sb_tc_pool_bind_get_list *next; - struct devlink_sb_tc_pool_bind_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_sb_tc_pool_bind_get_list_free(struct devlink_sb_tc_pool_bind_get_list *rsp); - -struct devlink_sb_tc_pool_bind_get_list * -devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_get_req_dump *req); - -/* ============== DEVLINK_CMD_SB_TC_POOL_BIND_SET ============== */ -/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ -struct devlink_sb_tc_pool_bind_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 sb_index:1; - __u32 sb_pool_index:1; - __u32 sb_pool_type:1; - __u32 sb_tc_index:1; - __u32 sb_threshold:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - __u32 sb_index; - __u16 sb_pool_index; - enum devlink_sb_pool_type sb_pool_type; - __u16 sb_tc_index; - __u32 sb_threshold; -}; - -static inline struct devlink_sb_tc_pool_bind_set_req * -devlink_sb_tc_pool_bind_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_tc_pool_bind_set_req)); -} -void -devlink_sb_tc_pool_bind_set_req_free(struct devlink_sb_tc_pool_bind_set_req *req); - -static inline void -devlink_sb_tc_pool_bind_set_req_set_bus_name(struct devlink_sb_tc_pool_bind_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_dev_name(struct devlink_sb_tc_pool_bind_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_port_index(struct devlink_sb_tc_pool_bind_set_req *req, - 
__u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_index(struct devlink_sb_tc_pool_bind_set_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_pool_index(struct devlink_sb_tc_pool_bind_set_req *req, - __u16 sb_pool_index) -{ - req->_present.sb_pool_index = 1; - req->sb_pool_index = sb_pool_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_pool_type(struct devlink_sb_tc_pool_bind_set_req *req, - enum devlink_sb_pool_type sb_pool_type) -{ - req->_present.sb_pool_type = 1; - req->sb_pool_type = sb_pool_type; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_tc_index(struct devlink_sb_tc_pool_bind_set_req *req, - __u16 sb_tc_index) -{ - req->_present.sb_tc_index = 1; - req->sb_tc_index = sb_tc_index; -} -static inline void -devlink_sb_tc_pool_bind_set_req_set_sb_threshold(struct devlink_sb_tc_pool_bind_set_req *req, - __u32 sb_threshold) -{ - req->_present.sb_threshold = 1; - req->sb_threshold = sb_threshold; -} - -/* - * Set shared buffer port-TC to pool bindings and threshold. - */ -int devlink_sb_tc_pool_bind_set(struct ynl_sock *ys, - struct devlink_sb_tc_pool_bind_set_req *req); - -/* ============== DEVLINK_CMD_SB_OCC_SNAPSHOT ============== */ -/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ -struct devlink_sb_occ_snapshot_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -static inline struct devlink_sb_occ_snapshot_req * -devlink_sb_occ_snapshot_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_occ_snapshot_req)); -} -void devlink_sb_occ_snapshot_req_free(struct devlink_sb_occ_snapshot_req *req); - -static inline void -devlink_sb_occ_snapshot_req_set_bus_name(struct devlink_sb_occ_snapshot_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_occ_snapshot_req_set_dev_name(struct devlink_sb_occ_snapshot_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_occ_snapshot_req_set_sb_index(struct devlink_sb_occ_snapshot_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} - -/* - * Take occupancy snapshot of shared buffer. 
- */ -int devlink_sb_occ_snapshot(struct ynl_sock *ys, - struct devlink_sb_occ_snapshot_req *req); - -/* ============== DEVLINK_CMD_SB_OCC_MAX_CLEAR ============== */ -/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ -struct devlink_sb_occ_max_clear_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 sb_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 sb_index; -}; - -static inline struct devlink_sb_occ_max_clear_req * -devlink_sb_occ_max_clear_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_sb_occ_max_clear_req)); -} -void -devlink_sb_occ_max_clear_req_free(struct devlink_sb_occ_max_clear_req *req); - -static inline void -devlink_sb_occ_max_clear_req_set_bus_name(struct devlink_sb_occ_max_clear_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_sb_occ_max_clear_req_set_dev_name(struct devlink_sb_occ_max_clear_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_sb_occ_max_clear_req_set_sb_index(struct devlink_sb_occ_max_clear_req *req, - __u32 sb_index) -{ - req->_present.sb_index = 1; - req->sb_index = sb_index; -} - -/* - * Clear occupancy watermarks of shared buffer. - */ -int devlink_sb_occ_max_clear(struct ynl_sock *ys, - struct devlink_sb_occ_max_clear_req *req); - -/* ============== DEVLINK_CMD_ESWITCH_GET ============== */ -/* DEVLINK_CMD_ESWITCH_GET - do */ -struct devlink_eswitch_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_eswitch_get_req * -devlink_eswitch_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_eswitch_get_req)); -} -void devlink_eswitch_get_req_free(struct devlink_eswitch_get_req *req); - -static inline void -devlink_eswitch_get_req_set_bus_name(struct devlink_eswitch_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_eswitch_get_req_set_dev_name(struct devlink_eswitch_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_eswitch_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 eswitch_mode:1; - __u32 eswitch_inline_mode:1; - __u32 eswitch_encap_mode:1; - } _present; - - char *bus_name; - char *dev_name; - enum devlink_eswitch_mode eswitch_mode; - enum devlink_eswitch_inline_mode eswitch_inline_mode; - enum devlink_eswitch_encap_mode eswitch_encap_mode; -}; - -void devlink_eswitch_get_rsp_free(struct devlink_eswitch_get_rsp *rsp); - -/* - * Get eswitch attributes. 
- */ -struct devlink_eswitch_get_rsp * -devlink_eswitch_get(struct ynl_sock *ys, struct devlink_eswitch_get_req *req); - -/* ============== DEVLINK_CMD_ESWITCH_SET ============== */ -/* DEVLINK_CMD_ESWITCH_SET - do */ -struct devlink_eswitch_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 eswitch_mode:1; - __u32 eswitch_inline_mode:1; - __u32 eswitch_encap_mode:1; - } _present; - - char *bus_name; - char *dev_name; - enum devlink_eswitch_mode eswitch_mode; - enum devlink_eswitch_inline_mode eswitch_inline_mode; - enum devlink_eswitch_encap_mode eswitch_encap_mode; -}; - -static inline struct devlink_eswitch_set_req * -devlink_eswitch_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_eswitch_set_req)); -} -void devlink_eswitch_set_req_free(struct devlink_eswitch_set_req *req); - -static inline void -devlink_eswitch_set_req_set_bus_name(struct devlink_eswitch_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_eswitch_set_req_set_dev_name(struct devlink_eswitch_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_eswitch_set_req_set_eswitch_mode(struct devlink_eswitch_set_req *req, - enum devlink_eswitch_mode eswitch_mode) -{ - req->_present.eswitch_mode = 1; - req->eswitch_mode = eswitch_mode; -} -static inline void -devlink_eswitch_set_req_set_eswitch_inline_mode(struct devlink_eswitch_set_req *req, - enum devlink_eswitch_inline_mode eswitch_inline_mode) -{ - req->_present.eswitch_inline_mode = 1; - req->eswitch_inline_mode = eswitch_inline_mode; -} -static inline void -devlink_eswitch_set_req_set_eswitch_encap_mode(struct devlink_eswitch_set_req *req, - enum devlink_eswitch_encap_mode eswitch_encap_mode) -{ - req->_present.eswitch_encap_mode = 1; - req->eswitch_encap_mode = eswitch_encap_mode; -} - -/* - * Set eswitch attributes. 
- */ -int devlink_eswitch_set(struct ynl_sock *ys, - struct devlink_eswitch_set_req *req); - -/* ============== DEVLINK_CMD_DPIPE_TABLE_GET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ -struct devlink_dpipe_table_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_table_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *dpipe_table_name; -}; - -static inline struct devlink_dpipe_table_get_req * -devlink_dpipe_table_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_table_get_req)); -} -void devlink_dpipe_table_get_req_free(struct devlink_dpipe_table_get_req *req); - -static inline void -devlink_dpipe_table_get_req_set_bus_name(struct devlink_dpipe_table_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_table_get_req_set_dev_name(struct devlink_dpipe_table_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_dpipe_table_get_req_set_dpipe_table_name(struct devlink_dpipe_table_get_req *req, - const char *dpipe_table_name) -{ - free(req->dpipe_table_name); - req->_present.dpipe_table_name_len = strlen(dpipe_table_name); - req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); - memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); - req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; -} - -struct devlink_dpipe_table_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_tables:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_dpipe_tables dpipe_tables; -}; - -void devlink_dpipe_table_get_rsp_free(struct devlink_dpipe_table_get_rsp *rsp); - -/* - * Get dpipe table attributes. 
- */ -struct devlink_dpipe_table_get_rsp * -devlink_dpipe_table_get(struct ynl_sock *ys, - struct devlink_dpipe_table_get_req *req); - -/* ============== DEVLINK_CMD_DPIPE_ENTRIES_GET ============== */ -/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ -struct devlink_dpipe_entries_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_table_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *dpipe_table_name; -}; - -static inline struct devlink_dpipe_entries_get_req * -devlink_dpipe_entries_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_entries_get_req)); -} -void -devlink_dpipe_entries_get_req_free(struct devlink_dpipe_entries_get_req *req); - -static inline void -devlink_dpipe_entries_get_req_set_bus_name(struct devlink_dpipe_entries_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_entries_get_req_set_dev_name(struct devlink_dpipe_entries_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_dpipe_entries_get_req_set_dpipe_table_name(struct devlink_dpipe_entries_get_req *req, - const char *dpipe_table_name) -{ - free(req->dpipe_table_name); - req->_present.dpipe_table_name_len = strlen(dpipe_table_name); - req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); - memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); - req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; -} - -struct devlink_dpipe_entries_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_entries:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_dpipe_entries dpipe_entries; -}; - -void -devlink_dpipe_entries_get_rsp_free(struct devlink_dpipe_entries_get_rsp *rsp); - -/* - * Get dpipe entries attributes. 
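The dpipe table "do" request follows the same alloc/set/free pattern; a sketch under the same assumptions (hypothetical helper name, caller-provided struct ynl_sock):

#include "devlink-user.h"	/* assumed name of the generated header */

/* Fetch a single dpipe table by name and release the response. */
static int get_dpipe_table(struct ynl_sock *ys, const char *bus,
			   const char *dev, const char *table)
{
	struct devlink_dpipe_table_get_req *req;
	struct devlink_dpipe_table_get_rsp *rsp;

	req = devlink_dpipe_table_get_req_alloc();
	if (!req)
		return -1;
	devlink_dpipe_table_get_req_set_bus_name(req, bus);
	devlink_dpipe_table_get_req_set_dev_name(req, dev);
	devlink_dpipe_table_get_req_set_dpipe_table_name(req, table);

	rsp = devlink_dpipe_table_get(ys, req);
	devlink_dpipe_table_get_req_free(req);
	if (!rsp)
		return -1;
	devlink_dpipe_table_get_rsp_free(rsp);
	return 0;
}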
- */ -struct devlink_dpipe_entries_get_rsp * -devlink_dpipe_entries_get(struct ynl_sock *ys, - struct devlink_dpipe_entries_get_req *req); - -/* ============== DEVLINK_CMD_DPIPE_HEADERS_GET ============== */ -/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ -struct devlink_dpipe_headers_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_dpipe_headers_get_req * -devlink_dpipe_headers_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_headers_get_req)); -} -void -devlink_dpipe_headers_get_req_free(struct devlink_dpipe_headers_get_req *req); - -static inline void -devlink_dpipe_headers_get_req_set_bus_name(struct devlink_dpipe_headers_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_headers_get_req_set_dev_name(struct devlink_dpipe_headers_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_dpipe_headers_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_headers:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_dpipe_headers dpipe_headers; -}; - -void -devlink_dpipe_headers_get_rsp_free(struct devlink_dpipe_headers_get_rsp *rsp); - -/* - * Get dpipe headers attributes. - */ -struct devlink_dpipe_headers_get_rsp * -devlink_dpipe_headers_get(struct ynl_sock *ys, - struct devlink_dpipe_headers_get_req *req); - -/* ============== DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET ============== */ -/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ -struct devlink_dpipe_table_counters_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 dpipe_table_name_len; - __u32 dpipe_table_counters_enabled:1; - } _present; - - char *bus_name; - char *dev_name; - char *dpipe_table_name; - __u8 dpipe_table_counters_enabled; -}; - -static inline struct devlink_dpipe_table_counters_set_req * -devlink_dpipe_table_counters_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_dpipe_table_counters_set_req)); -} -void -devlink_dpipe_table_counters_set_req_free(struct devlink_dpipe_table_counters_set_req *req); - -static inline void -devlink_dpipe_table_counters_set_req_set_bus_name(struct devlink_dpipe_table_counters_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_dpipe_table_counters_set_req_set_dev_name(struct devlink_dpipe_table_counters_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_dpipe_table_counters_set_req_set_dpipe_table_name(struct devlink_dpipe_table_counters_set_req *req, - const char 
*dpipe_table_name) -{ - free(req->dpipe_table_name); - req->_present.dpipe_table_name_len = strlen(dpipe_table_name); - req->dpipe_table_name = malloc(req->_present.dpipe_table_name_len + 1); - memcpy(req->dpipe_table_name, dpipe_table_name, req->_present.dpipe_table_name_len); - req->dpipe_table_name[req->_present.dpipe_table_name_len] = 0; -} -static inline void -devlink_dpipe_table_counters_set_req_set_dpipe_table_counters_enabled(struct devlink_dpipe_table_counters_set_req *req, - __u8 dpipe_table_counters_enabled) -{ - req->_present.dpipe_table_counters_enabled = 1; - req->dpipe_table_counters_enabled = dpipe_table_counters_enabled; -} - -/* - * Set dpipe counter attributes. - */ -int devlink_dpipe_table_counters_set(struct ynl_sock *ys, - struct devlink_dpipe_table_counters_set_req *req); - -/* ============== DEVLINK_CMD_RESOURCE_SET ============== */ -/* DEVLINK_CMD_RESOURCE_SET - do */ -struct devlink_resource_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 resource_id:1; - __u32 resource_size:1; - } _present; - - char *bus_name; - char *dev_name; - __u64 resource_id; - __u64 resource_size; -}; - -static inline struct devlink_resource_set_req * -devlink_resource_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_resource_set_req)); -} -void devlink_resource_set_req_free(struct devlink_resource_set_req *req); - -static inline void -devlink_resource_set_req_set_bus_name(struct devlink_resource_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_resource_set_req_set_dev_name(struct devlink_resource_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_resource_set_req_set_resource_id(struct devlink_resource_set_req *req, - __u64 resource_id) -{ - req->_present.resource_id = 1; - req->resource_id = resource_id; -} -static inline void -devlink_resource_set_req_set_resource_size(struct devlink_resource_set_req *req, - __u64 resource_size) -{ - req->_present.resource_size = 1; - req->resource_size = resource_size; -} - -/* - * Set resource attributes. 
- */ -int devlink_resource_set(struct ynl_sock *ys, - struct devlink_resource_set_req *req); - -/* ============== DEVLINK_CMD_RESOURCE_DUMP ============== */ -/* DEVLINK_CMD_RESOURCE_DUMP - do */ -struct devlink_resource_dump_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_resource_dump_req * -devlink_resource_dump_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_resource_dump_req)); -} -void devlink_resource_dump_req_free(struct devlink_resource_dump_req *req); - -static inline void -devlink_resource_dump_req_set_bus_name(struct devlink_resource_dump_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_resource_dump_req_set_dev_name(struct devlink_resource_dump_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_resource_dump_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 resource_list:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_resource_list resource_list; -}; - -void devlink_resource_dump_rsp_free(struct devlink_resource_dump_rsp *rsp); - -/* - * Get resource attributes. - */ -struct devlink_resource_dump_rsp * -devlink_resource_dump(struct ynl_sock *ys, - struct devlink_resource_dump_req *req); - -/* ============== DEVLINK_CMD_RELOAD ============== */ -/* DEVLINK_CMD_RELOAD - do */ -struct devlink_reload_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 reload_action:1; - __u32 reload_limits:1; - __u32 netns_pid:1; - __u32 netns_fd:1; - __u32 netns_id:1; - } _present; - - char *bus_name; - char *dev_name; - enum devlink_reload_action reload_action; - struct nla_bitfield32 reload_limits; - __u32 netns_pid; - __u32 netns_fd; - __u32 netns_id; -}; - -static inline struct devlink_reload_req *devlink_reload_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_reload_req)); -} -void devlink_reload_req_free(struct devlink_reload_req *req); - -static inline void -devlink_reload_req_set_bus_name(struct devlink_reload_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_reload_req_set_dev_name(struct devlink_reload_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_reload_req_set_reload_action(struct devlink_reload_req *req, - enum devlink_reload_action reload_action) -{ - req->_present.reload_action = 1; - req->reload_action = reload_action; -} -static inline void -devlink_reload_req_set_reload_limits(struct devlink_reload_req *req, - struct nla_bitfield32 *reload_limits) -{ - 
req->_present.reload_limits = 1; - memcpy(&req->reload_limits, reload_limits, sizeof(struct nla_bitfield32)); -} -static inline void -devlink_reload_req_set_netns_pid(struct devlink_reload_req *req, - __u32 netns_pid) -{ - req->_present.netns_pid = 1; - req->netns_pid = netns_pid; -} -static inline void -devlink_reload_req_set_netns_fd(struct devlink_reload_req *req, __u32 netns_fd) -{ - req->_present.netns_fd = 1; - req->netns_fd = netns_fd; -} -static inline void -devlink_reload_req_set_netns_id(struct devlink_reload_req *req, __u32 netns_id) -{ - req->_present.netns_id = 1; - req->netns_id = netns_id; -} - -struct devlink_reload_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 reload_actions_performed:1; - } _present; - - char *bus_name; - char *dev_name; - struct nla_bitfield32 reload_actions_performed; -}; - -void devlink_reload_rsp_free(struct devlink_reload_rsp *rsp); - -/* - * Reload devlink. - */ -struct devlink_reload_rsp * -devlink_reload(struct ynl_sock *ys, struct devlink_reload_req *req); - -/* ============== DEVLINK_CMD_PARAM_GET ============== */ -/* DEVLINK_CMD_PARAM_GET - do */ -struct devlink_param_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 param_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *param_name; -}; - -static inline struct devlink_param_get_req *devlink_param_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_param_get_req)); -} -void devlink_param_get_req_free(struct devlink_param_get_req *req); - -static inline void -devlink_param_get_req_set_bus_name(struct devlink_param_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_param_get_req_set_dev_name(struct devlink_param_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_param_get_req_set_param_name(struct devlink_param_get_req *req, - const char *param_name) -{ - free(req->param_name); - req->_present.param_name_len = strlen(param_name); - req->param_name = malloc(req->_present.param_name_len + 1); - memcpy(req->param_name, param_name, req->_present.param_name_len); - req->param_name[req->_present.param_name_len] = 0; -} - -struct devlink_param_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 param_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *param_name; -}; - -void devlink_param_get_rsp_free(struct devlink_param_get_rsp *rsp); - -/* - * Get param instances. 
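A sketch of driving the reload request above, assuming "devlink-user.h" as the generated header name, a caller-provided struct ynl_sock, and DEVLINK_RELOAD_ACTION_DRIVER_REINIT from the devlink uapi header; the helper name is hypothetical:

#include "devlink-user.h"	/* assumed name of the generated header */

/* Ask for a driver re-init reload and release the kernel's reply. */
static int reload_reinit(struct ynl_sock *ys, const char *bus, const char *dev)
{
	struct devlink_reload_req *req;
	struct devlink_reload_rsp *rsp;

	req = devlink_reload_req_alloc();
	if (!req)
		return -1;
	devlink_reload_req_set_bus_name(req, bus);
	devlink_reload_req_set_dev_name(req, dev);
	devlink_reload_req_set_reload_action(req, DEVLINK_RELOAD_ACTION_DRIVER_REINIT);

	rsp = devlink_reload(ys, req);
	devlink_reload_req_free(req);
	if (!rsp)
		return -1;
	devlink_reload_rsp_free(rsp);
	return 0;
}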
- */ -struct devlink_param_get_rsp * -devlink_param_get(struct ynl_sock *ys, struct devlink_param_get_req *req); - -/* DEVLINK_CMD_PARAM_GET - dump */ -struct devlink_param_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_param_get_req_dump * -devlink_param_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_param_get_req_dump)); -} -void devlink_param_get_req_dump_free(struct devlink_param_get_req_dump *req); - -static inline void -devlink_param_get_req_dump_set_bus_name(struct devlink_param_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_param_get_req_dump_set_dev_name(struct devlink_param_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_param_get_list { - struct devlink_param_get_list *next; - struct devlink_param_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_param_get_list_free(struct devlink_param_get_list *rsp); - -struct devlink_param_get_list * -devlink_param_get_dump(struct ynl_sock *ys, - struct devlink_param_get_req_dump *req); - -/* ============== DEVLINK_CMD_PARAM_SET ============== */ -/* DEVLINK_CMD_PARAM_SET - do */ -struct devlink_param_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 param_name_len; - __u32 param_type:1; - __u32 param_value_cmode:1; - } _present; - - char *bus_name; - char *dev_name; - char *param_name; - __u8 param_type; - enum devlink_param_cmode param_value_cmode; -}; - -static inline struct devlink_param_set_req *devlink_param_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_param_set_req)); -} -void devlink_param_set_req_free(struct devlink_param_set_req *req); - -static inline void -devlink_param_set_req_set_bus_name(struct devlink_param_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_param_set_req_set_dev_name(struct devlink_param_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_param_set_req_set_param_name(struct devlink_param_set_req *req, - const char *param_name) -{ - free(req->param_name); - req->_present.param_name_len = strlen(param_name); - req->param_name = malloc(req->_present.param_name_len + 1); - memcpy(req->param_name, param_name, req->_present.param_name_len); - req->param_name[req->_present.param_name_len] = 0; -} -static inline void -devlink_param_set_req_set_param_type(struct devlink_param_set_req *req, - __u8 param_type) -{ - req->_present.param_type = 1; - req->param_type = param_type; -} -static inline void 
-devlink_param_set_req_set_param_value_cmode(struct devlink_param_set_req *req, - enum devlink_param_cmode param_value_cmode) -{ - req->_present.param_value_cmode = 1; - req->param_value_cmode = param_value_cmode; -} - -/* - * Set param instances. - */ -int devlink_param_set(struct ynl_sock *ys, struct devlink_param_set_req *req); - -/* ============== DEVLINK_CMD_REGION_GET ============== */ -/* DEVLINK_CMD_REGION_GET - do */ -struct devlink_region_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; -}; - -static inline struct devlink_region_get_req *devlink_region_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_get_req)); -} -void devlink_region_get_req_free(struct devlink_region_get_req *req); - -static inline void -devlink_region_get_req_set_bus_name(struct devlink_region_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_get_req_set_dev_name(struct devlink_region_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_get_req_set_port_index(struct devlink_region_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_get_req_set_region_name(struct devlink_region_get_req *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; -} - -struct devlink_region_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; -}; - -void devlink_region_get_rsp_free(struct devlink_region_get_rsp *rsp); - -/* - * Get region instances. 
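The dump variants above return a singly linked list of response objects; a sketch of walking it, assuming the ->next chain is NULL-terminated, "devlink-user.h" as the generated header name, and a caller-provided struct ynl_sock (hypothetical helper name):

#include <stdio.h>
#include "devlink-user.h"	/* assumed name of the generated header */

/* List the param instances of one device via the dump request. */
static int list_params(struct ynl_sock *ys, const char *bus, const char *dev)
{
	struct devlink_param_get_req_dump *req;
	struct devlink_param_get_list *rsp, *pos;

	req = devlink_param_get_req_dump_alloc();
	if (!req)
		return -1;
	devlink_param_get_req_dump_set_bus_name(req, bus);
	devlink_param_get_req_dump_set_dev_name(req, dev);

	rsp = devlink_param_get_dump(ys, req);
	devlink_param_get_req_dump_free(req);
	if (!rsp)
		return -1;

	for (pos = rsp; pos; pos = pos->next)
		printf("param: %s\n",
		       pos->obj.param_name ? pos->obj.param_name : "?");

	devlink_param_get_list_free(rsp);
	return 0;
}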
- */ -struct devlink_region_get_rsp * -devlink_region_get(struct ynl_sock *ys, struct devlink_region_get_req *req); - -/* DEVLINK_CMD_REGION_GET - dump */ -struct devlink_region_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_region_get_req_dump * -devlink_region_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_get_req_dump)); -} -void devlink_region_get_req_dump_free(struct devlink_region_get_req_dump *req); - -static inline void -devlink_region_get_req_dump_set_bus_name(struct devlink_region_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_get_req_dump_set_dev_name(struct devlink_region_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_region_get_list { - struct devlink_region_get_list *next; - struct devlink_region_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_region_get_list_free(struct devlink_region_get_list *rsp); - -struct devlink_region_get_list * -devlink_region_get_dump(struct ynl_sock *ys, - struct devlink_region_get_req_dump *req); - -/* ============== DEVLINK_CMD_REGION_NEW ============== */ -/* DEVLINK_CMD_REGION_NEW - do */ -struct devlink_region_new_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; -}; - -static inline struct devlink_region_new_req *devlink_region_new_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_new_req)); -} -void devlink_region_new_req_free(struct devlink_region_new_req *req); - -static inline void -devlink_region_new_req_set_bus_name(struct devlink_region_new_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_new_req_set_dev_name(struct devlink_region_new_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_new_req_set_port_index(struct devlink_region_new_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_new_req_set_region_name(struct devlink_region_new_req *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - 
req->region_name[req->_present.region_name_len] = 0; -} -static inline void -devlink_region_new_req_set_region_snapshot_id(struct devlink_region_new_req *req, - __u32 region_snapshot_id) -{ - req->_present.region_snapshot_id = 1; - req->region_snapshot_id = region_snapshot_id; -} - -struct devlink_region_new_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; -}; - -void devlink_region_new_rsp_free(struct devlink_region_new_rsp *rsp); - -/* - * Create region snapshot. - */ -struct devlink_region_new_rsp * -devlink_region_new(struct ynl_sock *ys, struct devlink_region_new_req *req); - -/* ============== DEVLINK_CMD_REGION_DEL ============== */ -/* DEVLINK_CMD_REGION_DEL - do */ -struct devlink_region_del_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; -}; - -static inline struct devlink_region_del_req *devlink_region_del_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_del_req)); -} -void devlink_region_del_req_free(struct devlink_region_del_req *req); - -static inline void -devlink_region_del_req_set_bus_name(struct devlink_region_del_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_del_req_set_dev_name(struct devlink_region_del_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_del_req_set_port_index(struct devlink_region_del_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_del_req_set_region_name(struct devlink_region_del_req *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; -} -static inline void -devlink_region_del_req_set_region_snapshot_id(struct devlink_region_del_req *req, - __u32 region_snapshot_id) -{ - req->_present.region_snapshot_id = 1; - req->region_snapshot_id = region_snapshot_id; -} - -/* - * Delete region snapshot. 
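A sketch of taking a region snapshot with the region-new request above and reading the id reported back, under the same assumptions (hypothetical helper name, "devlink-user.h", caller-provided struct ynl_sock):

#include <stdio.h>
#include "devlink-user.h"	/* assumed name of the generated header */

/* Create a snapshot of a named region and print the returned snapshot id. */
static int snapshot_region(struct ynl_sock *ys, const char *bus,
			   const char *dev, const char *region)
{
	struct devlink_region_new_req *req;
	struct devlink_region_new_rsp *rsp;

	req = devlink_region_new_req_alloc();
	if (!req)
		return -1;
	devlink_region_new_req_set_bus_name(req, bus);
	devlink_region_new_req_set_dev_name(req, dev);
	devlink_region_new_req_set_region_name(req, region);

	rsp = devlink_region_new(ys, req);
	devlink_region_new_req_free(req);
	if (!rsp)
		return -1;

	if (rsp->_present.region_snapshot_id)
		printf("snapshot id: %u\n", rsp->region_snapshot_id);
	devlink_region_new_rsp_free(rsp);
	return 0;
}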
- */ -int devlink_region_del(struct ynl_sock *ys, struct devlink_region_del_req *req); - -/* ============== DEVLINK_CMD_REGION_READ ============== */ -/* DEVLINK_CMD_REGION_READ - dump */ -struct devlink_region_read_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - __u32 region_snapshot_id:1; - __u32 region_direct:1; - __u32 region_chunk_addr:1; - __u32 region_chunk_len:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; - __u32 region_snapshot_id; - __u64 region_chunk_addr; - __u64 region_chunk_len; -}; - -static inline struct devlink_region_read_req_dump * -devlink_region_read_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_region_read_req_dump)); -} -void -devlink_region_read_req_dump_free(struct devlink_region_read_req_dump *req); - -static inline void -devlink_region_read_req_dump_set_bus_name(struct devlink_region_read_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_region_read_req_dump_set_dev_name(struct devlink_region_read_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_region_read_req_dump_set_port_index(struct devlink_region_read_req_dump *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_region_read_req_dump_set_region_name(struct devlink_region_read_req_dump *req, - const char *region_name) -{ - free(req->region_name); - req->_present.region_name_len = strlen(region_name); - req->region_name = malloc(req->_present.region_name_len + 1); - memcpy(req->region_name, region_name, req->_present.region_name_len); - req->region_name[req->_present.region_name_len] = 0; -} -static inline void -devlink_region_read_req_dump_set_region_snapshot_id(struct devlink_region_read_req_dump *req, - __u32 region_snapshot_id) -{ - req->_present.region_snapshot_id = 1; - req->region_snapshot_id = region_snapshot_id; -} -static inline void -devlink_region_read_req_dump_set_region_direct(struct devlink_region_read_req_dump *req) -{ - req->_present.region_direct = 1; -} -static inline void -devlink_region_read_req_dump_set_region_chunk_addr(struct devlink_region_read_req_dump *req, - __u64 region_chunk_addr) -{ - req->_present.region_chunk_addr = 1; - req->region_chunk_addr = region_chunk_addr; -} -static inline void -devlink_region_read_req_dump_set_region_chunk_len(struct devlink_region_read_req_dump *req, - __u64 region_chunk_len) -{ - req->_present.region_chunk_len = 1; - req->region_chunk_len = region_chunk_len; -} - -struct devlink_region_read_rsp_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 region_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *region_name; -}; - -struct devlink_region_read_rsp_list { - struct devlink_region_read_rsp_list *next; - struct devlink_region_read_rsp_dump obj __attribute__((aligned(8))); -}; - -void -devlink_region_read_rsp_list_free(struct 
devlink_region_read_rsp_list *rsp); - -struct devlink_region_read_rsp_list * -devlink_region_read_dump(struct ynl_sock *ys, - struct devlink_region_read_req_dump *req); - -/* ============== DEVLINK_CMD_PORT_PARAM_GET ============== */ -/* DEVLINK_CMD_PORT_PARAM_GET - do */ -struct devlink_port_param_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_port_param_get_req * -devlink_port_param_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_port_param_get_req)); -} -void devlink_port_param_get_req_free(struct devlink_port_param_get_req *req); - -static inline void -devlink_port_param_get_req_set_bus_name(struct devlink_port_param_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_port_param_get_req_set_dev_name(struct devlink_port_param_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_port_param_get_req_set_port_index(struct devlink_port_param_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -struct devlink_port_param_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -void devlink_port_param_get_rsp_free(struct devlink_port_param_get_rsp *rsp); - -/* - * Get port param instances. 
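Region contents are read through the dump request above, one chunk message per list entry; a sketch under the same assumptions (hypothetical helper name, "devlink-user.h", caller-provided struct ynl_sock, arbitrary example window of 256 bytes from offset 0):

#include <stdio.h>
#include "devlink-user.h"	/* assumed name of the generated header */

/* Read a window of a region snapshot and count the chunk messages. */
static int read_region(struct ynl_sock *ys, const char *bus, const char *dev,
		       const char *region, __u32 snapshot_id)
{
	struct devlink_region_read_req_dump *req;
	struct devlink_region_read_rsp_list *rsp, *pos;
	int chunks = 0;

	req = devlink_region_read_req_dump_alloc();
	if (!req)
		return -1;
	devlink_region_read_req_dump_set_bus_name(req, bus);
	devlink_region_read_req_dump_set_dev_name(req, dev);
	devlink_region_read_req_dump_set_region_name(req, region);
	devlink_region_read_req_dump_set_region_snapshot_id(req, snapshot_id);
	devlink_region_read_req_dump_set_region_chunk_addr(req, 0);
	devlink_region_read_req_dump_set_region_chunk_len(req, 256);

	rsp = devlink_region_read_dump(ys, req);
	devlink_region_read_req_dump_free(req);
	if (!rsp)
		return -1;

	for (pos = rsp; pos; pos = pos->next)
		chunks++;
	printf("%s: %d chunk message(s)\n", region, chunks);

	devlink_region_read_rsp_list_free(rsp);
	return 0;
}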
- */
-struct devlink_port_param_get_rsp *
-devlink_port_param_get(struct ynl_sock *ys,
-		       struct devlink_port_param_get_req *req);
-
-/* DEVLINK_CMD_PORT_PARAM_GET - dump */
-struct devlink_port_param_get_list {
-	struct devlink_port_param_get_list *next;
-	struct devlink_port_param_get_rsp obj __attribute__((aligned(8)));
-};
-
-void devlink_port_param_get_list_free(struct devlink_port_param_get_list *rsp);
-
-struct devlink_port_param_get_list *
-devlink_port_param_get_dump(struct ynl_sock *ys);
-
-/* ============== DEVLINK_CMD_PORT_PARAM_SET ============== */
-/* DEVLINK_CMD_PORT_PARAM_SET - do */
-struct devlink_port_param_set_req {
-	struct {
-		__u32 bus_name_len;
-		__u32 dev_name_len;
-		__u32 port_index:1;
-	} _present;
-
-	char *bus_name;
-	char *dev_name;
-	__u32 port_index;
-};
-
-static inline struct devlink_port_param_set_req *
-devlink_port_param_set_req_alloc(void)
-{
-	return calloc(1, sizeof(struct devlink_port_param_set_req));
-}
-void devlink_port_param_set_req_free(struct devlink_port_param_set_req *req);
-
-static inline void
-devlink_port_param_set_req_set_bus_name(struct devlink_port_param_set_req *req,
-					const char *bus_name)
-{
-	free(req->bus_name);
-	req->_present.bus_name_len = strlen(bus_name);
-	req->bus_name = malloc(req->_present.bus_name_len + 1);
-	memcpy(req->bus_name, bus_name, req->_present.bus_name_len);
-	req->bus_name[req->_present.bus_name_len] = 0;
-}
-static inline void
-devlink_port_param_set_req_set_dev_name(struct devlink_port_param_set_req *req,
-					const char *dev_name)
-{
-	free(req->dev_name);
-	req->_present.dev_name_len = strlen(dev_name);
-	req->dev_name = malloc(req->_present.dev_name_len + 1);
-	memcpy(req->dev_name, dev_name, req->_present.dev_name_len);
-	req->dev_name[req->_present.dev_name_len] = 0;
-}
-static inline void
-devlink_port_param_set_req_set_port_index(struct devlink_port_param_set_req *req,
-					  __u32 port_index)
-{
-	req->_present.port_index = 1;
-	req->port_index = port_index;
-}
-
-/*
- * Set port param instances.
- */
-int devlink_port_param_set(struct ynl_sock *ys,
-			   struct devlink_port_param_set_req *req);
-
-/* ============== DEVLINK_CMD_INFO_GET ============== */
-/* DEVLINK_CMD_INFO_GET - do */
-struct devlink_info_get_req {
-	struct {
-		__u32 bus_name_len;
-		__u32 dev_name_len;
-	} _present;
-
-	char *bus_name;
-	char *dev_name;
-};
-
-static inline struct devlink_info_get_req *devlink_info_get_req_alloc(void)
-{
-	return calloc(1, sizeof(struct devlink_info_get_req));
-}
-void devlink_info_get_req_free(struct devlink_info_get_req *req);
-
-static inline void
-devlink_info_get_req_set_bus_name(struct devlink_info_get_req *req,
-				  const char *bus_name)
-{
-	free(req->bus_name);
-	req->_present.bus_name_len = strlen(bus_name);
-	req->bus_name = malloc(req->_present.bus_name_len + 1);
-	memcpy(req->bus_name, bus_name, req->_present.bus_name_len);
-	req->bus_name[req->_present.bus_name_len] = 0;
-}
-static inline void
-devlink_info_get_req_set_dev_name(struct devlink_info_get_req *req,
-				  const char *dev_name)
-{
-	free(req->dev_name);
-	req->_present.dev_name_len = strlen(dev_name);
-	req->dev_name = malloc(req->_present.dev_name_len + 1);
-	memcpy(req->dev_name, dev_name, req->_present.dev_name_len);
-	req->dev_name[req->_present.dev_name_len] = 0;
-}
-
-struct devlink_info_get_rsp {
-	struct {
-		__u32 bus_name_len;
-		__u32 dev_name_len;
-		__u32 info_driver_name_len;
-		__u32 info_serial_number_len;
-	} _present;
-
-	char *bus_name;
-	char *dev_name;
-	char *info_driver_name;
-	char *info_serial_number;
-	unsigned int n_info_version_fixed;
-	struct devlink_dl_info_version *info_version_fixed;
-	unsigned int n_info_version_running;
-	struct devlink_dl_info_version *info_version_running;
-	unsigned int n_info_version_stored;
-	struct devlink_dl_info_version *info_version_stored;
-};
-
-void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp);
-
-/*
- * Get device information, like driver name, hardware and firmware versions etc.
- */ -struct devlink_info_get_rsp * -devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req); - -/* DEVLINK_CMD_INFO_GET - dump */ -struct devlink_info_get_list { - struct devlink_info_get_list *next; - struct devlink_info_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_info_get_list_free(struct devlink_info_get_list *rsp); - -struct devlink_info_get_list *devlink_info_get_dump(struct ynl_sock *ys); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ -struct devlink_health_reporter_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_get_req * -devlink_health_reporter_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_get_req)); -} -void -devlink_health_reporter_get_req_free(struct devlink_health_reporter_get_req *req); - -static inline void -devlink_health_reporter_get_req_set_bus_name(struct devlink_health_reporter_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_set_dev_name(struct devlink_health_reporter_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_set_port_index(struct devlink_health_reporter_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_get_req_set_health_reporter_name(struct devlink_health_reporter_get_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -struct devlink_health_reporter_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -void -devlink_health_reporter_get_rsp_free(struct devlink_health_reporter_get_rsp *rsp); - -/* - * Get health reporter instances. 
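A sketch of the info-get "do" request above, printing two of the response fields; same assumptions as before (hypothetical helper name, "devlink-user.h", caller-provided struct ynl_sock):

#include <stdio.h>
#include "devlink-user.h"	/* assumed name of the generated header */

/* Print driver name and serial number for one device. */
static int show_info(struct ynl_sock *ys, const char *bus, const char *dev)
{
	struct devlink_info_get_req *req;
	struct devlink_info_get_rsp *rsp;

	req = devlink_info_get_req_alloc();
	if (!req)
		return -1;
	devlink_info_get_req_set_bus_name(req, bus);
	devlink_info_get_req_set_dev_name(req, dev);

	rsp = devlink_info_get(ys, req);
	devlink_info_get_req_free(req);
	if (!rsp)
		return -1;

	printf("driver: %s serial: %s\n",
	       rsp->info_driver_name ? rsp->info_driver_name : "?",
	       rsp->info_serial_number ? rsp->info_serial_number : "?");
	devlink_info_get_rsp_free(rsp);
	return 0;
}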
- */ -struct devlink_health_reporter_get_rsp * -devlink_health_reporter_get(struct ynl_sock *ys, - struct devlink_health_reporter_get_req *req); - -/* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ -struct devlink_health_reporter_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; -}; - -static inline struct devlink_health_reporter_get_req_dump * -devlink_health_reporter_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_get_req_dump)); -} -void -devlink_health_reporter_get_req_dump_free(struct devlink_health_reporter_get_req_dump *req); - -static inline void -devlink_health_reporter_get_req_dump_set_bus_name(struct devlink_health_reporter_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_dump_set_dev_name(struct devlink_health_reporter_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_get_req_dump_set_port_index(struct devlink_health_reporter_get_req_dump *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} - -struct devlink_health_reporter_get_list { - struct devlink_health_reporter_get_list *next; - struct devlink_health_reporter_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_health_reporter_get_list_free(struct devlink_health_reporter_get_list *rsp); - -struct devlink_health_reporter_get_list * -devlink_health_reporter_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_get_req_dump *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_SET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ -struct devlink_health_reporter_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - __u32 health_reporter_graceful_period:1; - __u32 health_reporter_auto_recover:1; - __u32 health_reporter_auto_dump:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; - __u64 health_reporter_graceful_period; - __u8 health_reporter_auto_recover; - __u8 health_reporter_auto_dump; -}; - -static inline struct devlink_health_reporter_set_req * -devlink_health_reporter_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_set_req)); -} -void -devlink_health_reporter_set_req_free(struct devlink_health_reporter_set_req *req); - -static inline void -devlink_health_reporter_set_req_set_bus_name(struct devlink_health_reporter_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_set_req_set_dev_name(struct devlink_health_reporter_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = 
strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_set_req_set_port_index(struct devlink_health_reporter_set_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_name(struct devlink_health_reporter_set_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_graceful_period(struct devlink_health_reporter_set_req *req, - __u64 health_reporter_graceful_period) -{ - req->_present.health_reporter_graceful_period = 1; - req->health_reporter_graceful_period = health_reporter_graceful_period; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_auto_recover(struct devlink_health_reporter_set_req *req, - __u8 health_reporter_auto_recover) -{ - req->_present.health_reporter_auto_recover = 1; - req->health_reporter_auto_recover = health_reporter_auto_recover; -} -static inline void -devlink_health_reporter_set_req_set_health_reporter_auto_dump(struct devlink_health_reporter_set_req *req, - __u8 health_reporter_auto_dump) -{ - req->_present.health_reporter_auto_dump = 1; - req->health_reporter_auto_dump = health_reporter_auto_dump; -} - -/* - * Set health reporter instances. 
- */ -int devlink_health_reporter_set(struct ynl_sock *ys, - struct devlink_health_reporter_set_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_RECOVER ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ -struct devlink_health_reporter_recover_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_recover_req * -devlink_health_reporter_recover_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_recover_req)); -} -void -devlink_health_reporter_recover_req_free(struct devlink_health_reporter_recover_req *req); - -static inline void -devlink_health_reporter_recover_req_set_bus_name(struct devlink_health_reporter_recover_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_recover_req_set_dev_name(struct devlink_health_reporter_recover_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_recover_req_set_port_index(struct devlink_health_reporter_recover_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_recover_req_set_health_reporter_name(struct devlink_health_reporter_recover_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Recover health reporter instances. 
- */ -int devlink_health_reporter_recover(struct ynl_sock *ys, - struct devlink_health_reporter_recover_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ -struct devlink_health_reporter_diagnose_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_diagnose_req * -devlink_health_reporter_diagnose_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_diagnose_req)); -} -void -devlink_health_reporter_diagnose_req_free(struct devlink_health_reporter_diagnose_req *req); - -static inline void -devlink_health_reporter_diagnose_req_set_bus_name(struct devlink_health_reporter_diagnose_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_diagnose_req_set_dev_name(struct devlink_health_reporter_diagnose_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_diagnose_req_set_port_index(struct devlink_health_reporter_diagnose_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_diagnose_req_set_health_reporter_name(struct devlink_health_reporter_diagnose_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Diagnose health reporter instances. 
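A sketch combining the health reporter set and recover requests above (hypothetical helper name, "devlink-user.h" assumed as the generated header, caller-provided struct ynl_sock):

#include "devlink-user.h"	/* assumed name of the generated header */

/* Enable auto-recovery on a reporter, then trigger one manual recover. */
static int tune_reporter(struct ynl_sock *ys, const char *bus, const char *dev,
			 const char *reporter)
{
	struct devlink_health_reporter_set_req *set;
	struct devlink_health_reporter_recover_req *rec;
	int err;

	set = devlink_health_reporter_set_req_alloc();
	if (!set)
		return -1;
	devlink_health_reporter_set_req_set_bus_name(set, bus);
	devlink_health_reporter_set_req_set_dev_name(set, dev);
	devlink_health_reporter_set_req_set_health_reporter_name(set, reporter);
	devlink_health_reporter_set_req_set_health_reporter_auto_recover(set, 1);
	err = devlink_health_reporter_set(ys, set);
	devlink_health_reporter_set_req_free(set);
	if (err)
		return err;

	rec = devlink_health_reporter_recover_req_alloc();
	if (!rec)
		return -1;
	devlink_health_reporter_recover_req_set_bus_name(rec, bus);
	devlink_health_reporter_recover_req_set_dev_name(rec, dev);
	devlink_health_reporter_recover_req_set_health_reporter_name(rec, reporter);
	err = devlink_health_reporter_recover(ys, rec);
	devlink_health_reporter_recover_req_free(rec);
	return err;
}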
- */ -int devlink_health_reporter_diagnose(struct ynl_sock *ys, - struct devlink_health_reporter_diagnose_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ -struct devlink_health_reporter_dump_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_dump_get_req_dump * -devlink_health_reporter_dump_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_dump_get_req_dump)); -} -void -devlink_health_reporter_dump_get_req_dump_free(struct devlink_health_reporter_dump_get_req_dump *req); - -static inline void -devlink_health_reporter_dump_get_req_dump_set_bus_name(struct devlink_health_reporter_dump_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_get_req_dump_set_dev_name(struct devlink_health_reporter_dump_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_get_req_dump_set_port_index(struct devlink_health_reporter_dump_get_req_dump *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_dump_get_req_dump_set_health_reporter_name(struct devlink_health_reporter_dump_get_req_dump *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -struct devlink_health_reporter_dump_get_rsp_dump { - struct { - __u32 fmsg:1; - } _present; - - struct devlink_dl_fmsg fmsg; -}; - -struct devlink_health_reporter_dump_get_rsp_list { - struct devlink_health_reporter_dump_get_rsp_list *next; - struct devlink_health_reporter_dump_get_rsp_dump obj __attribute__((aligned(8))); -}; - -void -devlink_health_reporter_dump_get_rsp_list_free(struct devlink_health_reporter_dump_get_rsp_list *rsp); - -struct devlink_health_reporter_dump_get_rsp_list * -devlink_health_reporter_dump_get_dump(struct ynl_sock *ys, - struct devlink_health_reporter_dump_get_req_dump *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ -struct devlink_health_reporter_dump_clear_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_dump_clear_req * -devlink_health_reporter_dump_clear_req_alloc(void) -{ - return 
calloc(1, sizeof(struct devlink_health_reporter_dump_clear_req)); -} -void -devlink_health_reporter_dump_clear_req_free(struct devlink_health_reporter_dump_clear_req *req); - -static inline void -devlink_health_reporter_dump_clear_req_set_bus_name(struct devlink_health_reporter_dump_clear_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_clear_req_set_dev_name(struct devlink_health_reporter_dump_clear_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_dump_clear_req_set_port_index(struct devlink_health_reporter_dump_clear_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_dump_clear_req_set_health_reporter_name(struct devlink_health_reporter_dump_clear_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Clear dump of health reporter instances. - */ -int devlink_health_reporter_dump_clear(struct ynl_sock *ys, - struct devlink_health_reporter_dump_clear_req *req); - -/* ============== DEVLINK_CMD_FLASH_UPDATE ============== */ -/* DEVLINK_CMD_FLASH_UPDATE - do */ -struct devlink_flash_update_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 flash_update_file_name_len; - __u32 flash_update_component_len; - __u32 flash_update_overwrite_mask:1; - } _present; - - char *bus_name; - char *dev_name; - char *flash_update_file_name; - char *flash_update_component; - struct nla_bitfield32 flash_update_overwrite_mask; -}; - -static inline struct devlink_flash_update_req * -devlink_flash_update_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_flash_update_req)); -} -void devlink_flash_update_req_free(struct devlink_flash_update_req *req); - -static inline void -devlink_flash_update_req_set_bus_name(struct devlink_flash_update_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_flash_update_req_set_dev_name(struct devlink_flash_update_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_flash_update_req_set_flash_update_file_name(struct devlink_flash_update_req *req, - const char *flash_update_file_name) -{ - free(req->flash_update_file_name); - 
req->_present.flash_update_file_name_len = strlen(flash_update_file_name); - req->flash_update_file_name = malloc(req->_present.flash_update_file_name_len + 1); - memcpy(req->flash_update_file_name, flash_update_file_name, req->_present.flash_update_file_name_len); - req->flash_update_file_name[req->_present.flash_update_file_name_len] = 0; -} -static inline void -devlink_flash_update_req_set_flash_update_component(struct devlink_flash_update_req *req, - const char *flash_update_component) -{ - free(req->flash_update_component); - req->_present.flash_update_component_len = strlen(flash_update_component); - req->flash_update_component = malloc(req->_present.flash_update_component_len + 1); - memcpy(req->flash_update_component, flash_update_component, req->_present.flash_update_component_len); - req->flash_update_component[req->_present.flash_update_component_len] = 0; -} -static inline void -devlink_flash_update_req_set_flash_update_overwrite_mask(struct devlink_flash_update_req *req, - struct nla_bitfield32 *flash_update_overwrite_mask) -{ - req->_present.flash_update_overwrite_mask = 1; - memcpy(&req->flash_update_overwrite_mask, flash_update_overwrite_mask, sizeof(struct nla_bitfield32)); -} - -/* - * Flash update devlink instances. - */ -int devlink_flash_update(struct ynl_sock *ys, - struct devlink_flash_update_req *req); - -/* ============== DEVLINK_CMD_TRAP_GET ============== */ -/* DEVLINK_CMD_TRAP_GET - do */ -struct devlink_trap_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_name; -}; - -static inline struct devlink_trap_get_req *devlink_trap_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_get_req)); -} -void devlink_trap_get_req_free(struct devlink_trap_get_req *req); - -static inline void -devlink_trap_get_req_set_bus_name(struct devlink_trap_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_get_req_set_dev_name(struct devlink_trap_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_get_req_set_trap_name(struct devlink_trap_get_req *req, - const char *trap_name) -{ - free(req->trap_name); - req->_present.trap_name_len = strlen(trap_name); - req->trap_name = malloc(req->_present.trap_name_len + 1); - memcpy(req->trap_name, trap_name, req->_present.trap_name_len); - req->trap_name[req->_present.trap_name_len] = 0; -} - -struct devlink_trap_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_name; -}; - -void devlink_trap_get_rsp_free(struct devlink_trap_get_rsp *rsp); - -/* - * Get trap instances. 
- */ -struct devlink_trap_get_rsp * -devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req); - -/* DEVLINK_CMD_TRAP_GET - dump */ -struct devlink_trap_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_trap_get_req_dump * -devlink_trap_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_get_req_dump)); -} -void devlink_trap_get_req_dump_free(struct devlink_trap_get_req_dump *req); - -static inline void -devlink_trap_get_req_dump_set_bus_name(struct devlink_trap_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_get_req_dump_set_dev_name(struct devlink_trap_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_trap_get_list { - struct devlink_trap_get_list *next; - struct devlink_trap_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_trap_get_list_free(struct devlink_trap_get_list *rsp); - -struct devlink_trap_get_list * -devlink_trap_get_dump(struct ynl_sock *ys, - struct devlink_trap_get_req_dump *req); - -/* ============== DEVLINK_CMD_TRAP_SET ============== */ -/* DEVLINK_CMD_TRAP_SET - do */ -struct devlink_trap_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_name_len; - __u32 trap_action:1; - } _present; - - char *bus_name; - char *dev_name; - char *trap_name; - enum devlink_trap_action trap_action; -}; - -static inline struct devlink_trap_set_req *devlink_trap_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_set_req)); -} -void devlink_trap_set_req_free(struct devlink_trap_set_req *req); - -static inline void -devlink_trap_set_req_set_bus_name(struct devlink_trap_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_set_req_set_dev_name(struct devlink_trap_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_set_req_set_trap_name(struct devlink_trap_set_req *req, - const char *trap_name) -{ - free(req->trap_name); - req->_present.trap_name_len = strlen(trap_name); - req->trap_name = malloc(req->_present.trap_name_len + 1); - memcpy(req->trap_name, trap_name, req->_present.trap_name_len); - req->trap_name[req->_present.trap_name_len] = 0; -} -static inline void -devlink_trap_set_req_set_trap_action(struct devlink_trap_set_req *req, - enum devlink_trap_action trap_action) -{ - req->_present.trap_action = 1; - req->trap_action = trap_action; -} - -/* - * Set trap instances. 
- */ -int devlink_trap_set(struct ynl_sock *ys, struct devlink_trap_set_req *req); - -/* ============== DEVLINK_CMD_TRAP_GROUP_GET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_GET - do */ -struct devlink_trap_group_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_group_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_group_name; -}; - -static inline struct devlink_trap_group_get_req * -devlink_trap_group_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_group_get_req)); -} -void devlink_trap_group_get_req_free(struct devlink_trap_group_get_req *req); - -static inline void -devlink_trap_group_get_req_set_bus_name(struct devlink_trap_group_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_group_get_req_set_dev_name(struct devlink_trap_group_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_group_get_req_set_trap_group_name(struct devlink_trap_group_get_req *req, - const char *trap_group_name) -{ - free(req->trap_group_name); - req->_present.trap_group_name_len = strlen(trap_group_name); - req->trap_group_name = malloc(req->_present.trap_group_name_len + 1); - memcpy(req->trap_group_name, trap_group_name, req->_present.trap_group_name_len); - req->trap_group_name[req->_present.trap_group_name_len] = 0; -} - -struct devlink_trap_group_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_group_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *trap_group_name; -}; - -void devlink_trap_group_get_rsp_free(struct devlink_trap_group_get_rsp *rsp); - -/* - * Get trap group instances. 
- */ -struct devlink_trap_group_get_rsp * -devlink_trap_group_get(struct ynl_sock *ys, - struct devlink_trap_group_get_req *req); - -/* DEVLINK_CMD_TRAP_GROUP_GET - dump */ -struct devlink_trap_group_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_trap_group_get_req_dump * -devlink_trap_group_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_group_get_req_dump)); -} -void -devlink_trap_group_get_req_dump_free(struct devlink_trap_group_get_req_dump *req); - -static inline void -devlink_trap_group_get_req_dump_set_bus_name(struct devlink_trap_group_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_group_get_req_dump_set_dev_name(struct devlink_trap_group_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_trap_group_get_list { - struct devlink_trap_group_get_list *next; - struct devlink_trap_group_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_trap_group_get_list_free(struct devlink_trap_group_get_list *rsp); - -struct devlink_trap_group_get_list * -devlink_trap_group_get_dump(struct ynl_sock *ys, - struct devlink_trap_group_get_req_dump *req); - -/* ============== DEVLINK_CMD_TRAP_GROUP_SET ============== */ -/* DEVLINK_CMD_TRAP_GROUP_SET - do */ -struct devlink_trap_group_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_group_name_len; - __u32 trap_action:1; - __u32 trap_policer_id:1; - } _present; - - char *bus_name; - char *dev_name; - char *trap_group_name; - enum devlink_trap_action trap_action; - __u32 trap_policer_id; -}; - -static inline struct devlink_trap_group_set_req * -devlink_trap_group_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_group_set_req)); -} -void devlink_trap_group_set_req_free(struct devlink_trap_group_set_req *req); - -static inline void -devlink_trap_group_set_req_set_bus_name(struct devlink_trap_group_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_group_set_req_set_dev_name(struct devlink_trap_group_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_group_set_req_set_trap_group_name(struct devlink_trap_group_set_req *req, - const char *trap_group_name) -{ - free(req->trap_group_name); - req->_present.trap_group_name_len = strlen(trap_group_name); - req->trap_group_name = malloc(req->_present.trap_group_name_len + 1); - memcpy(req->trap_group_name, trap_group_name, req->_present.trap_group_name_len); - 
req->trap_group_name[req->_present.trap_group_name_len] = 0; -} -static inline void -devlink_trap_group_set_req_set_trap_action(struct devlink_trap_group_set_req *req, - enum devlink_trap_action trap_action) -{ - req->_present.trap_action = 1; - req->trap_action = trap_action; -} -static inline void -devlink_trap_group_set_req_set_trap_policer_id(struct devlink_trap_group_set_req *req, - __u32 trap_policer_id) -{ - req->_present.trap_policer_id = 1; - req->trap_policer_id = trap_policer_id; -} - -/* - * Set trap group instances. - */ -int devlink_trap_group_set(struct ynl_sock *ys, - struct devlink_trap_group_set_req *req); - -/* ============== DEVLINK_CMD_TRAP_POLICER_GET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_GET - do */ -struct devlink_trap_policer_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_policer_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 trap_policer_id; -}; - -static inline struct devlink_trap_policer_get_req * -devlink_trap_policer_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_policer_get_req)); -} -void -devlink_trap_policer_get_req_free(struct devlink_trap_policer_get_req *req); - -static inline void -devlink_trap_policer_get_req_set_bus_name(struct devlink_trap_policer_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_policer_get_req_set_dev_name(struct devlink_trap_policer_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_policer_get_req_set_trap_policer_id(struct devlink_trap_policer_get_req *req, - __u32 trap_policer_id) -{ - req->_present.trap_policer_id = 1; - req->trap_policer_id = trap_policer_id; -} - -struct devlink_trap_policer_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_policer_id:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 trap_policer_id; -}; - -void -devlink_trap_policer_get_rsp_free(struct devlink_trap_policer_get_rsp *rsp); - -/* - * Get trap policer instances. 
- */ -struct devlink_trap_policer_get_rsp * -devlink_trap_policer_get(struct ynl_sock *ys, - struct devlink_trap_policer_get_req *req); - -/* DEVLINK_CMD_TRAP_POLICER_GET - dump */ -struct devlink_trap_policer_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_trap_policer_get_req_dump * -devlink_trap_policer_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_policer_get_req_dump)); -} -void -devlink_trap_policer_get_req_dump_free(struct devlink_trap_policer_get_req_dump *req); - -static inline void -devlink_trap_policer_get_req_dump_set_bus_name(struct devlink_trap_policer_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_policer_get_req_dump_set_dev_name(struct devlink_trap_policer_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_trap_policer_get_list { - struct devlink_trap_policer_get_list *next; - struct devlink_trap_policer_get_rsp obj __attribute__((aligned(8))); -}; - -void -devlink_trap_policer_get_list_free(struct devlink_trap_policer_get_list *rsp); - -struct devlink_trap_policer_get_list * -devlink_trap_policer_get_dump(struct ynl_sock *ys, - struct devlink_trap_policer_get_req_dump *req); - -/* ============== DEVLINK_CMD_TRAP_POLICER_SET ============== */ -/* DEVLINK_CMD_TRAP_POLICER_SET - do */ -struct devlink_trap_policer_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 trap_policer_id:1; - __u32 trap_policer_rate:1; - __u32 trap_policer_burst:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 trap_policer_id; - __u64 trap_policer_rate; - __u64 trap_policer_burst; -}; - -static inline struct devlink_trap_policer_set_req * -devlink_trap_policer_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_trap_policer_set_req)); -} -void -devlink_trap_policer_set_req_free(struct devlink_trap_policer_set_req *req); - -static inline void -devlink_trap_policer_set_req_set_bus_name(struct devlink_trap_policer_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_trap_policer_set_req_set_dev_name(struct devlink_trap_policer_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_trap_policer_set_req_set_trap_policer_id(struct devlink_trap_policer_set_req *req, - __u32 trap_policer_id) -{ - req->_present.trap_policer_id = 1; - req->trap_policer_id = trap_policer_id; -} -static inline void -devlink_trap_policer_set_req_set_trap_policer_rate(struct devlink_trap_policer_set_req *req, - 
__u64 trap_policer_rate) -{ - req->_present.trap_policer_rate = 1; - req->trap_policer_rate = trap_policer_rate; -} -static inline void -devlink_trap_policer_set_req_set_trap_policer_burst(struct devlink_trap_policer_set_req *req, - __u64 trap_policer_burst) -{ - req->_present.trap_policer_burst = 1; - req->trap_policer_burst = trap_policer_burst; -} - -/* - * Get trap policer instances. - */ -int devlink_trap_policer_set(struct ynl_sock *ys, - struct devlink_trap_policer_set_req *req); - -/* ============== DEVLINK_CMD_HEALTH_REPORTER_TEST ============== */ -/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ -struct devlink_health_reporter_test_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 health_reporter_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *health_reporter_name; -}; - -static inline struct devlink_health_reporter_test_req * -devlink_health_reporter_test_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_health_reporter_test_req)); -} -void -devlink_health_reporter_test_req_free(struct devlink_health_reporter_test_req *req); - -static inline void -devlink_health_reporter_test_req_set_bus_name(struct devlink_health_reporter_test_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_health_reporter_test_req_set_dev_name(struct devlink_health_reporter_test_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_health_reporter_test_req_set_port_index(struct devlink_health_reporter_test_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_health_reporter_test_req_set_health_reporter_name(struct devlink_health_reporter_test_req *req, - const char *health_reporter_name) -{ - free(req->health_reporter_name); - req->_present.health_reporter_name_len = strlen(health_reporter_name); - req->health_reporter_name = malloc(req->_present.health_reporter_name_len + 1); - memcpy(req->health_reporter_name, health_reporter_name, req->_present.health_reporter_name_len); - req->health_reporter_name[req->_present.health_reporter_name_len] = 0; -} - -/* - * Test health reporter instances. 
- */ -int devlink_health_reporter_test(struct ynl_sock *ys, - struct devlink_health_reporter_test_req *req); - -/* ============== DEVLINK_CMD_RATE_GET ============== */ -/* DEVLINK_CMD_RATE_GET - do */ -struct devlink_rate_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 rate_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *rate_node_name; -}; - -static inline struct devlink_rate_get_req *devlink_rate_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_get_req)); -} -void devlink_rate_get_req_free(struct devlink_rate_get_req *req); - -static inline void -devlink_rate_get_req_set_bus_name(struct devlink_rate_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_get_req_set_dev_name(struct devlink_rate_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_get_req_set_port_index(struct devlink_rate_get_req *req, - __u32 port_index) -{ - req->_present.port_index = 1; - req->port_index = port_index; -} -static inline void -devlink_rate_get_req_set_rate_node_name(struct devlink_rate_get_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} - -struct devlink_rate_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 port_index:1; - __u32 rate_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 port_index; - char *rate_node_name; -}; - -void devlink_rate_get_rsp_free(struct devlink_rate_get_rsp *rsp); - -/* - * Get rate instances. 
- */ -struct devlink_rate_get_rsp * -devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req); - -/* DEVLINK_CMD_RATE_GET - dump */ -struct devlink_rate_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_rate_get_req_dump * -devlink_rate_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_get_req_dump)); -} -void devlink_rate_get_req_dump_free(struct devlink_rate_get_req_dump *req); - -static inline void -devlink_rate_get_req_dump_set_bus_name(struct devlink_rate_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_get_req_dump_set_dev_name(struct devlink_rate_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_rate_get_list { - struct devlink_rate_get_list *next; - struct devlink_rate_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_rate_get_list_free(struct devlink_rate_get_list *rsp); - -struct devlink_rate_get_list * -devlink_rate_get_dump(struct ynl_sock *ys, - struct devlink_rate_get_req_dump *req); - -/* ============== DEVLINK_CMD_RATE_SET ============== */ -/* DEVLINK_CMD_RATE_SET - do */ -struct devlink_rate_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 rate_node_name_len; - __u32 rate_tx_share:1; - __u32 rate_tx_max:1; - __u32 rate_tx_priority:1; - __u32 rate_tx_weight:1; - __u32 rate_parent_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *rate_node_name; - __u64 rate_tx_share; - __u64 rate_tx_max; - __u32 rate_tx_priority; - __u32 rate_tx_weight; - char *rate_parent_node_name; -}; - -static inline struct devlink_rate_set_req *devlink_rate_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_set_req)); -} -void devlink_rate_set_req_free(struct devlink_rate_set_req *req); - -static inline void -devlink_rate_set_req_set_bus_name(struct devlink_rate_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_set_req_set_dev_name(struct devlink_rate_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_set_req_set_rate_node_name(struct devlink_rate_set_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} -static inline void 
-devlink_rate_set_req_set_rate_tx_share(struct devlink_rate_set_req *req, - __u64 rate_tx_share) -{ - req->_present.rate_tx_share = 1; - req->rate_tx_share = rate_tx_share; -} -static inline void -devlink_rate_set_req_set_rate_tx_max(struct devlink_rate_set_req *req, - __u64 rate_tx_max) -{ - req->_present.rate_tx_max = 1; - req->rate_tx_max = rate_tx_max; -} -static inline void -devlink_rate_set_req_set_rate_tx_priority(struct devlink_rate_set_req *req, - __u32 rate_tx_priority) -{ - req->_present.rate_tx_priority = 1; - req->rate_tx_priority = rate_tx_priority; -} -static inline void -devlink_rate_set_req_set_rate_tx_weight(struct devlink_rate_set_req *req, - __u32 rate_tx_weight) -{ - req->_present.rate_tx_weight = 1; - req->rate_tx_weight = rate_tx_weight; -} -static inline void -devlink_rate_set_req_set_rate_parent_node_name(struct devlink_rate_set_req *req, - const char *rate_parent_node_name) -{ - free(req->rate_parent_node_name); - req->_present.rate_parent_node_name_len = strlen(rate_parent_node_name); - req->rate_parent_node_name = malloc(req->_present.rate_parent_node_name_len + 1); - memcpy(req->rate_parent_node_name, rate_parent_node_name, req->_present.rate_parent_node_name_len); - req->rate_parent_node_name[req->_present.rate_parent_node_name_len] = 0; -} - -/* - * Set rate instances. - */ -int devlink_rate_set(struct ynl_sock *ys, struct devlink_rate_set_req *req); - -/* ============== DEVLINK_CMD_RATE_NEW ============== */ -/* DEVLINK_CMD_RATE_NEW - do */ -struct devlink_rate_new_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 rate_node_name_len; - __u32 rate_tx_share:1; - __u32 rate_tx_max:1; - __u32 rate_tx_priority:1; - __u32 rate_tx_weight:1; - __u32 rate_parent_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *rate_node_name; - __u64 rate_tx_share; - __u64 rate_tx_max; - __u32 rate_tx_priority; - __u32 rate_tx_weight; - char *rate_parent_node_name; -}; - -static inline struct devlink_rate_new_req *devlink_rate_new_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_new_req)); -} -void devlink_rate_new_req_free(struct devlink_rate_new_req *req); - -static inline void -devlink_rate_new_req_set_bus_name(struct devlink_rate_new_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_new_req_set_dev_name(struct devlink_rate_new_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_new_req_set_rate_node_name(struct devlink_rate_new_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} -static inline void -devlink_rate_new_req_set_rate_tx_share(struct devlink_rate_new_req *req, - __u64 rate_tx_share) -{ - req->_present.rate_tx_share = 1; - req->rate_tx_share = rate_tx_share; -} -static inline void 
-devlink_rate_new_req_set_rate_tx_max(struct devlink_rate_new_req *req, - __u64 rate_tx_max) -{ - req->_present.rate_tx_max = 1; - req->rate_tx_max = rate_tx_max; -} -static inline void -devlink_rate_new_req_set_rate_tx_priority(struct devlink_rate_new_req *req, - __u32 rate_tx_priority) -{ - req->_present.rate_tx_priority = 1; - req->rate_tx_priority = rate_tx_priority; -} -static inline void -devlink_rate_new_req_set_rate_tx_weight(struct devlink_rate_new_req *req, - __u32 rate_tx_weight) -{ - req->_present.rate_tx_weight = 1; - req->rate_tx_weight = rate_tx_weight; -} -static inline void -devlink_rate_new_req_set_rate_parent_node_name(struct devlink_rate_new_req *req, - const char *rate_parent_node_name) -{ - free(req->rate_parent_node_name); - req->_present.rate_parent_node_name_len = strlen(rate_parent_node_name); - req->rate_parent_node_name = malloc(req->_present.rate_parent_node_name_len + 1); - memcpy(req->rate_parent_node_name, rate_parent_node_name, req->_present.rate_parent_node_name_len); - req->rate_parent_node_name[req->_present.rate_parent_node_name_len] = 0; -} - -/* - * Create rate instances. - */ -int devlink_rate_new(struct ynl_sock *ys, struct devlink_rate_new_req *req); - -/* ============== DEVLINK_CMD_RATE_DEL ============== */ -/* DEVLINK_CMD_RATE_DEL - do */ -struct devlink_rate_del_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 rate_node_name_len; - } _present; - - char *bus_name; - char *dev_name; - char *rate_node_name; -}; - -static inline struct devlink_rate_del_req *devlink_rate_del_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_rate_del_req)); -} -void devlink_rate_del_req_free(struct devlink_rate_del_req *req); - -static inline void -devlink_rate_del_req_set_bus_name(struct devlink_rate_del_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_rate_del_req_set_dev_name(struct devlink_rate_del_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_rate_del_req_set_rate_node_name(struct devlink_rate_del_req *req, - const char *rate_node_name) -{ - free(req->rate_node_name); - req->_present.rate_node_name_len = strlen(rate_node_name); - req->rate_node_name = malloc(req->_present.rate_node_name_len + 1); - memcpy(req->rate_node_name, rate_node_name, req->_present.rate_node_name_len); - req->rate_node_name[req->_present.rate_node_name_len] = 0; -} - -/* - * Delete rate instances. 
- */ -int devlink_rate_del(struct ynl_sock *ys, struct devlink_rate_del_req *req); - -/* ============== DEVLINK_CMD_LINECARD_GET ============== */ -/* DEVLINK_CMD_LINECARD_GET - do */ -struct devlink_linecard_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 linecard_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 linecard_index; -}; - -static inline struct devlink_linecard_get_req * -devlink_linecard_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_linecard_get_req)); -} -void devlink_linecard_get_req_free(struct devlink_linecard_get_req *req); - -static inline void -devlink_linecard_get_req_set_bus_name(struct devlink_linecard_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_linecard_get_req_set_dev_name(struct devlink_linecard_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_linecard_get_req_set_linecard_index(struct devlink_linecard_get_req *req, - __u32 linecard_index) -{ - req->_present.linecard_index = 1; - req->linecard_index = linecard_index; -} - -struct devlink_linecard_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 linecard_index:1; - } _present; - - char *bus_name; - char *dev_name; - __u32 linecard_index; -}; - -void devlink_linecard_get_rsp_free(struct devlink_linecard_get_rsp *rsp); - -/* - * Get line card instances. 
- */ -struct devlink_linecard_get_rsp * -devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req); - -/* DEVLINK_CMD_LINECARD_GET - dump */ -struct devlink_linecard_get_req_dump { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_linecard_get_req_dump * -devlink_linecard_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct devlink_linecard_get_req_dump)); -} -void -devlink_linecard_get_req_dump_free(struct devlink_linecard_get_req_dump *req); - -static inline void -devlink_linecard_get_req_dump_set_bus_name(struct devlink_linecard_get_req_dump *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_linecard_get_req_dump_set_dev_name(struct devlink_linecard_get_req_dump *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_linecard_get_list { - struct devlink_linecard_get_list *next; - struct devlink_linecard_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_linecard_get_list_free(struct devlink_linecard_get_list *rsp); - -struct devlink_linecard_get_list * -devlink_linecard_get_dump(struct ynl_sock *ys, - struct devlink_linecard_get_req_dump *req); - -/* ============== DEVLINK_CMD_LINECARD_SET ============== */ -/* DEVLINK_CMD_LINECARD_SET - do */ -struct devlink_linecard_set_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 linecard_index:1; - __u32 linecard_type_len; - } _present; - - char *bus_name; - char *dev_name; - __u32 linecard_index; - char *linecard_type; -}; - -static inline struct devlink_linecard_set_req * -devlink_linecard_set_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_linecard_set_req)); -} -void devlink_linecard_set_req_free(struct devlink_linecard_set_req *req); - -static inline void -devlink_linecard_set_req_set_bus_name(struct devlink_linecard_set_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_linecard_set_req_set_dev_name(struct devlink_linecard_set_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void -devlink_linecard_set_req_set_linecard_index(struct devlink_linecard_set_req *req, - __u32 linecard_index) -{ - req->_present.linecard_index = 1; - req->linecard_index = linecard_index; -} -static inline void -devlink_linecard_set_req_set_linecard_type(struct devlink_linecard_set_req *req, - const char *linecard_type) -{ - free(req->linecard_type); - req->_present.linecard_type_len = strlen(linecard_type); - req->linecard_type = malloc(req->_present.linecard_type_len + 1); - memcpy(req->linecard_type, linecard_type, 
req->_present.linecard_type_len); - req->linecard_type[req->_present.linecard_type_len] = 0; -} - -/* - * Set line card instances. - */ -int devlink_linecard_set(struct ynl_sock *ys, - struct devlink_linecard_set_req *req); - -/* ============== DEVLINK_CMD_SELFTESTS_GET ============== */ -/* DEVLINK_CMD_SELFTESTS_GET - do */ -struct devlink_selftests_get_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -static inline struct devlink_selftests_get_req * -devlink_selftests_get_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_selftests_get_req)); -} -void devlink_selftests_get_req_free(struct devlink_selftests_get_req *req); - -static inline void -devlink_selftests_get_req_set_bus_name(struct devlink_selftests_get_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_selftests_get_req_set_dev_name(struct devlink_selftests_get_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} - -struct devlink_selftests_get_rsp { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - } _present; - - char *bus_name; - char *dev_name; -}; - -void devlink_selftests_get_rsp_free(struct devlink_selftests_get_rsp *rsp); - -/* - * Get device selftest instances. - */ -struct devlink_selftests_get_rsp * -devlink_selftests_get(struct ynl_sock *ys, - struct devlink_selftests_get_req *req); - -/* DEVLINK_CMD_SELFTESTS_GET - dump */ -struct devlink_selftests_get_list { - struct devlink_selftests_get_list *next; - struct devlink_selftests_get_rsp obj __attribute__((aligned(8))); -}; - -void devlink_selftests_get_list_free(struct devlink_selftests_get_list *rsp); - -struct devlink_selftests_get_list * -devlink_selftests_get_dump(struct ynl_sock *ys); - -/* ============== DEVLINK_CMD_SELFTESTS_RUN ============== */ -/* DEVLINK_CMD_SELFTESTS_RUN - do */ -struct devlink_selftests_run_req { - struct { - __u32 bus_name_len; - __u32 dev_name_len; - __u32 selftests:1; - } _present; - - char *bus_name; - char *dev_name; - struct devlink_dl_selftest_id selftests; -}; - -static inline struct devlink_selftests_run_req * -devlink_selftests_run_req_alloc(void) -{ - return calloc(1, sizeof(struct devlink_selftests_run_req)); -} -void devlink_selftests_run_req_free(struct devlink_selftests_run_req *req); - -static inline void -devlink_selftests_run_req_set_bus_name(struct devlink_selftests_run_req *req, - const char *bus_name) -{ - free(req->bus_name); - req->_present.bus_name_len = strlen(bus_name); - req->bus_name = malloc(req->_present.bus_name_len + 1); - memcpy(req->bus_name, bus_name, req->_present.bus_name_len); - req->bus_name[req->_present.bus_name_len] = 0; -} -static inline void -devlink_selftests_run_req_set_dev_name(struct devlink_selftests_run_req *req, - const char *dev_name) -{ - free(req->dev_name); - req->_present.dev_name_len = strlen(dev_name); - req->dev_name = malloc(req->_present.dev_name_len + 1); - memcpy(req->dev_name, dev_name, req->_present.dev_name_len); - req->dev_name[req->_present.dev_name_len] = 0; -} -static inline void 
-devlink_selftests_run_req_set_selftests_flash(struct devlink_selftests_run_req *req) -{ - req->_present.selftests = 1; - req->selftests._present.flash = 1; -} - -/* - * Run device selftest instances. - */ -int devlink_selftests_run(struct ynl_sock *ys, - struct devlink_selftests_run_req *req); - -#endif /* _LINUX_DEVLINK_GEN_H */ diff --git a/tools/net/ynl/generated/ethtool-user.c b/tools/net/ynl/generated/ethtool-user.c deleted file mode 100644 index 660435639e2b..000000000000 --- a/tools/net/ynl/generated/ethtool-user.c +++ /dev/null @@ -1,6370 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/ethtool.yaml */ -/* YNL-GEN user source */ -/* YNL-ARG --user-header linux/ethtool_netlink.h --exclude-op stats-get */ - -#include -#include -#include "ethtool-user.h" -#include "ynl.h" -#include - -#include -#include - -#include "linux/ethtool_netlink.h" - -/* Enums */ -static const char * const ethtool_op_strmap[] = { - [ETHTOOL_MSG_STRSET_GET] = "strset-get", - [ETHTOOL_MSG_LINKINFO_GET] = "linkinfo-get", - [3] = "linkinfo-ntf", - [ETHTOOL_MSG_LINKMODES_GET] = "linkmodes-get", - [5] = "linkmodes-ntf", - [ETHTOOL_MSG_LINKSTATE_GET] = "linkstate-get", - [ETHTOOL_MSG_DEBUG_GET] = "debug-get", - [8] = "debug-ntf", - [ETHTOOL_MSG_WOL_GET] = "wol-get", - [10] = "wol-ntf", - [ETHTOOL_MSG_FEATURES_GET] = "features-get", - [ETHTOOL_MSG_FEATURES_SET] = "features-set", - [13] = "features-ntf", - [14] = "privflags-get", - [15] = "privflags-ntf", - [16] = "rings-get", - [17] = "rings-ntf", - [18] = "channels-get", - [19] = "channels-ntf", - [20] = "coalesce-get", - [21] = "coalesce-ntf", - [22] = "pause-get", - [23] = "pause-ntf", - [24] = "eee-get", - [25] = "eee-ntf", - [26] = "tsinfo-get", - [27] = "cable-test-ntf", - [28] = "cable-test-tdr-ntf", - [29] = "tunnel-info-get", - [30] = "fec-get", - [31] = "fec-ntf", - [32] = "module-eeprom-get", - [34] = "phc-vclocks-get", - [35] = "module-get", - [36] = "module-ntf", - [37] = "pse-get", - [ETHTOOL_MSG_RSS_GET] = "rss-get", - [ETHTOOL_MSG_PLCA_GET_CFG] = "plca-get-cfg", - [40] = "plca-get-status", - [41] = "plca-ntf", - [ETHTOOL_MSG_MM_GET] = "mm-get", - [43] = "mm-ntf", -}; - -const char *ethtool_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(ethtool_op_strmap)) - return NULL; - return ethtool_op_strmap[op]; -} - -static const char * const ethtool_udp_tunnel_type_strmap[] = { - [0] = "vxlan", - [1] = "geneve", - [2] = "vxlan-gpe", -}; - -const char *ethtool_udp_tunnel_type_str(int value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(ethtool_udp_tunnel_type_strmap)) - return NULL; - return ethtool_udp_tunnel_type_strmap[value]; -} - -static const char * const ethtool_stringset_strmap[] = { -}; - -const char *ethtool_stringset_str(enum ethtool_stringset value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(ethtool_stringset_strmap)) - return NULL; - return ethtool_stringset_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr ethtool_header_policy[ETHTOOL_A_HEADER_MAX + 1] = { - [ETHTOOL_A_HEADER_DEV_INDEX] = { .name = "dev-index", .type = YNL_PT_U32, }, - [ETHTOOL_A_HEADER_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, }, - [ETHTOOL_A_HEADER_FLAGS] = { .name = "flags", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_header_nest = { - .max_attr = ETHTOOL_A_HEADER_MAX, - .table = ethtool_header_policy, -}; - -struct ynl_policy_attr ethtool_pause_stat_policy[ETHTOOL_A_PAUSE_STAT_MAX + 1] = 
{ - [ETHTOOL_A_PAUSE_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [ETHTOOL_A_PAUSE_STAT_TX_FRAMES] = { .name = "tx-frames", .type = YNL_PT_U64, }, - [ETHTOOL_A_PAUSE_STAT_RX_FRAMES] = { .name = "rx-frames", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest ethtool_pause_stat_nest = { - .max_attr = ETHTOOL_A_PAUSE_STAT_MAX, - .table = ethtool_pause_stat_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_tdr_cfg_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .name = "first", .type = YNL_PT_U32, }, - [ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST] = { .name = "last", .type = YNL_PT_U32, }, - [ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP] = { .name = "step", .type = YNL_PT_U32, }, - [ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR] = { .name = "pair", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_cable_test_tdr_cfg_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, - .table = ethtool_cable_test_tdr_cfg_policy, -}; - -struct ynl_policy_attr ethtool_fec_stat_policy[ETHTOOL_A_FEC_STAT_MAX + 1] = { - [ETHTOOL_A_FEC_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [ETHTOOL_A_FEC_STAT_CORRECTED] = { .name = "corrected", .type = YNL_PT_BINARY,}, - [ETHTOOL_A_FEC_STAT_UNCORR] = { .name = "uncorr", .type = YNL_PT_BINARY,}, - [ETHTOOL_A_FEC_STAT_CORR_BITS] = { .name = "corr-bits", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_fec_stat_nest = { - .max_attr = ETHTOOL_A_FEC_STAT_MAX, - .table = ethtool_fec_stat_policy, -}; - -struct ynl_policy_attr ethtool_mm_stat_policy[ETHTOOL_A_MM_STAT_MAX + 1] = { - [ETHTOOL_A_MM_STAT_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS] = { .name = "reassembly-errors", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_SMD_ERRORS] = { .name = "smd-errors", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_REASSEMBLY_OK] = { .name = "reassembly-ok", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_RX_FRAG_COUNT] = { .name = "rx-frag-count", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_TX_FRAG_COUNT] = { .name = "tx-frag-count", .type = YNL_PT_U64, }, - [ETHTOOL_A_MM_STAT_HOLD_COUNT] = { .name = "hold-count", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest ethtool_mm_stat_nest = { - .max_attr = ETHTOOL_A_MM_STAT_MAX, - .table = ethtool_mm_stat_policy, -}; - -struct ynl_policy_attr ethtool_cable_result_policy[ETHTOOL_A_CABLE_RESULT_MAX + 1] = { - [ETHTOOL_A_CABLE_RESULT_PAIR] = { .name = "pair", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_RESULT_CODE] = { .name = "code", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_cable_result_nest = { - .max_attr = ETHTOOL_A_CABLE_RESULT_MAX, - .table = ethtool_cable_result_policy, -}; - -struct ynl_policy_attr ethtool_cable_fault_length_policy[ETHTOOL_A_CABLE_FAULT_LENGTH_MAX + 1] = { - [ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR] = { .name = "pair", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_FAULT_LENGTH_CM] = { .name = "cm", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_cable_fault_length_nest = { - .max_attr = ETHTOOL_A_CABLE_FAULT_LENGTH_MAX, - .table = ethtool_cable_fault_length_policy, -}; - -struct ynl_policy_attr ethtool_bitset_bit_policy[ETHTOOL_A_BITSET_BIT_MAX + 1] = { - [ETHTOOL_A_BITSET_BIT_INDEX] = { .name = "index", .type = YNL_PT_U32, }, - [ETHTOOL_A_BITSET_BIT_NAME] = { .name = "name", .type = YNL_PT_NUL_STR, }, - [ETHTOOL_A_BITSET_BIT_VALUE] = { .name = "value", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest ethtool_bitset_bit_nest = { - .max_attr = ETHTOOL_A_BITSET_BIT_MAX, - .table = 
ethtool_bitset_bit_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_udp_entry_policy[ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX + 1] = { - [ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT] = { .name = "port", .type = YNL_PT_U16, }, - [ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE] = { .name = "type", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_tunnel_udp_entry_nest = { - .max_attr = ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX, - .table = ethtool_tunnel_udp_entry_policy, -}; - -struct ynl_policy_attr ethtool_string_policy[ETHTOOL_A_STRING_MAX + 1] = { - [ETHTOOL_A_STRING_INDEX] = { .name = "index", .type = YNL_PT_U32, }, - [ETHTOOL_A_STRING_VALUE] = { .name = "value", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest ethtool_string_nest = { - .max_attr = ETHTOOL_A_STRING_MAX, - .table = ethtool_string_policy, -}; - -struct ynl_policy_attr ethtool_cable_nest_policy[ETHTOOL_A_CABLE_NEST_MAX + 1] = { - [ETHTOOL_A_CABLE_NEST_RESULT] = { .name = "result", .type = YNL_PT_NEST, .nest = ðtool_cable_result_nest, }, - [ETHTOOL_A_CABLE_NEST_FAULT_LENGTH] = { .name = "fault-length", .type = YNL_PT_NEST, .nest = ðtool_cable_fault_length_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_nest_nest = { - .max_attr = ETHTOOL_A_CABLE_NEST_MAX, - .table = ethtool_cable_nest_policy, -}; - -struct ynl_policy_attr ethtool_bitset_bits_policy[ETHTOOL_A_BITSET_BITS_MAX + 1] = { - [ETHTOOL_A_BITSET_BITS_BIT] = { .name = "bit", .type = YNL_PT_NEST, .nest = ðtool_bitset_bit_nest, }, -}; - -struct ynl_policy_nest ethtool_bitset_bits_nest = { - .max_attr = ETHTOOL_A_BITSET_BITS_MAX, - .table = ethtool_bitset_bits_policy, -}; - -struct ynl_policy_attr ethtool_strings_policy[ETHTOOL_A_STRINGS_MAX + 1] = { - [ETHTOOL_A_STRINGS_STRING] = { .name = "string", .type = YNL_PT_NEST, .nest = ðtool_string_nest, }, -}; - -struct ynl_policy_nest ethtool_strings_nest = { - .max_attr = ETHTOOL_A_STRINGS_MAX, - .table = ethtool_strings_policy, -}; - -struct ynl_policy_attr ethtool_bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = { - [ETHTOOL_A_BITSET_NOMASK] = { .name = "nomask", .type = YNL_PT_FLAG, }, - [ETHTOOL_A_BITSET_SIZE] = { .name = "size", .type = YNL_PT_U32, }, - [ETHTOOL_A_BITSET_BITS] = { .name = "bits", .type = YNL_PT_NEST, .nest = ðtool_bitset_bits_nest, }, -}; - -struct ynl_policy_nest ethtool_bitset_nest = { - .max_attr = ETHTOOL_A_BITSET_MAX, - .table = ethtool_bitset_policy, -}; - -struct ynl_policy_attr ethtool_stringset_policy[ETHTOOL_A_STRINGSET_MAX + 1] = { - [ETHTOOL_A_STRINGSET_ID] = { .name = "id", .type = YNL_PT_U32, }, - [ETHTOOL_A_STRINGSET_COUNT] = { .name = "count", .type = YNL_PT_U32, }, - [ETHTOOL_A_STRINGSET_STRINGS] = { .name = "strings", .type = YNL_PT_NEST, .nest = ðtool_strings_nest, }, -}; - -struct ynl_policy_nest ethtool_stringset_nest = { - .max_attr = ETHTOOL_A_STRINGSET_MAX, - .table = ethtool_stringset_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_udp_table_policy[ETHTOOL_A_TUNNEL_UDP_TABLE_MAX + 1] = { - [ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE] = { .name = "size", .type = YNL_PT_U32, }, - [ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES] = { .name = "types", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY] = { .name = "entry", .type = YNL_PT_NEST, .nest = ðtool_tunnel_udp_entry_nest, }, -}; - -struct ynl_policy_nest ethtool_tunnel_udp_table_nest = { - .max_attr = ETHTOOL_A_TUNNEL_UDP_TABLE_MAX, - .table = ethtool_tunnel_udp_table_policy, -}; - -struct ynl_policy_attr ethtool_stringsets_policy[ETHTOOL_A_STRINGSETS_MAX + 1] = { - [ETHTOOL_A_STRINGSETS_STRINGSET] = { .name = "stringset", .type = 
YNL_PT_NEST, .nest = ðtool_stringset_nest, }, -}; - -struct ynl_policy_nest ethtool_stringsets_nest = { - .max_attr = ETHTOOL_A_STRINGSETS_MAX, - .table = ethtool_stringsets_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_udp_policy[ETHTOOL_A_TUNNEL_UDP_MAX + 1] = { - [ETHTOOL_A_TUNNEL_UDP_TABLE] = { .name = "table", .type = YNL_PT_NEST, .nest = ðtool_tunnel_udp_table_nest, }, -}; - -struct ynl_policy_nest ethtool_tunnel_udp_nest = { - .max_attr = ETHTOOL_A_TUNNEL_UDP_MAX, - .table = ethtool_tunnel_udp_policy, -}; - -struct ynl_policy_attr ethtool_strset_policy[ETHTOOL_A_STRSET_MAX + 1] = { - [ETHTOOL_A_STRSET_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_STRSET_STRINGSETS] = { .name = "stringsets", .type = YNL_PT_NEST, .nest = ðtool_stringsets_nest, }, - [ETHTOOL_A_STRSET_COUNTS_ONLY] = { .name = "counts-only", .type = YNL_PT_FLAG, }, -}; - -struct ynl_policy_nest ethtool_strset_nest = { - .max_attr = ETHTOOL_A_STRSET_MAX, - .table = ethtool_strset_policy, -}; - -struct ynl_policy_attr ethtool_linkinfo_policy[ETHTOOL_A_LINKINFO_MAX + 1] = { - [ETHTOOL_A_LINKINFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_LINKINFO_PORT] = { .name = "port", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_PHYADDR] = { .name = "phyaddr", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_TP_MDIX] = { .name = "tp-mdix", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .name = "tp-mdix-ctrl", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKINFO_TRANSCEIVER] = { .name = "transceiver", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_linkinfo_nest = { - .max_attr = ETHTOOL_A_LINKINFO_MAX, - .table = ethtool_linkinfo_policy, -}; - -struct ynl_policy_attr ethtool_linkmodes_policy[ETHTOOL_A_LINKMODES_MAX + 1] = { - [ETHTOOL_A_LINKMODES_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_LINKMODES_AUTONEG] = { .name = "autoneg", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_OURS] = { .name = "ours", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_LINKMODES_PEER] = { .name = "peer", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_LINKMODES_SPEED] = { .name = "speed", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKMODES_DUPLEX] = { .name = "duplex", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .name = "master-slave-cfg", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE] = { .name = "master-slave-state", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKMODES_LANES] = { .name = "lanes", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKMODES_RATE_MATCHING] = { .name = "rate-matching", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_linkmodes_nest = { - .max_attr = ETHTOOL_A_LINKMODES_MAX, - .table = ethtool_linkmodes_policy, -}; - -struct ynl_policy_attr ethtool_linkstate_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = { - [ETHTOOL_A_LINKSTATE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_LINKSTATE_LINK] = { .name = "link", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKSTATE_SQI] = { .name = "sqi", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .name = "sqi-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_LINKSTATE_EXT_STATE] = { .name = "ext-state", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKSTATE_EXT_SUBSTATE] = { .name = "ext-substate", .type = YNL_PT_U8, }, - [ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT] = { .name = "ext-down-cnt", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest 
ethtool_linkstate_nest = { - .max_attr = ETHTOOL_A_LINKSTATE_MAX, - .table = ethtool_linkstate_policy, -}; - -struct ynl_policy_attr ethtool_debug_policy[ETHTOOL_A_DEBUG_MAX + 1] = { - [ETHTOOL_A_DEBUG_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_DEBUG_MSGMASK] = { .name = "msgmask", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, -}; - -struct ynl_policy_nest ethtool_debug_nest = { - .max_attr = ETHTOOL_A_DEBUG_MAX, - .table = ethtool_debug_policy, -}; - -struct ynl_policy_attr ethtool_wol_policy[ETHTOOL_A_WOL_MAX + 1] = { - [ETHTOOL_A_WOL_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_WOL_MODES] = { .name = "modes", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_WOL_SOPASS] = { .name = "sopass", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_wol_nest = { - .max_attr = ETHTOOL_A_WOL_MAX, - .table = ethtool_wol_policy, -}; - -struct ynl_policy_attr ethtool_features_policy[ETHTOOL_A_FEATURES_MAX + 1] = { - [ETHTOOL_A_FEATURES_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_FEATURES_HW] = { .name = "hw", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEATURES_WANTED] = { .name = "wanted", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEATURES_ACTIVE] = { .name = "active", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEATURES_NOCHANGE] = { .name = "nochange", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, -}; - -struct ynl_policy_nest ethtool_features_nest = { - .max_attr = ETHTOOL_A_FEATURES_MAX, - .table = ethtool_features_policy, -}; - -struct ynl_policy_attr ethtool_privflags_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { - [ETHTOOL_A_PRIVFLAGS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .name = "flags", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, -}; - -struct ynl_policy_nest ethtool_privflags_nest = { - .max_attr = ETHTOOL_A_PRIVFLAGS_MAX, - .table = ethtool_privflags_policy, -}; - -struct ynl_policy_attr ethtool_rings_policy[ETHTOOL_A_RINGS_MAX + 1] = { - [ETHTOOL_A_RINGS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_RINGS_RX_MAX] = { .name = "rx-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .name = "rx-mini-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .name = "rx-jumbo-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX_MAX] = { .name = "tx-max", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX] = { .name = "rx", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_MINI] = { .name = "rx-mini", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_JUMBO] = { .name = "rx-jumbo", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX] = { .name = "tx", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_RX_BUF_LEN] = { .name = "rx-buf-len", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] = { .name = "tcp-data-split", .type = YNL_PT_U8, }, - [ETHTOOL_A_RINGS_CQE_SIZE] = { .name = "cqe-size", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX_PUSH] = { .name = "tx-push", .type = YNL_PT_U8, }, - [ETHTOOL_A_RINGS_RX_PUSH] = { .name = "rx-push", .type = YNL_PT_U8, }, - [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .name = "tx-push-buf-len", .type = YNL_PT_U32, }, - [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX] = { .name = "tx-push-buf-len-max", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_rings_nest = { - .max_attr = ETHTOOL_A_RINGS_MAX, - 
-	.table = ethtool_rings_policy,
-};
-
-struct ynl_policy_attr ethtool_channels_policy[ETHTOOL_A_CHANNELS_MAX + 1] = {
-	[ETHTOOL_A_CHANNELS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
-	[ETHTOOL_A_CHANNELS_RX_MAX] = { .name = "rx-max", .type = YNL_PT_U32, },
-	[ETHTOOL_A_CHANNELS_TX_MAX] = { .name = "tx-max", .type = YNL_PT_U32, },
-	[ETHTOOL_A_CHANNELS_OTHER_MAX] = { .name = "other-max", .type = YNL_PT_U32, },
-	[ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .name = "combined-max", .type = YNL_PT_U32, },
-	[ETHTOOL_A_CHANNELS_RX_COUNT] = { .name = "rx-count", .type = YNL_PT_U32, },
-	[ETHTOOL_A_CHANNELS_TX_COUNT] = { .name = "tx-count", .type = YNL_PT_U32, },
-	[ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .name = "other-count", .type = YNL_PT_U32, },
-	[ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .name = "combined-count", .type = YNL_PT_U32, },
-};
-
-struct ynl_policy_nest ethtool_channels_nest = {
-	.max_attr = ETHTOOL_A_CHANNELS_MAX,
-	.table = ethtool_channels_policy,
-};
-
-struct ynl_policy_attr ethtool_coalesce_policy[ETHTOOL_A_COALESCE_MAX + 1] = {
-	[ETHTOOL_A_COALESCE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = &ethtool_header_nest, },
-	[ETHTOOL_A_COALESCE_RX_USECS] = { .name = "rx-usecs", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RX_MAX_FRAMES] = { .name = "rx-max-frames", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RX_USECS_IRQ] = { .name = "rx-usecs-irq", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ] = { .name = "rx-max-frames-irq", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_USECS] = { .name = "tx-usecs", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_MAX_FRAMES] = { .name = "tx-max-frames", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_USECS_IRQ] = { .name = "tx-usecs-irq", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ] = { .name = "tx-max-frames-irq", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_STATS_BLOCK_USECS] = { .name = "stats-block-usecs", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX] = { .name = "use-adaptive-rx", .type = YNL_PT_U8, },
-	[ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX] = { .name = "use-adaptive-tx", .type = YNL_PT_U8, },
-	[ETHTOOL_A_COALESCE_PKT_RATE_LOW] = { .name = "pkt-rate-low", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RX_USECS_LOW] = { .name = "rx-usecs-low", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW] = { .name = "rx-max-frames-low", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_USECS_LOW] = { .name = "tx-usecs-low", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW] = { .name = "tx-max-frames-low", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_PKT_RATE_HIGH] = { .name = "pkt-rate-high", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RX_USECS_HIGH] = { .name = "rx-usecs-high", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH] = { .name = "rx-max-frames-high", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_USECS_HIGH] = { .name = "tx-usecs-high", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .name = "tx-max-frames-high", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .name = "rate-sample-interval", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = { .name = "use-cqe-mode-tx", .type = YNL_PT_U8, },
-	[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = { .name = "use-cqe-mode-rx", .type = YNL_PT_U8, },
-	[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .name = "tx-aggr-max-bytes", .type = YNL_PT_U32, },
-	[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .name =
"tx-aggr-max-frames", .type = YNL_PT_U32, }, - [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .name = "tx-aggr-time-usecs", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_coalesce_nest = { - .max_attr = ETHTOOL_A_COALESCE_MAX, - .table = ethtool_coalesce_policy, -}; - -struct ynl_policy_attr ethtool_pause_policy[ETHTOOL_A_PAUSE_MAX + 1] = { - [ETHTOOL_A_PAUSE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PAUSE_AUTONEG] = { .name = "autoneg", .type = YNL_PT_U8, }, - [ETHTOOL_A_PAUSE_RX] = { .name = "rx", .type = YNL_PT_U8, }, - [ETHTOOL_A_PAUSE_TX] = { .name = "tx", .type = YNL_PT_U8, }, - [ETHTOOL_A_PAUSE_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = ðtool_pause_stat_nest, }, - [ETHTOOL_A_PAUSE_STATS_SRC] = { .name = "stats-src", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_pause_nest = { - .max_attr = ETHTOOL_A_PAUSE_MAX, - .table = ethtool_pause_policy, -}; - -struct ynl_policy_attr ethtool_eee_policy[ETHTOOL_A_EEE_MAX + 1] = { - [ETHTOOL_A_EEE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_EEE_MODES_OURS] = { .name = "modes-ours", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_EEE_MODES_PEER] = { .name = "modes-peer", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_EEE_ACTIVE] = { .name = "active", .type = YNL_PT_U8, }, - [ETHTOOL_A_EEE_ENABLED] = { .name = "enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_EEE_TX_LPI_ENABLED] = { .name = "tx-lpi-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_EEE_TX_LPI_TIMER] = { .name = "tx-lpi-timer", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_eee_nest = { - .max_attr = ETHTOOL_A_EEE_MAX, - .table = ethtool_eee_policy, -}; - -struct ynl_policy_attr ethtool_tsinfo_policy[ETHTOOL_A_TSINFO_MAX + 1] = { - [ETHTOOL_A_TSINFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_TSINFO_TIMESTAMPING] = { .name = "timestamping", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TSINFO_TX_TYPES] = { .name = "tx-types", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TSINFO_RX_FILTERS] = { .name = "rx-filters", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_TSINFO_PHC_INDEX] = { .name = "phc-index", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_tsinfo_nest = { - .max_attr = ETHTOOL_A_TSINFO_MAX, - .table = ethtool_tsinfo_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_MAX, - .table = ethtool_cable_test_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_ntf_policy[ETHTOOL_A_CABLE_TEST_NTF_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_NTF_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_CABLE_TEST_NTF_STATUS] = { .name = "status", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_TEST_NTF_NEST] = { .name = "nest", .type = YNL_PT_NEST, .nest = ðtool_cable_nest_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_ntf_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_NTF_MAX, - .table = ethtool_cable_test_ntf_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_tdr_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_TDR_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, 
}, - [ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .name = "cfg", .type = YNL_PT_NEST, .nest = ðtool_cable_test_tdr_cfg_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_tdr_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_TDR_MAX, - .table = ethtool_cable_test_tdr_policy, -}; - -struct ynl_policy_attr ethtool_cable_test_tdr_ntf_policy[ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX + 1] = { - [ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS] = { .name = "status", .type = YNL_PT_U8, }, - [ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST] = { .name = "nest", .type = YNL_PT_NEST, .nest = ðtool_cable_nest_nest, }, -}; - -struct ynl_policy_nest ethtool_cable_test_tdr_ntf_nest = { - .max_attr = ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX, - .table = ethtool_cable_test_tdr_ntf_policy, -}; - -struct ynl_policy_attr ethtool_tunnel_info_policy[ETHTOOL_A_TUNNEL_INFO_MAX + 1] = { - [ETHTOOL_A_TUNNEL_INFO_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_TUNNEL_INFO_UDP_PORTS] = { .name = "udp-ports", .type = YNL_PT_NEST, .nest = ðtool_tunnel_udp_nest, }, -}; - -struct ynl_policy_nest ethtool_tunnel_info_nest = { - .max_attr = ETHTOOL_A_TUNNEL_INFO_MAX, - .table = ethtool_tunnel_info_policy, -}; - -struct ynl_policy_attr ethtool_fec_policy[ETHTOOL_A_FEC_MAX + 1] = { - [ETHTOOL_A_FEC_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_FEC_MODES] = { .name = "modes", .type = YNL_PT_NEST, .nest = ðtool_bitset_nest, }, - [ETHTOOL_A_FEC_AUTO] = { .name = "auto", .type = YNL_PT_U8, }, - [ETHTOOL_A_FEC_ACTIVE] = { .name = "active", .type = YNL_PT_U32, }, - [ETHTOOL_A_FEC_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = ðtool_fec_stat_nest, }, -}; - -struct ynl_policy_nest ethtool_fec_nest = { - .max_attr = ETHTOOL_A_FEC_MAX, - .table = ethtool_fec_policy, -}; - -struct ynl_policy_attr ethtool_module_eeprom_policy[ETHTOOL_A_MODULE_EEPROM_MAX + 1] = { - [ETHTOOL_A_MODULE_EEPROM_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .name = "offset", .type = YNL_PT_U32, }, - [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .name = "length", .type = YNL_PT_U32, }, - [ETHTOOL_A_MODULE_EEPROM_PAGE] = { .name = "page", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_EEPROM_BANK] = { .name = "bank", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] = { .name = "i2c-address", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_EEPROM_DATA] = { .name = "data", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_module_eeprom_nest = { - .max_attr = ETHTOOL_A_MODULE_EEPROM_MAX, - .table = ethtool_module_eeprom_policy, -}; - -struct ynl_policy_attr ethtool_phc_vclocks_policy[ETHTOOL_A_PHC_VCLOCKS_MAX + 1] = { - [ETHTOOL_A_PHC_VCLOCKS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PHC_VCLOCKS_NUM] = { .name = "num", .type = YNL_PT_U32, }, - [ETHTOOL_A_PHC_VCLOCKS_INDEX] = { .name = "index", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_phc_vclocks_nest = { - .max_attr = ETHTOOL_A_PHC_VCLOCKS_MAX, - .table = ethtool_phc_vclocks_policy, -}; - -struct ynl_policy_attr ethtool_module_policy[ETHTOOL_A_MODULE_MAX + 1] = { - [ETHTOOL_A_MODULE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_MODULE_POWER_MODE_POLICY] = { .name = "power-mode-policy", .type = YNL_PT_U8, }, - [ETHTOOL_A_MODULE_POWER_MODE] = { .name = 
"power-mode", .type = YNL_PT_U8, }, -}; - -struct ynl_policy_nest ethtool_module_nest = { - .max_attr = ETHTOOL_A_MODULE_MAX, - .table = ethtool_module_policy, -}; - -struct ynl_policy_attr ethtool_pse_policy[ETHTOOL_A_PSE_MAX + 1] = { - [ETHTOOL_A_PSE_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PODL_PSE_ADMIN_STATE] = { .name = "admin-state", .type = YNL_PT_U32, }, - [ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] = { .name = "admin-control", .type = YNL_PT_U32, }, - [ETHTOOL_A_PODL_PSE_PW_D_STATUS] = { .name = "pw-d-status", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_pse_nest = { - .max_attr = ETHTOOL_A_PSE_MAX, - .table = ethtool_pse_policy, -}; - -struct ynl_policy_attr ethtool_rss_policy[ETHTOOL_A_RSS_MAX + 1] = { - [ETHTOOL_A_RSS_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_RSS_CONTEXT] = { .name = "context", .type = YNL_PT_U32, }, - [ETHTOOL_A_RSS_HFUNC] = { .name = "hfunc", .type = YNL_PT_U32, }, - [ETHTOOL_A_RSS_INDIR] = { .name = "indir", .type = YNL_PT_BINARY,}, - [ETHTOOL_A_RSS_HKEY] = { .name = "hkey", .type = YNL_PT_BINARY,}, -}; - -struct ynl_policy_nest ethtool_rss_nest = { - .max_attr = ETHTOOL_A_RSS_MAX, - .table = ethtool_rss_policy, -}; - -struct ynl_policy_attr ethtool_plca_policy[ETHTOOL_A_PLCA_MAX + 1] = { - [ETHTOOL_A_PLCA_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_PLCA_VERSION] = { .name = "version", .type = YNL_PT_U16, }, - [ETHTOOL_A_PLCA_ENABLED] = { .name = "enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_PLCA_STATUS] = { .name = "status", .type = YNL_PT_U8, }, - [ETHTOOL_A_PLCA_NODE_CNT] = { .name = "node-cnt", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_NODE_ID] = { .name = "node-id", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_TO_TMR] = { .name = "to-tmr", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_BURST_CNT] = { .name = "burst-cnt", .type = YNL_PT_U32, }, - [ETHTOOL_A_PLCA_BURST_TMR] = { .name = "burst-tmr", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest ethtool_plca_nest = { - .max_attr = ETHTOOL_A_PLCA_MAX, - .table = ethtool_plca_policy, -}; - -struct ynl_policy_attr ethtool_mm_policy[ETHTOOL_A_MM_MAX + 1] = { - [ETHTOOL_A_MM_HEADER] = { .name = "header", .type = YNL_PT_NEST, .nest = ðtool_header_nest, }, - [ETHTOOL_A_MM_PMAC_ENABLED] = { .name = "pmac-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_TX_ENABLED] = { .name = "tx-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_TX_ACTIVE] = { .name = "tx-active", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_TX_MIN_FRAG_SIZE] = { .name = "tx-min-frag-size", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_RX_MIN_FRAG_SIZE] = { .name = "rx-min-frag-size", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_VERIFY_ENABLED] = { .name = "verify-enabled", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_VERIFY_STATUS] = { .name = "verify-status", .type = YNL_PT_U8, }, - [ETHTOOL_A_MM_VERIFY_TIME] = { .name = "verify-time", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_MAX_VERIFY_TIME] = { .name = "max-verify-time", .type = YNL_PT_U32, }, - [ETHTOOL_A_MM_STATS] = { .name = "stats", .type = YNL_PT_NEST, .nest = ðtool_mm_stat_nest, }, -}; - -struct ynl_policy_nest ethtool_mm_nest = { - .max_attr = ETHTOOL_A_MM_MAX, - .table = ethtool_mm_policy, -}; - -/* Common nested types */ -void ethtool_header_free(struct ethtool_header *obj) -{ - free(obj->dev_name); -} - -int ethtool_header_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_header *obj) -{ - struct nlattr *nest; - - nest = 
mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.dev_index) - mnl_attr_put_u32(nlh, ETHTOOL_A_HEADER_DEV_INDEX, obj->dev_index); - if (obj->_present.dev_name_len) - mnl_attr_put_strz(nlh, ETHTOOL_A_HEADER_DEV_NAME, obj->dev_name); - if (obj->_present.flags) - mnl_attr_put_u32(nlh, ETHTOOL_A_HEADER_FLAGS, obj->flags); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_header_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_header *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_HEADER_DEV_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dev_index = 1; - dst->dev_index = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_HEADER_DEV_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.dev_name_len = len; - dst->dev_name = malloc(len + 1); - memcpy(dst->dev_name, mnl_attr_get_str(attr), len); - dst->dev_name[len] = 0; - } else if (type == ETHTOOL_A_HEADER_FLAGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.flags = 1; - dst->flags = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void ethtool_pause_stat_free(struct ethtool_pause_stat *obj) -{ -} - -int ethtool_pause_stat_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_pause_stat *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.tx_frames) - mnl_attr_put_u64(nlh, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, obj->tx_frames); - if (obj->_present.rx_frames) - mnl_attr_put_u64(nlh, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, obj->rx_frames); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_pause_stat_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_pause_stat *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PAUSE_STAT_TX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_frames = 1; - dst->tx_frames = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_PAUSE_STAT_RX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_frames = 1; - dst->rx_frames = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void ethtool_cable_test_tdr_cfg_free(struct ethtool_cable_test_tdr_cfg *obj) -{ -} - -void ethtool_fec_stat_free(struct ethtool_fec_stat *obj) -{ - free(obj->corrected); - free(obj->uncorr); - free(obj->corr_bits); -} - -int ethtool_fec_stat_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_fec_stat *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.corrected_len) - mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_CORRECTED, obj->_present.corrected_len, obj->corrected); - if (obj->_present.uncorr_len) - mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_UNCORR, obj->_present.uncorr_len, obj->uncorr); - if (obj->_present.corr_bits_len) - mnl_attr_put(nlh, ETHTOOL_A_FEC_STAT_CORR_BITS, obj->_present.corr_bits_len, obj->corr_bits); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_fec_stat_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_fec_stat *dst = yarg->data; - const struct nlattr *attr; - - 
mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEC_STAT_CORRECTED) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.corrected_len = len; - dst->corrected = malloc(len); - memcpy(dst->corrected, mnl_attr_get_payload(attr), len); - } else if (type == ETHTOOL_A_FEC_STAT_UNCORR) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.uncorr_len = len; - dst->uncorr = malloc(len); - memcpy(dst->uncorr, mnl_attr_get_payload(attr), len); - } else if (type == ETHTOOL_A_FEC_STAT_CORR_BITS) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.corr_bits_len = len; - dst->corr_bits = malloc(len); - memcpy(dst->corr_bits, mnl_attr_get_payload(attr), len); - } - } - - return 0; -} - -void ethtool_mm_stat_free(struct ethtool_mm_stat *obj) -{ -} - -int ethtool_mm_stat_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_mm_stat *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reassembly_errors = 1; - dst->reassembly_errors = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_SMD_ERRORS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.smd_errors = 1; - dst->smd_errors = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_REASSEMBLY_OK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.reassembly_ok = 1; - dst->reassembly_ok = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_RX_FRAG_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_frag_count = 1; - dst->rx_frag_count = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_TX_FRAG_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_frag_count = 1; - dst->tx_frag_count = mnl_attr_get_u64(attr); - } else if (type == ETHTOOL_A_MM_STAT_HOLD_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hold_count = 1; - dst->hold_count = mnl_attr_get_u64(attr); - } - } - - return 0; -} - -void ethtool_cable_result_free(struct ethtool_cable_result *obj) -{ -} - -int ethtool_cable_result_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_cable_result *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_RESULT_PAIR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pair = 1; - dst->pair = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_CABLE_RESULT_CODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.code = 1; - dst->code = mnl_attr_get_u8(attr); - } - } - - return 0; -} - -void ethtool_cable_fault_length_free(struct ethtool_cable_fault_length *obj) -{ -} - -int ethtool_cable_fault_length_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_cable_fault_length *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, 
nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pair = 1; - dst->pair = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_CABLE_FAULT_LENGTH_CM) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.cm = 1; - dst->cm = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void ethtool_bitset_bit_free(struct ethtool_bitset_bit *obj) -{ - free(obj->name); -} - -int ethtool_bitset_bit_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_bitset_bit *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.index) - mnl_attr_put_u32(nlh, ETHTOOL_A_BITSET_BIT_INDEX, obj->index); - if (obj->_present.name_len) - mnl_attr_put_strz(nlh, ETHTOOL_A_BITSET_BIT_NAME, obj->name); - if (obj->_present.value) - mnl_attr_put(nlh, ETHTOOL_A_BITSET_BIT_VALUE, 0, NULL); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_bitset_bit_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_bitset_bit *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_BITSET_BIT_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.index = 1; - dst->index = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_BITSET_BIT_NAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.name_len = len; - dst->name = malloc(len + 1); - memcpy(dst->name, mnl_attr_get_str(attr), len); - dst->name[len] = 0; - } else if (type == ETHTOOL_A_BITSET_BIT_VALUE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.value = 1; - } - } - - return 0; -} - -void ethtool_tunnel_udp_entry_free(struct ethtool_tunnel_udp_entry *obj) -{ -} - -int ethtool_tunnel_udp_entry_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_tunnel_udp_entry *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port = 1; - dst->port = mnl_attr_get_u16(attr); - } else if (type == ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.type = 1; - dst->type = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -void ethtool_string_free(struct ethtool_string *obj) -{ - free(obj->value); -} - -int ethtool_string_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_string *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.index) - mnl_attr_put_u32(nlh, ETHTOOL_A_STRING_INDEX, obj->index); - if (obj->_present.value_len) - mnl_attr_put_strz(nlh, ETHTOOL_A_STRING_VALUE, obj->value); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_string_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_string *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRING_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - 
dst->_present.index = 1; - dst->index = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_STRING_VALUE) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.value_len = len; - dst->value = malloc(len + 1); - memcpy(dst->value, mnl_attr_get_str(attr), len); - dst->value[len] = 0; - } - } - - return 0; -} - -void ethtool_cable_nest_free(struct ethtool_cable_nest *obj) -{ - ethtool_cable_result_free(&obj->result); - ethtool_cable_fault_length_free(&obj->fault_length); -} - -int ethtool_cable_nest_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_cable_nest *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_NEST_RESULT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.result = 1; - - parg.rsp_policy = ðtool_cable_result_nest; - parg.data = &dst->result; - if (ethtool_cable_result_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CABLE_NEST_FAULT_LENGTH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.fault_length = 1; - - parg.rsp_policy = ðtool_cable_fault_length_nest; - parg.data = &dst->fault_length; - if (ethtool_cable_fault_length_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -void ethtool_bitset_bits_free(struct ethtool_bitset_bits *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_bit; i++) - ethtool_bitset_bit_free(&obj->bit[i]); - free(obj->bit); -} - -int ethtool_bitset_bits_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_bitset_bits *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - for (unsigned int i = 0; i < obj->n_bit; i++) - ethtool_bitset_bit_put(nlh, ETHTOOL_A_BITSET_BITS_BIT, &obj->bit[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_bitset_bits_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_bitset_bits *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - unsigned int n_bit = 0; - int i; - - parg.ys = yarg->ys; - - if (dst->bit) - return ynl_error_parse(yarg, "attribute already present (bitset-bits.bit)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_BITSET_BITS_BIT) { - n_bit++; - } - } - - if (n_bit) { - dst->bit = calloc(n_bit, sizeof(*dst->bit)); - dst->n_bit = n_bit; - i = 0; - parg.rsp_policy = ðtool_bitset_bit_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_BITSET_BITS_BIT) { - parg.data = &dst->bit[i]; - if (ethtool_bitset_bit_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_strings_free(struct ethtool_strings *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_string; i++) - ethtool_string_free(&obj->string[i]); - free(obj->string); -} - -int ethtool_strings_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_strings *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - for (unsigned int i = 0; i < obj->n_string; i++) - ethtool_string_put(nlh, ETHTOOL_A_STRINGS_STRING, &obj->string[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_strings_parse(struct ynl_parse_arg *yarg, - 
const struct nlattr *nested) -{ - struct ethtool_strings *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - unsigned int n_string = 0; - int i; - - parg.ys = yarg->ys; - - if (dst->string) - return ynl_error_parse(yarg, "attribute already present (strings.string)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRINGS_STRING) { - n_string++; - } - } - - if (n_string) { - dst->string = calloc(n_string, sizeof(*dst->string)); - dst->n_string = n_string; - i = 0; - parg.rsp_policy = ðtool_string_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGS_STRING) { - parg.data = &dst->string[i]; - if (ethtool_string_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_bitset_free(struct ethtool_bitset *obj) -{ - ethtool_bitset_bits_free(&obj->bits); -} - -int ethtool_bitset_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_bitset *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.nomask) - mnl_attr_put(nlh, ETHTOOL_A_BITSET_NOMASK, 0, NULL); - if (obj->_present.size) - mnl_attr_put_u32(nlh, ETHTOOL_A_BITSET_SIZE, obj->size); - if (obj->_present.bits) - ethtool_bitset_bits_put(nlh, ETHTOOL_A_BITSET_BITS, &obj->bits); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_bitset_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_bitset *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_BITSET_NOMASK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nomask = 1; - } else if (type == ETHTOOL_A_BITSET_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.size = 1; - dst->size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_BITSET_BITS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.bits = 1; - - parg.rsp_policy = ðtool_bitset_bits_nest; - parg.data = &dst->bits; - if (ethtool_bitset_bits_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -void ethtool_stringset_free(struct ethtool_stringset_ *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_strings; i++) - ethtool_strings_free(&obj->strings[i]); - free(obj->strings); -} - -int ethtool_stringset_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_stringset_ *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.id) - mnl_attr_put_u32(nlh, ETHTOOL_A_STRINGSET_ID, obj->id); - if (obj->_present.count) - mnl_attr_put_u32(nlh, ETHTOOL_A_STRINGSET_COUNT, obj->count); - for (unsigned int i = 0; i < obj->n_strings; i++) - ethtool_strings_put(nlh, ETHTOOL_A_STRINGSET_STRINGS, &obj->strings[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_stringset_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_stringset_ *dst = yarg->data; - unsigned int n_strings = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->strings) - return ynl_error_parse(yarg, "attribute already present (stringset.strings)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRINGSET_ID) 
{ - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_STRINGSET_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.count = 1; - dst->count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_STRINGSET_STRINGS) { - n_strings++; - } - } - - if (n_strings) { - dst->strings = calloc(n_strings, sizeof(*dst->strings)); - dst->n_strings = n_strings; - i = 0; - parg.rsp_policy = ðtool_strings_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGSET_STRINGS) { - parg.data = &dst->strings[i]; - if (ethtool_strings_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_tunnel_udp_table_free(struct ethtool_tunnel_udp_table *obj) -{ - unsigned int i; - - ethtool_bitset_free(&obj->types); - for (i = 0; i < obj->n_entry; i++) - ethtool_tunnel_udp_entry_free(&obj->entry[i]); - free(obj->entry); -} - -int ethtool_tunnel_udp_table_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_tunnel_udp_table *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - unsigned int n_entry = 0; - int i; - - parg.ys = yarg->ys; - - if (dst->entry) - return ynl_error_parse(yarg, "attribute already present (tunnel-udp-table.entry)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.size = 1; - dst->size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.types = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->types; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY) { - n_entry++; - } - } - - if (n_entry) { - dst->entry = calloc(n_entry, sizeof(*dst->entry)); - dst->n_entry = n_entry; - i = 0; - parg.rsp_policy = ðtool_tunnel_udp_entry_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY) { - parg.data = &dst->entry[i]; - if (ethtool_tunnel_udp_entry_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_stringsets_free(struct ethtool_stringsets *obj) -{ - unsigned int i; - - for (i = 0; i < obj->n_stringset; i++) - ethtool_stringset_free(&obj->stringset[i]); - free(obj->stringset); -} - -int ethtool_stringsets_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct ethtool_stringsets *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - for (unsigned int i = 0; i < obj->n_stringset; i++) - ethtool_stringset_put(nlh, ETHTOOL_A_STRINGSETS_STRINGSET, &obj->stringset[i]); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int ethtool_stringsets_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_stringsets *dst = yarg->data; - unsigned int n_stringset = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - parg.ys = yarg->ys; - - if (dst->stringset) - return ynl_error_parse(yarg, "attribute already present (stringsets.stringset)"); - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRINGSETS_STRINGSET) { - n_stringset++; - } - } - - if (n_stringset) { 
- dst->stringset = calloc(n_stringset, sizeof(*dst->stringset)); - dst->n_stringset = n_stringset; - i = 0; - parg.rsp_policy = ðtool_stringset_nest; - mnl_attr_for_each_nested(attr, nested) { - if (mnl_attr_get_type(attr) == ETHTOOL_A_STRINGSETS_STRINGSET) { - parg.data = &dst->stringset[i]; - if (ethtool_stringset_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - - return 0; -} - -void ethtool_tunnel_udp_free(struct ethtool_tunnel_udp *obj) -{ - ethtool_tunnel_udp_table_free(&obj->table); -} - -int ethtool_tunnel_udp_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct ethtool_tunnel_udp *dst = yarg->data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - parg.ys = yarg->ys; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_UDP_TABLE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.table = 1; - - parg.rsp_policy = ðtool_tunnel_udp_table_nest; - parg.data = &dst->table; - if (ethtool_tunnel_udp_table_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return 0; -} - -/* ============== ETHTOOL_MSG_STRSET_GET ============== */ -/* ETHTOOL_MSG_STRSET_GET - do */ -void ethtool_strset_get_req_free(struct ethtool_strset_get_req *req) -{ - ethtool_header_free(&req->header); - ethtool_stringsets_free(&req->stringsets); - free(req); -} - -void ethtool_strset_get_rsp_free(struct ethtool_strset_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_stringsets_free(&rsp->stringsets); - free(rsp); -} - -int ethtool_strset_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_strset_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_STRSET_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_STRSET_STRINGSETS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stringsets = 1; - - parg.rsp_policy = ðtool_stringsets_nest; - parg.data = &dst->stringsets; - if (ethtool_stringsets_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_strset_get_rsp * -ethtool_strset_get(struct ynl_sock *ys, struct ethtool_strset_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_strset_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_STRSET_GET, 1); - ys->req_policy = ðtool_strset_nest; - yrs.yarg.rsp_policy = ðtool_strset_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_STRSET_HEADER, &req->header); - if (req->_present.stringsets) - ethtool_stringsets_put(nlh, ETHTOOL_A_STRSET_STRINGSETS, &req->stringsets); - if (req->_present.counts_only) - mnl_attr_put(nlh, ETHTOOL_A_STRSET_COUNTS_ONLY, 0, NULL); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_strset_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_STRSET_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_strset_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_STRSET_GET - 
dump */ -void ethtool_strset_get_list_free(struct ethtool_strset_get_list *rsp) -{ - struct ethtool_strset_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_stringsets_free(&rsp->obj.stringsets); - free(rsp); - } -} - -struct ethtool_strset_get_list * -ethtool_strset_get_dump(struct ynl_sock *ys, - struct ethtool_strset_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_strset_get_list); - yds.cb = ethtool_strset_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_STRSET_GET; - yds.rsp_policy = ðtool_strset_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_STRSET_GET, 1); - ys->req_policy = ðtool_strset_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_STRSET_HEADER, &req->header); - if (req->_present.stringsets) - ethtool_stringsets_put(nlh, ETHTOOL_A_STRSET_STRINGSETS, &req->stringsets); - if (req->_present.counts_only) - mnl_attr_put(nlh, ETHTOOL_A_STRSET_COUNTS_ONLY, 0, NULL); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_strset_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_LINKINFO_GET ============== */ -/* ETHTOOL_MSG_LINKINFO_GET - do */ -void ethtool_linkinfo_get_req_free(struct ethtool_linkinfo_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_linkinfo_get_rsp_free(struct ethtool_linkinfo_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_linkinfo_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_linkinfo_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_LINKINFO_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKINFO_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port = 1; - dst->port = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_PHYADDR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.phyaddr = 1; - dst->phyaddr = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_TP_MDIX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tp_mdix = 1; - dst->tp_mdix = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_TP_MDIX_CTRL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tp_mdix_ctrl = 1; - dst->tp_mdix_ctrl = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKINFO_TRANSCEIVER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.transceiver = 1; - dst->transceiver = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_linkinfo_get_rsp * -ethtool_linkinfo_get(struct ynl_sock *ys, struct ethtool_linkinfo_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_linkinfo_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, 
ETHTOOL_MSG_LINKINFO_GET, 1); - ys->req_policy = ðtool_linkinfo_nest; - yrs.yarg.rsp_policy = ðtool_linkinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_linkinfo_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_LINKINFO_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_linkinfo_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_LINKINFO_GET - dump */ -void ethtool_linkinfo_get_list_free(struct ethtool_linkinfo_get_list *rsp) -{ - struct ethtool_linkinfo_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_linkinfo_get_list * -ethtool_linkinfo_get_dump(struct ynl_sock *ys, - struct ethtool_linkinfo_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_linkinfo_get_list); - yds.cb = ethtool_linkinfo_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_LINKINFO_GET; - yds.rsp_policy = ðtool_linkinfo_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_GET, 1); - ys->req_policy = ðtool_linkinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_linkinfo_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_LINKINFO_GET - notify */ -void ethtool_linkinfo_get_ntf_free(struct ethtool_linkinfo_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_LINKINFO_SET ============== */ -/* ETHTOOL_MSG_LINKINFO_SET - do */ -void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_linkinfo_set(struct ynl_sock *ys, - struct ethtool_linkinfo_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKINFO_SET, 1); - ys->req_policy = ðtool_linkinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKINFO_HEADER, &req->header); - if (req->_present.port) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_PORT, req->port); - if (req->_present.phyaddr) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_PHYADDR, req->phyaddr); - if (req->_present.tp_mdix) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TP_MDIX, req->tp_mdix); - if (req->_present.tp_mdix_ctrl) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, req->tp_mdix_ctrl); - if (req->_present.transceiver) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKINFO_TRANSCEIVER, req->transceiver); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_LINKMODES_GET ============== */ -/* ETHTOOL_MSG_LINKMODES_GET - do */ -void ethtool_linkmodes_get_req_free(struct ethtool_linkmodes_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_linkmodes_get_rsp_free(struct ethtool_linkmodes_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->ours); - ethtool_bitset_free(&rsp->peer); - free(rsp); -} - -int ethtool_linkmodes_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_linkmodes_get_rsp *dst; 
- struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_LINKMODES_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKMODES_AUTONEG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.autoneg = 1; - dst->autoneg = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_OURS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ours = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->ours; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKMODES_PEER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.peer = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->peer; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKMODES_SPEED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.speed = 1; - dst->speed = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKMODES_DUPLEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.duplex = 1; - dst->duplex = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.master_slave_cfg = 1; - dst->master_slave_cfg = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.master_slave_state = 1; - dst->master_slave_state = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKMODES_LANES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.lanes = 1; - dst->lanes = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKMODES_RATE_MATCHING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rate_matching = 1; - dst->rate_matching = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_linkmodes_get_rsp * -ethtool_linkmodes_get(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_linkmodes_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_GET, 1); - ys->req_policy = ðtool_linkmodes_nest; - yrs.yarg.rsp_policy = ðtool_linkmodes_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_linkmodes_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_LINKMODES_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_linkmodes_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_LINKMODES_GET - dump */ -void ethtool_linkmodes_get_list_free(struct ethtool_linkmodes_get_list *rsp) -{ - struct ethtool_linkmodes_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - 
ethtool_bitset_free(&rsp->obj.ours); - ethtool_bitset_free(&rsp->obj.peer); - free(rsp); - } -} - -struct ethtool_linkmodes_get_list * -ethtool_linkmodes_get_dump(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_linkmodes_get_list); - yds.cb = ethtool_linkmodes_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_LINKMODES_GET; - yds.rsp_policy = ðtool_linkmodes_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_GET, 1); - ys->req_policy = ðtool_linkmodes_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_linkmodes_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_LINKMODES_GET - notify */ -void ethtool_linkmodes_get_ntf_free(struct ethtool_linkmodes_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.ours); - ethtool_bitset_free(&rsp->obj.peer); - free(rsp); -} - -/* ============== ETHTOOL_MSG_LINKMODES_SET ============== */ -/* ETHTOOL_MSG_LINKMODES_SET - do */ -void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->ours); - ethtool_bitset_free(&req->peer); - free(req); -} - -int ethtool_linkmodes_set(struct ynl_sock *ys, - struct ethtool_linkmodes_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKMODES_SET, 1); - ys->req_policy = ðtool_linkmodes_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKMODES_HEADER, &req->header); - if (req->_present.autoneg) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_AUTONEG, req->autoneg); - if (req->_present.ours) - ethtool_bitset_put(nlh, ETHTOOL_A_LINKMODES_OURS, &req->ours); - if (req->_present.peer) - ethtool_bitset_put(nlh, ETHTOOL_A_LINKMODES_PEER, &req->peer); - if (req->_present.speed) - mnl_attr_put_u32(nlh, ETHTOOL_A_LINKMODES_SPEED, req->speed); - if (req->_present.duplex) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_DUPLEX, req->duplex); - if (req->_present.master_slave_cfg) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, req->master_slave_cfg); - if (req->_present.master_slave_state) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, req->master_slave_state); - if (req->_present.lanes) - mnl_attr_put_u32(nlh, ETHTOOL_A_LINKMODES_LANES, req->lanes); - if (req->_present.rate_matching) - mnl_attr_put_u8(nlh, ETHTOOL_A_LINKMODES_RATE_MATCHING, req->rate_matching); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_LINKSTATE_GET ============== */ -/* ETHTOOL_MSG_LINKSTATE_GET - do */ -void ethtool_linkstate_get_req_free(struct ethtool_linkstate_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_linkstate_get_rsp_free(struct ethtool_linkstate_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_linkstate_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_linkstate_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct 
genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_LINKSTATE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_LINKSTATE_LINK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.link = 1; - dst->link = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKSTATE_SQI) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sqi = 1; - dst->sqi = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKSTATE_SQI_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sqi_max = 1; - dst->sqi_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_LINKSTATE_EXT_STATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ext_state = 1; - dst->ext_state = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKSTATE_EXT_SUBSTATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ext_substate = 1; - dst->ext_substate = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ext_down_cnt = 1; - dst->ext_down_cnt = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_linkstate_get_rsp * -ethtool_linkstate_get(struct ynl_sock *ys, - struct ethtool_linkstate_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_linkstate_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_LINKSTATE_GET, 1); - ys->req_policy = ðtool_linkstate_nest; - yrs.yarg.rsp_policy = ðtool_linkstate_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKSTATE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_linkstate_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_LINKSTATE_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_linkstate_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_LINKSTATE_GET - dump */ -void ethtool_linkstate_get_list_free(struct ethtool_linkstate_get_list *rsp) -{ - struct ethtool_linkstate_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_linkstate_get_list * -ethtool_linkstate_get_dump(struct ynl_sock *ys, - struct ethtool_linkstate_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_linkstate_get_list); - yds.cb = ethtool_linkstate_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_LINKSTATE_GET; - yds.rsp_policy = ðtool_linkstate_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_LINKSTATE_GET, 1); - ys->req_policy = ðtool_linkstate_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_LINKSTATE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_linkstate_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_DEBUG_GET ============== */ -/* ETHTOOL_MSG_DEBUG_GET - do */ -void ethtool_debug_get_req_free(struct 
ethtool_debug_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_debug_get_rsp_free(struct ethtool_debug_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->msgmask); - free(rsp); -} - -int ethtool_debug_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_debug_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_DEBUG_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_DEBUG_MSGMASK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.msgmask = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->msgmask; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_debug_get_rsp * -ethtool_debug_get(struct ynl_sock *ys, struct ethtool_debug_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_debug_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_DEBUG_GET, 1); - ys->req_policy = ðtool_debug_nest; - yrs.yarg.rsp_policy = ðtool_debug_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_debug_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_DEBUG_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_debug_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_DEBUG_GET - dump */ -void ethtool_debug_get_list_free(struct ethtool_debug_get_list *rsp) -{ - struct ethtool_debug_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.msgmask); - free(rsp); - } -} - -struct ethtool_debug_get_list * -ethtool_debug_get_dump(struct ynl_sock *ys, - struct ethtool_debug_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_debug_get_list); - yds.cb = ethtool_debug_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_DEBUG_GET; - yds.rsp_policy = ðtool_debug_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_DEBUG_GET, 1); - ys->req_policy = ðtool_debug_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_debug_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_DEBUG_GET - notify */ -void ethtool_debug_get_ntf_free(struct ethtool_debug_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.msgmask); - free(rsp); -} - -/* ============== ETHTOOL_MSG_DEBUG_SET ============== */ -/* ETHTOOL_MSG_DEBUG_SET - do */ -void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->msgmask); - free(req); -} - -int ethtool_debug_set(struct 
ynl_sock *ys, struct ethtool_debug_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_DEBUG_SET, 1); - ys->req_policy = ðtool_debug_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_DEBUG_HEADER, &req->header); - if (req->_present.msgmask) - ethtool_bitset_put(nlh, ETHTOOL_A_DEBUG_MSGMASK, &req->msgmask); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_WOL_GET ============== */ -/* ETHTOOL_MSG_WOL_GET - do */ -void ethtool_wol_get_req_free(struct ethtool_wol_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_wol_get_rsp_free(struct ethtool_wol_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->modes); - free(rsp->sopass); - free(rsp); -} - -int ethtool_wol_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_wol_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_WOL_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_WOL_MODES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_WOL_SOPASS) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.sopass_len = len; - dst->sopass = malloc(len); - memcpy(dst->sopass, mnl_attr_get_payload(attr), len); - } - } - - return MNL_CB_OK; -} - -struct ethtool_wol_get_rsp * -ethtool_wol_get(struct ynl_sock *ys, struct ethtool_wol_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_wol_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_WOL_GET, 1); - ys->req_policy = ðtool_wol_nest; - yrs.yarg.rsp_policy = ðtool_wol_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_wol_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_WOL_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_wol_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_WOL_GET - dump */ -void ethtool_wol_get_list_free(struct ethtool_wol_get_list *rsp) -{ - struct ethtool_wol_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - free(rsp->obj.sopass); - free(rsp); - } -} - -struct ethtool_wol_get_list * -ethtool_wol_get_dump(struct ynl_sock *ys, struct ethtool_wol_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_wol_get_list); - yds.cb = ethtool_wol_get_rsp_parse; - yds.rsp_cmd = 
ETHTOOL_MSG_WOL_GET; - yds.rsp_policy = ðtool_wol_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_WOL_GET, 1); - ys->req_policy = ðtool_wol_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_wol_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_WOL_GET - notify */ -void ethtool_wol_get_ntf_free(struct ethtool_wol_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - free(rsp->obj.sopass); - free(rsp); -} - -/* ============== ETHTOOL_MSG_WOL_SET ============== */ -/* ETHTOOL_MSG_WOL_SET - do */ -void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->modes); - free(req->sopass); - free(req); -} - -int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_WOL_SET, 1); - ys->req_policy = ðtool_wol_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_WOL_HEADER, &req->header); - if (req->_present.modes) - ethtool_bitset_put(nlh, ETHTOOL_A_WOL_MODES, &req->modes); - if (req->_present.sopass_len) - mnl_attr_put(nlh, ETHTOOL_A_WOL_SOPASS, req->_present.sopass_len, req->sopass); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_FEATURES_GET ============== */ -/* ETHTOOL_MSG_FEATURES_GET - do */ -void ethtool_features_get_req_free(struct ethtool_features_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_features_get_rsp_free(struct ethtool_features_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->hw); - ethtool_bitset_free(&rsp->wanted); - ethtool_bitset_free(&rsp->active); - ethtool_bitset_free(&rsp->nochange); - free(rsp); -} - -int ethtool_features_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_features_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEATURES_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_HW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hw = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->hw; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_WANTED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.wanted = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->wanted; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->active; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == 
ETHTOOL_A_FEATURES_NOCHANGE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nochange = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->nochange; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_features_get_rsp * -ethtool_features_get(struct ynl_sock *ys, struct ethtool_features_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_features_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEATURES_GET, 1); - ys->req_policy = ðtool_features_nest; - yrs.yarg.rsp_policy = ðtool_features_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_features_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_FEATURES_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_features_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_FEATURES_GET - dump */ -void ethtool_features_get_list_free(struct ethtool_features_get_list *rsp) -{ - struct ethtool_features_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.hw); - ethtool_bitset_free(&rsp->obj.wanted); - ethtool_bitset_free(&rsp->obj.active); - ethtool_bitset_free(&rsp->obj.nochange); - free(rsp); - } -} - -struct ethtool_features_get_list * -ethtool_features_get_dump(struct ynl_sock *ys, - struct ethtool_features_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_features_get_list); - yds.cb = ethtool_features_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_FEATURES_GET; - yds.rsp_policy = ðtool_features_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_FEATURES_GET, 1); - ys->req_policy = ðtool_features_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_features_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_FEATURES_GET - notify */ -void ethtool_features_get_ntf_free(struct ethtool_features_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.hw); - ethtool_bitset_free(&rsp->obj.wanted); - ethtool_bitset_free(&rsp->obj.active); - ethtool_bitset_free(&rsp->obj.nochange); - free(rsp); -} - -/* ============== ETHTOOL_MSG_FEATURES_SET ============== */ -/* ETHTOOL_MSG_FEATURES_SET - do */ -void ethtool_features_set_req_free(struct ethtool_features_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->hw); - ethtool_bitset_free(&req->wanted); - ethtool_bitset_free(&req->active); - ethtool_bitset_free(&req->nochange); - free(req); -} - -void ethtool_features_set_rsp_free(struct ethtool_features_set_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->hw); - ethtool_bitset_free(&rsp->wanted); - ethtool_bitset_free(&rsp->active); - ethtool_bitset_free(&rsp->nochange); - free(rsp); -} - -int ethtool_features_set_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_features_set_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct 
ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEATURES_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_HW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hw = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->hw; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_WANTED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.wanted = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->wanted; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->active; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEATURES_NOCHANGE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nochange = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->nochange; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_features_set_rsp * -ethtool_features_set(struct ynl_sock *ys, struct ethtool_features_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_features_set_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEATURES_SET, 1); - ys->req_policy = ðtool_features_nest; - yrs.yarg.rsp_policy = ðtool_features_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEATURES_HEADER, &req->header); - if (req->_present.hw) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_HW, &req->hw); - if (req->_present.wanted) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_WANTED, &req->wanted); - if (req->_present.active) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_ACTIVE, &req->active); - if (req->_present.nochange) - ethtool_bitset_put(nlh, ETHTOOL_A_FEATURES_NOCHANGE, &req->nochange); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_features_set_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_FEATURES_SET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_features_set_rsp_free(rsp); - return NULL; -} - -/* ============== ETHTOOL_MSG_PRIVFLAGS_GET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_GET - do */ -void ethtool_privflags_get_req_free(struct ethtool_privflags_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_privflags_get_rsp_free(struct ethtool_privflags_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->flags); - free(rsp); -} - -int ethtool_privflags_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_privflags_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == 
ETHTOOL_A_PRIVFLAGS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PRIVFLAGS_FLAGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.flags = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->flags; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_privflags_get_rsp * -ethtool_privflags_get(struct ynl_sock *ys, - struct ethtool_privflags_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_privflags_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_GET, 1); - ys->req_policy = ðtool_privflags_nest; - yrs.yarg.rsp_policy = ðtool_privflags_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_privflags_get_rsp_parse; - yrs.rsp_cmd = 14; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_privflags_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PRIVFLAGS_GET - dump */ -void ethtool_privflags_get_list_free(struct ethtool_privflags_get_list *rsp) -{ - struct ethtool_privflags_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.flags); - free(rsp); - } -} - -struct ethtool_privflags_get_list * -ethtool_privflags_get_dump(struct ynl_sock *ys, - struct ethtool_privflags_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_privflags_get_list); - yds.cb = ethtool_privflags_get_rsp_parse; - yds.rsp_cmd = 14; - yds.rsp_policy = ðtool_privflags_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_GET, 1); - ys->req_policy = ðtool_privflags_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_privflags_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_PRIVFLAGS_GET - notify */ -void ethtool_privflags_get_ntf_free(struct ethtool_privflags_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.flags); - free(rsp); -} - -/* ============== ETHTOOL_MSG_PRIVFLAGS_SET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_SET - do */ -void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->flags); - free(req); -} - -int ethtool_privflags_set(struct ynl_sock *ys, - struct ethtool_privflags_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PRIVFLAGS_SET, 1); - ys->req_policy = ðtool_privflags_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PRIVFLAGS_HEADER, &req->header); - if (req->_present.flags) - ethtool_bitset_put(nlh, ETHTOOL_A_PRIVFLAGS_FLAGS, &req->flags); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; 
-} - -/* ============== ETHTOOL_MSG_RINGS_GET ============== */ -/* ETHTOOL_MSG_RINGS_GET - do */ -void ethtool_rings_get_req_free(struct ethtool_rings_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_rings_get_rsp_free(struct ethtool_rings_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_rings_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_rings_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_RINGS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_RINGS_RX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max = 1; - dst->rx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_MINI_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_mini_max = 1; - dst->rx_mini_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_JUMBO_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_jumbo_max = 1; - dst->rx_jumbo_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max = 1; - dst->tx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx = 1; - dst->rx = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_MINI) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_mini = 1; - dst->rx_mini = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_JUMBO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_jumbo = 1; - dst->rx_jumbo = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx = 1; - dst->tx = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_RX_BUF_LEN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_buf_len = 1; - dst->rx_buf_len = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TCP_DATA_SPLIT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tcp_data_split = 1; - dst->tcp_data_split = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_RINGS_CQE_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.cqe_size = 1; - dst->cqe_size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RINGS_TX_PUSH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_push = 1; - dst->tx_push = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_RINGS_RX_PUSH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_push = 1; - dst->rx_push = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_push_buf_len = 1; - dst->tx_push_buf_len = mnl_attr_get_u32(attr); - } else if (type == 
ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_push_buf_len_max = 1; - dst->tx_push_buf_len_max = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_rings_get_rsp * -ethtool_rings_get(struct ynl_sock *ys, struct ethtool_rings_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_rings_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RINGS_GET, 1); - ys->req_policy = ðtool_rings_nest; - yrs.yarg.rsp_policy = ðtool_rings_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_rings_get_rsp_parse; - yrs.rsp_cmd = 16; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_rings_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_RINGS_GET - dump */ -void ethtool_rings_get_list_free(struct ethtool_rings_get_list *rsp) -{ - struct ethtool_rings_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_rings_get_list * -ethtool_rings_get_dump(struct ynl_sock *ys, - struct ethtool_rings_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_rings_get_list); - yds.cb = ethtool_rings_get_rsp_parse; - yds.rsp_cmd = 16; - yds.rsp_policy = ðtool_rings_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_RINGS_GET, 1); - ys->req_policy = ðtool_rings_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_rings_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_RINGS_GET - notify */ -void ethtool_rings_get_ntf_free(struct ethtool_rings_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_RINGS_SET ============== */ -/* ETHTOOL_MSG_RINGS_SET - do */ -void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RINGS_SET, 1); - ys->req_policy = ðtool_rings_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RINGS_HEADER, &req->header); - if (req->_present.rx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MAX, req->rx_max); - if (req->_present.rx_mini_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MINI_MAX, req->rx_mini_max); - if (req->_present.rx_jumbo_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_JUMBO_MAX, req->rx_jumbo_max); - if (req->_present.tx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_MAX, req->tx_max); - if (req->_present.rx) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX, req->rx); - if (req->_present.rx_mini) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_MINI, req->rx_mini); - if (req->_present.rx_jumbo) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_JUMBO, req->rx_jumbo); - if (req->_present.tx) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX, req->tx); - if 
(req->_present.rx_buf_len) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_RX_BUF_LEN, req->rx_buf_len); - if (req->_present.tcp_data_split) - mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_TCP_DATA_SPLIT, req->tcp_data_split); - if (req->_present.cqe_size) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_CQE_SIZE, req->cqe_size); - if (req->_present.tx_push) - mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_TX_PUSH, req->tx_push); - if (req->_present.rx_push) - mnl_attr_put_u8(nlh, ETHTOOL_A_RINGS_RX_PUSH, req->rx_push); - if (req->_present.tx_push_buf_len) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, req->tx_push_buf_len); - if (req->_present.tx_push_buf_len_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, req->tx_push_buf_len_max); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_CHANNELS_GET ============== */ -/* ETHTOOL_MSG_CHANNELS_GET - do */ -void ethtool_channels_get_req_free(struct ethtool_channels_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_channels_get_rsp_free(struct ethtool_channels_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_channels_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_channels_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CHANNELS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CHANNELS_RX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max = 1; - dst->rx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_TX_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max = 1; - dst->tx_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_OTHER_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.other_max = 1; - dst->other_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_COMBINED_MAX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.combined_max = 1; - dst->combined_max = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_RX_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_count = 1; - dst->rx_count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_TX_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_count = 1; - dst->tx_count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_OTHER_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.other_count = 1; - dst->other_count = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_CHANNELS_COMBINED_COUNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.combined_count = 1; - dst->combined_count = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_channels_get_rsp * -ethtool_channels_get(struct ynl_sock *ys, struct ethtool_channels_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct 
ethtool_channels_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_GET, 1); - ys->req_policy = ðtool_channels_nest; - yrs.yarg.rsp_policy = ðtool_channels_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_channels_get_rsp_parse; - yrs.rsp_cmd = 18; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_channels_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_CHANNELS_GET - dump */ -void ethtool_channels_get_list_free(struct ethtool_channels_get_list *rsp) -{ - struct ethtool_channels_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_channels_get_list * -ethtool_channels_get_dump(struct ynl_sock *ys, - struct ethtool_channels_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_channels_get_list); - yds.cb = ethtool_channels_get_rsp_parse; - yds.rsp_cmd = 18; - yds.rsp_policy = ðtool_channels_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_GET, 1); - ys->req_policy = ðtool_channels_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_channels_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_CHANNELS_GET - notify */ -void ethtool_channels_get_ntf_free(struct ethtool_channels_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_CHANNELS_SET ============== */ -/* ETHTOOL_MSG_CHANNELS_SET - do */ -void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_channels_set(struct ynl_sock *ys, - struct ethtool_channels_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CHANNELS_SET, 1); - ys->req_policy = ðtool_channels_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CHANNELS_HEADER, &req->header); - if (req->_present.rx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_RX_MAX, req->rx_max); - if (req->_present.tx_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_TX_MAX, req->tx_max); - if (req->_present.other_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_OTHER_MAX, req->other_max); - if (req->_present.combined_max) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_MAX, req->combined_max); - if (req->_present.rx_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_RX_COUNT, req->rx_count); - if (req->_present.tx_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_TX_COUNT, req->tx_count); - if (req->_present.other_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_OTHER_COUNT, req->other_count); - if (req->_present.combined_count) - mnl_attr_put_u32(nlh, ETHTOOL_A_CHANNELS_COMBINED_COUNT, req->combined_count); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_COALESCE_GET ============== */ -/* ETHTOOL_MSG_COALESCE_GET - do */ -void ethtool_coalesce_get_req_free(struct 
ethtool_coalesce_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_coalesce_get_rsp_free(struct ethtool_coalesce_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_coalesce_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_coalesce_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_COALESCE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_COALESCE_RX_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs = 1; - dst->rx_usecs = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max_frames = 1; - dst->rx_max_frames = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_USECS_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs_irq = 1; - dst->rx_usecs_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max_frames_irq = 1; - dst->rx_max_frames_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs = 1; - dst->tx_usecs = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames = 1; - dst->tx_max_frames = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs_irq = 1; - dst->tx_usecs_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames_irq = 1; - dst->tx_max_frames_irq = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_STATS_BLOCK_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats_block_usecs = 1; - dst->stats_block_usecs = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_adaptive_rx = 1; - dst->use_adaptive_rx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_adaptive_tx = 1; - dst->use_adaptive_tx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_PKT_RATE_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pkt_rate_low = 1; - dst->pkt_rate_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_USECS_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs_low = 1; - dst->rx_usecs_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW) { - if (ynl_attr_validate(yarg, attr)) - return 
MNL_CB_ERROR; - dst->_present.rx_max_frames_low = 1; - dst->rx_max_frames_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs_low = 1; - dst->tx_usecs_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames_low = 1; - dst->tx_max_frames_low = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_PKT_RATE_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pkt_rate_high = 1; - dst->pkt_rate_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_USECS_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_usecs_high = 1; - dst->rx_usecs_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_max_frames_high = 1; - dst->rx_max_frames_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_USECS_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_usecs_high = 1; - dst->tx_usecs_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_max_frames_high = 1; - dst->tx_max_frames_high = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rate_sample_interval = 1; - dst->rate_sample_interval = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_CQE_MODE_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_cqe_mode_tx = 1; - dst->use_cqe_mode_tx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_USE_CQE_MODE_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.use_cqe_mode_rx = 1; - dst->use_cqe_mode_rx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_aggr_max_bytes = 1; - dst->tx_aggr_max_bytes = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_aggr_max_frames = 1; - dst->tx_aggr_max_frames = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_aggr_time_usecs = 1; - dst->tx_aggr_time_usecs = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_coalesce_get_rsp * -ethtool_coalesce_get(struct ynl_sock *ys, struct ethtool_coalesce_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_coalesce_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_COALESCE_GET, 1); - ys->req_policy = ðtool_coalesce_nest; - yrs.yarg.rsp_policy = ðtool_coalesce_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_coalesce_get_rsp_parse; - yrs.rsp_cmd = 20; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; 
- -err_free: - ethtool_coalesce_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_COALESCE_GET - dump */ -void ethtool_coalesce_get_list_free(struct ethtool_coalesce_get_list *rsp) -{ - struct ethtool_coalesce_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_coalesce_get_list * -ethtool_coalesce_get_dump(struct ynl_sock *ys, - struct ethtool_coalesce_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_coalesce_get_list); - yds.cb = ethtool_coalesce_get_rsp_parse; - yds.rsp_cmd = 20; - yds.rsp_policy = ðtool_coalesce_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_COALESCE_GET, 1); - ys->req_policy = ðtool_coalesce_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_coalesce_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_COALESCE_GET - notify */ -void ethtool_coalesce_get_ntf_free(struct ethtool_coalesce_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_COALESCE_SET ============== */ -/* ETHTOOL_MSG_COALESCE_SET - do */ -void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_coalesce_set(struct ynl_sock *ys, - struct ethtool_coalesce_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_COALESCE_SET, 1); - ys->req_policy = ðtool_coalesce_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_COALESCE_HEADER, &req->header); - if (req->_present.rx_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS, req->rx_usecs); - if (req->_present.rx_max_frames) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES, req->rx_max_frames); - if (req->_present.rx_usecs_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_IRQ, req->rx_usecs_irq); - if (req->_present.rx_max_frames_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, req->rx_max_frames_irq); - if (req->_present.tx_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS, req->tx_usecs); - if (req->_present.tx_max_frames) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES, req->tx_max_frames); - if (req->_present.tx_usecs_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_IRQ, req->tx_usecs_irq); - if (req->_present.tx_max_frames_irq) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, req->tx_max_frames_irq); - if (req->_present.stats_block_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, req->stats_block_usecs); - if (req->_present.use_adaptive_rx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, req->use_adaptive_rx); - if (req->_present.use_adaptive_tx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, req->use_adaptive_tx); - if (req->_present.pkt_rate_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_PKT_RATE_LOW, req->pkt_rate_low); - if (req->_present.rx_usecs_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_LOW, req->rx_usecs_low); - if (req->_present.rx_max_frames_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, 
req->rx_max_frames_low); - if (req->_present.tx_usecs_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_LOW, req->tx_usecs_low); - if (req->_present.tx_max_frames_low) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, req->tx_max_frames_low); - if (req->_present.pkt_rate_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_PKT_RATE_HIGH, req->pkt_rate_high); - if (req->_present.rx_usecs_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_USECS_HIGH, req->rx_usecs_high); - if (req->_present.rx_max_frames_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, req->rx_max_frames_high); - if (req->_present.tx_usecs_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_USECS_HIGH, req->tx_usecs_high); - if (req->_present.tx_max_frames_high) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, req->tx_max_frames_high); - if (req->_present.rate_sample_interval) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, req->rate_sample_interval); - if (req->_present.use_cqe_mode_tx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, req->use_cqe_mode_tx); - if (req->_present.use_cqe_mode_rx) - mnl_attr_put_u8(nlh, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, req->use_cqe_mode_rx); - if (req->_present.tx_aggr_max_bytes) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, req->tx_aggr_max_bytes); - if (req->_present.tx_aggr_max_frames) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, req->tx_aggr_max_frames); - if (req->_present.tx_aggr_time_usecs) - mnl_attr_put_u32(nlh, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, req->tx_aggr_time_usecs); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_PAUSE_GET ============== */ -/* ETHTOOL_MSG_PAUSE_GET - do */ -void ethtool_pause_get_req_free(struct ethtool_pause_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_pause_get_rsp_free(struct ethtool_pause_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_pause_stat_free(&rsp->stats); - free(rsp); -} - -int ethtool_pause_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_pause_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PAUSE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PAUSE_AUTONEG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.autoneg = 1; - dst->autoneg = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PAUSE_RX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx = 1; - dst->rx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PAUSE_TX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx = 1; - dst->tx = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PAUSE_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats = 1; - - parg.rsp_policy = ðtool_pause_stat_nest; - parg.data = &dst->stats; - if (ethtool_pause_stat_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PAUSE_STATS_SRC) { - if 
(ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats_src = 1; - dst->stats_src = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_pause_get_rsp * -ethtool_pause_get(struct ynl_sock *ys, struct ethtool_pause_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_pause_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PAUSE_GET, 1); - ys->req_policy = ðtool_pause_nest; - yrs.yarg.rsp_policy = ðtool_pause_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_pause_get_rsp_parse; - yrs.rsp_cmd = 22; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_pause_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PAUSE_GET - dump */ -void ethtool_pause_get_list_free(struct ethtool_pause_get_list *rsp) -{ - struct ethtool_pause_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_pause_stat_free(&rsp->obj.stats); - free(rsp); - } -} - -struct ethtool_pause_get_list * -ethtool_pause_get_dump(struct ynl_sock *ys, - struct ethtool_pause_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_pause_get_list); - yds.cb = ethtool_pause_get_rsp_parse; - yds.rsp_cmd = 22; - yds.rsp_policy = ðtool_pause_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PAUSE_GET, 1); - ys->req_policy = ðtool_pause_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_pause_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_PAUSE_GET - notify */ -void ethtool_pause_get_ntf_free(struct ethtool_pause_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_pause_stat_free(&rsp->obj.stats); - free(rsp); -} - -/* ============== ETHTOOL_MSG_PAUSE_SET ============== */ -/* ETHTOOL_MSG_PAUSE_SET - do */ -void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_pause_stat_free(&req->stats); - free(req); -} - -int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PAUSE_SET, 1); - ys->req_policy = ðtool_pause_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PAUSE_HEADER, &req->header); - if (req->_present.autoneg) - mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_AUTONEG, req->autoneg); - if (req->_present.rx) - mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_RX, req->rx); - if (req->_present.tx) - mnl_attr_put_u8(nlh, ETHTOOL_A_PAUSE_TX, req->tx); - if (req->_present.stats) - ethtool_pause_stat_put(nlh, ETHTOOL_A_PAUSE_STATS, &req->stats); - if (req->_present.stats_src) - mnl_attr_put_u32(nlh, ETHTOOL_A_PAUSE_STATS_SRC, req->stats_src); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_EEE_GET ============== */ -/* ETHTOOL_MSG_EEE_GET - do */ -void ethtool_eee_get_req_free(struct ethtool_eee_get_req *req) -{ - 
ethtool_header_free(&req->header); - free(req); -} - -void ethtool_eee_get_rsp_free(struct ethtool_eee_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->modes_ours); - ethtool_bitset_free(&rsp->modes_peer); - free(rsp); -} - -int ethtool_eee_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_eee_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_EEE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_EEE_MODES_OURS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes_ours = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes_ours; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_EEE_MODES_PEER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes_peer = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes_peer; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_EEE_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - dst->active = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_EEE_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.enabled = 1; - dst->enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_EEE_TX_LPI_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_lpi_enabled = 1; - dst->tx_lpi_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_EEE_TX_LPI_TIMER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_lpi_timer = 1; - dst->tx_lpi_timer = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_eee_get_rsp * -ethtool_eee_get(struct ynl_sock *ys, struct ethtool_eee_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_eee_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_EEE_GET, 1); - ys->req_policy = ðtool_eee_nest; - yrs.yarg.rsp_policy = ðtool_eee_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_eee_get_rsp_parse; - yrs.rsp_cmd = 24; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_eee_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_EEE_GET - dump */ -void ethtool_eee_get_list_free(struct ethtool_eee_get_list *rsp) -{ - struct ethtool_eee_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes_ours); - ethtool_bitset_free(&rsp->obj.modes_peer); - free(rsp); - } -} - -struct ethtool_eee_get_list * -ethtool_eee_get_dump(struct ynl_sock *ys, struct ethtool_eee_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct 
ethtool_eee_get_list); - yds.cb = ethtool_eee_get_rsp_parse; - yds.rsp_cmd = 24; - yds.rsp_policy = ðtool_eee_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_EEE_GET, 1); - ys->req_policy = ðtool_eee_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_eee_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_EEE_GET - notify */ -void ethtool_eee_get_ntf_free(struct ethtool_eee_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes_ours); - ethtool_bitset_free(&rsp->obj.modes_peer); - free(rsp); -} - -/* ============== ETHTOOL_MSG_EEE_SET ============== */ -/* ETHTOOL_MSG_EEE_SET - do */ -void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->modes_ours); - ethtool_bitset_free(&req->modes_peer); - free(req); -} - -int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_EEE_SET, 1); - ys->req_policy = ðtool_eee_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_EEE_HEADER, &req->header); - if (req->_present.modes_ours) - ethtool_bitset_put(nlh, ETHTOOL_A_EEE_MODES_OURS, &req->modes_ours); - if (req->_present.modes_peer) - ethtool_bitset_put(nlh, ETHTOOL_A_EEE_MODES_PEER, &req->modes_peer); - if (req->_present.active) - mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_ACTIVE, req->active); - if (req->_present.enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_ENABLED, req->enabled); - if (req->_present.tx_lpi_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_EEE_TX_LPI_ENABLED, req->tx_lpi_enabled); - if (req->_present.tx_lpi_timer) - mnl_attr_put_u32(nlh, ETHTOOL_A_EEE_TX_LPI_TIMER, req->tx_lpi_timer); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_TSINFO_GET ============== */ -/* ETHTOOL_MSG_TSINFO_GET - do */ -void ethtool_tsinfo_get_req_free(struct ethtool_tsinfo_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_tsinfo_get_rsp_free(struct ethtool_tsinfo_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->timestamping); - ethtool_bitset_free(&rsp->tx_types); - ethtool_bitset_free(&rsp->rx_filters); - free(rsp); -} - -int ethtool_tsinfo_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_tsinfo_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TSINFO_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_TIMESTAMPING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.timestamping = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->timestamping; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_TX_TYPES) { - if (ynl_attr_validate(yarg, 
attr)) - return MNL_CB_ERROR; - dst->_present.tx_types = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->tx_types; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_RX_FILTERS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_filters = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->rx_filters; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TSINFO_PHC_INDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.phc_index = 1; - dst->phc_index = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_tsinfo_get_rsp * -ethtool_tsinfo_get(struct ynl_sock *ys, struct ethtool_tsinfo_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_tsinfo_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_TSINFO_GET, 1); - ys->req_policy = ðtool_tsinfo_nest; - yrs.yarg.rsp_policy = ðtool_tsinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TSINFO_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_tsinfo_get_rsp_parse; - yrs.rsp_cmd = 26; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_tsinfo_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_TSINFO_GET - dump */ -void ethtool_tsinfo_get_list_free(struct ethtool_tsinfo_get_list *rsp) -{ - struct ethtool_tsinfo_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.timestamping); - ethtool_bitset_free(&rsp->obj.tx_types); - ethtool_bitset_free(&rsp->obj.rx_filters); - free(rsp); - } -} - -struct ethtool_tsinfo_get_list * -ethtool_tsinfo_get_dump(struct ynl_sock *ys, - struct ethtool_tsinfo_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_tsinfo_get_list); - yds.cb = ethtool_tsinfo_get_rsp_parse; - yds.rsp_cmd = 26; - yds.rsp_policy = ðtool_tsinfo_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_TSINFO_GET, 1); - ys->req_policy = ðtool_tsinfo_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TSINFO_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_tsinfo_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_CABLE_TEST_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_ACT - do */ -void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_cable_test_act(struct ynl_sock *ys, - struct ethtool_cable_test_act_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CABLE_TEST_ACT, 1); - ys->req_policy = ðtool_cable_test_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_HEADER, &req->header); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_CABLE_TEST_TDR_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_TDR_ACT - do */ -void -ethtool_cable_test_tdr_act_req_free(struct 
ethtool_cable_test_tdr_act_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_cable_test_tdr_act(struct ynl_sock *ys, - struct ethtool_cable_test_tdr_act_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_CABLE_TEST_TDR_ACT, 1); - ys->req_policy = ðtool_cable_test_tdr_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_CABLE_TEST_TDR_HEADER, &req->header); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_TUNNEL_INFO_GET ============== */ -/* ETHTOOL_MSG_TUNNEL_INFO_GET - do */ -void ethtool_tunnel_info_get_req_free(struct ethtool_tunnel_info_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_tunnel_info_get_rsp_free(struct ethtool_tunnel_info_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_tunnel_udp_free(&rsp->udp_ports); - free(rsp); -} - -int ethtool_tunnel_info_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_tunnel_info_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_TUNNEL_INFO_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_TUNNEL_INFO_UDP_PORTS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.udp_ports = 1; - - parg.rsp_policy = ðtool_tunnel_udp_nest; - parg.data = &dst->udp_ports; - if (ethtool_tunnel_udp_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_tunnel_info_get_rsp * -ethtool_tunnel_info_get(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_tunnel_info_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_TUNNEL_INFO_GET, 1); - ys->req_policy = ðtool_tunnel_info_nest; - yrs.yarg.rsp_policy = ðtool_tunnel_info_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TUNNEL_INFO_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_tunnel_info_get_rsp_parse; - yrs.rsp_cmd = 29; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_tunnel_info_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_TUNNEL_INFO_GET - dump */ -void -ethtool_tunnel_info_get_list_free(struct ethtool_tunnel_info_get_list *rsp) -{ - struct ethtool_tunnel_info_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_tunnel_udp_free(&rsp->obj.udp_ports); - free(rsp); - } -} - -struct ethtool_tunnel_info_get_list * -ethtool_tunnel_info_get_dump(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_tunnel_info_get_list); - yds.cb = ethtool_tunnel_info_get_rsp_parse; - yds.rsp_cmd = 29; - 
yds.rsp_policy = ðtool_tunnel_info_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_TUNNEL_INFO_GET, 1); - ys->req_policy = ðtool_tunnel_info_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_TUNNEL_INFO_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_tunnel_info_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_FEC_GET ============== */ -/* ETHTOOL_MSG_FEC_GET - do */ -void ethtool_fec_get_req_free(struct ethtool_fec_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_fec_get_rsp_free(struct ethtool_fec_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_bitset_free(&rsp->modes); - ethtool_fec_stat_free(&rsp->stats); - free(rsp); -} - -int ethtool_fec_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_fec_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_FEC_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEC_MODES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.modes = 1; - - parg.rsp_policy = ðtool_bitset_nest; - parg.data = &dst->modes; - if (ethtool_bitset_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_FEC_AUTO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.auto_ = 1; - dst->auto_ = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_FEC_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.active = 1; - dst->active = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_FEC_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats = 1; - - parg.rsp_policy = ðtool_fec_stat_nest; - parg.data = &dst->stats; - if (ethtool_fec_stat_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_fec_get_rsp * -ethtool_fec_get(struct ynl_sock *ys, struct ethtool_fec_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_fec_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEC_GET, 1); - ys->req_policy = ðtool_fec_nest; - yrs.yarg.rsp_policy = ðtool_fec_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_fec_get_rsp_parse; - yrs.rsp_cmd = 30; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_fec_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_FEC_GET - dump */ -void ethtool_fec_get_list_free(struct ethtool_fec_get_list *rsp) -{ - struct ethtool_fec_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - ethtool_fec_stat_free(&rsp->obj.stats); - free(rsp); - } -} - -struct ethtool_fec_get_list * -ethtool_fec_get_dump(struct ynl_sock 
*ys, struct ethtool_fec_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_fec_get_list); - yds.cb = ethtool_fec_get_rsp_parse; - yds.rsp_cmd = 30; - yds.rsp_policy = ðtool_fec_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_FEC_GET, 1); - ys->req_policy = ðtool_fec_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_fec_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_FEC_GET - notify */ -void ethtool_fec_get_ntf_free(struct ethtool_fec_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_bitset_free(&rsp->obj.modes); - ethtool_fec_stat_free(&rsp->obj.stats); - free(rsp); -} - -/* ============== ETHTOOL_MSG_FEC_SET ============== */ -/* ETHTOOL_MSG_FEC_SET - do */ -void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req) -{ - ethtool_header_free(&req->header); - ethtool_bitset_free(&req->modes); - ethtool_fec_stat_free(&req->stats); - free(req); -} - -int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_FEC_SET, 1); - ys->req_policy = ðtool_fec_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_FEC_HEADER, &req->header); - if (req->_present.modes) - ethtool_bitset_put(nlh, ETHTOOL_A_FEC_MODES, &req->modes); - if (req->_present.auto_) - mnl_attr_put_u8(nlh, ETHTOOL_A_FEC_AUTO, req->auto_); - if (req->_present.active) - mnl_attr_put_u32(nlh, ETHTOOL_A_FEC_ACTIVE, req->active); - if (req->_present.stats) - ethtool_fec_stat_put(nlh, ETHTOOL_A_FEC_STATS, &req->stats); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_MODULE_EEPROM_GET ============== */ -/* ETHTOOL_MSG_MODULE_EEPROM_GET - do */ -void -ethtool_module_eeprom_get_req_free(struct ethtool_module_eeprom_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void -ethtool_module_eeprom_get_rsp_free(struct ethtool_module_eeprom_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp->data); - free(rsp); -} - -int ethtool_module_eeprom_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_module_eeprom_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MODULE_EEPROM_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_MODULE_EEPROM_OFFSET) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.offset = 1; - dst->offset = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_LENGTH) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.length = 1; - dst->length = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_PAGE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.page = 1; - 
dst->page = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_BANK) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.bank = 1; - dst->bank = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.i2c_address = 1; - dst->i2c_address = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MODULE_EEPROM_DATA) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.data_len = len; - dst->data = malloc(len); - memcpy(dst->data, mnl_attr_get_payload(attr), len); - } - } - - return MNL_CB_OK; -} - -struct ethtool_module_eeprom_get_rsp * -ethtool_module_eeprom_get(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_module_eeprom_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_EEPROM_GET, 1); - ys->req_policy = ðtool_module_eeprom_nest; - yrs.yarg.rsp_policy = ðtool_module_eeprom_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_EEPROM_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_module_eeprom_get_rsp_parse; - yrs.rsp_cmd = 32; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_module_eeprom_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_MODULE_EEPROM_GET - dump */ -void -ethtool_module_eeprom_get_list_free(struct ethtool_module_eeprom_get_list *rsp) -{ - struct ethtool_module_eeprom_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp->obj.data); - free(rsp); - } -} - -struct ethtool_module_eeprom_get_list * -ethtool_module_eeprom_get_dump(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_module_eeprom_get_list); - yds.cb = ethtool_module_eeprom_get_rsp_parse; - yds.rsp_cmd = 32; - yds.rsp_policy = ðtool_module_eeprom_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MODULE_EEPROM_GET, 1); - ys->req_policy = ðtool_module_eeprom_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_EEPROM_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_module_eeprom_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_PHC_VCLOCKS_GET ============== */ -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - do */ -void ethtool_phc_vclocks_get_req_free(struct ethtool_phc_vclocks_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_phc_vclocks_get_rsp_free(struct ethtool_phc_vclocks_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_phc_vclocks_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_phc_vclocks_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PHC_VCLOCKS_HEADER) { - if 
(ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PHC_VCLOCKS_NUM) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.num = 1; - dst->num = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_phc_vclocks_get_rsp * -ethtool_phc_vclocks_get(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_phc_vclocks_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PHC_VCLOCKS_GET, 1); - ys->req_policy = ðtool_phc_vclocks_nest; - yrs.yarg.rsp_policy = ðtool_phc_vclocks_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PHC_VCLOCKS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_phc_vclocks_get_rsp_parse; - yrs.rsp_cmd = 34; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_phc_vclocks_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - dump */ -void -ethtool_phc_vclocks_get_list_free(struct ethtool_phc_vclocks_get_list *rsp) -{ - struct ethtool_phc_vclocks_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_phc_vclocks_get_list * -ethtool_phc_vclocks_get_dump(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_phc_vclocks_get_list); - yds.cb = ethtool_phc_vclocks_get_rsp_parse; - yds.rsp_cmd = 34; - yds.rsp_policy = ðtool_phc_vclocks_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PHC_VCLOCKS_GET, 1); - ys->req_policy = ðtool_phc_vclocks_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PHC_VCLOCKS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_phc_vclocks_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_MODULE_GET ============== */ -/* ETHTOOL_MSG_MODULE_GET - do */ -void ethtool_module_get_req_free(struct ethtool_module_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_module_get_rsp_free(struct ethtool_module_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_module_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_module_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MODULE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_MODULE_POWER_MODE_POLICY) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.power_mode_policy = 1; - dst->power_mode_policy = mnl_attr_get_u8(attr); - 
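For orientation, the generated "do" requests above are all driven the same way from application code. The sketch below shows roughly how a caller could query the number of PHC virtual clocks; the ynl library's ynl_sock_create()/ynl_sock_destroy() helpers, the ynl_error.msg field, the include paths, and ifindex 2 are illustrative assumptions, and a bare calloc() stands in for the generated *_req_alloc() wrapper (which does the same thing).

#include <stdio.h>
#include <stdlib.h>

#include "ethtool-user.h"
#include "ynl.h"

/* Query how many PTP virtual clocks the device with ifindex 2 exposes. */
int main(void)
{
	struct ethtool_phc_vclocks_get_req *req;
	struct ethtool_phc_vclocks_get_rsp *rsp;
	struct ynl_error yerr;
	struct ynl_sock *ys;

	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
	if (!ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return 1;
	}

	req = calloc(1, sizeof(*req));	/* what the *_req_alloc() helper does */
	req->_present.header = 1;
	req->header._present.dev_index = 1;
	req->header.dev_index = 2;

	rsp = ethtool_phc_vclocks_get(ys, req);
	ethtool_phc_vclocks_get_req_free(req);
	if (rsp && rsp->_present.num)
		printf("PHC vclocks: %u\n", rsp->num);
	if (rsp)
		ethtool_phc_vclocks_get_rsp_free(rsp);

	ynl_sock_destroy(ys);
	return 0;
}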
} else if (type == ETHTOOL_A_MODULE_POWER_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.power_mode = 1; - dst->power_mode = mnl_attr_get_u8(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_module_get_rsp * -ethtool_module_get(struct ynl_sock *ys, struct ethtool_module_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_module_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_GET, 1); - ys->req_policy = ðtool_module_nest; - yrs.yarg.rsp_policy = ðtool_module_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_module_get_rsp_parse; - yrs.rsp_cmd = 35; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_module_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_MODULE_GET - dump */ -void ethtool_module_get_list_free(struct ethtool_module_get_list *rsp) -{ - struct ethtool_module_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_module_get_list * -ethtool_module_get_dump(struct ynl_sock *ys, - struct ethtool_module_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_module_get_list); - yds.cb = ethtool_module_get_rsp_parse; - yds.rsp_cmd = 35; - yds.rsp_policy = ðtool_module_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MODULE_GET, 1); - ys->req_policy = ðtool_module_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_module_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_MODULE_GET - notify */ -void ethtool_module_get_ntf_free(struct ethtool_module_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_MODULE_SET ============== */ -/* ETHTOOL_MSG_MODULE_SET - do */ -void ethtool_module_set_req_free(struct ethtool_module_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MODULE_SET, 1); - ys->req_policy = ðtool_module_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MODULE_HEADER, &req->header); - if (req->_present.power_mode_policy) - mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE_POLICY, req->power_mode_policy); - if (req->_present.power_mode) - mnl_attr_put_u8(nlh, ETHTOOL_A_MODULE_POWER_MODE, req->power_mode); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_PSE_GET ============== */ -/* ETHTOOL_MSG_PSE_GET - do */ -void ethtool_pse_get_req_free(struct ethtool_pse_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_pse_get_rsp_free(struct ethtool_pse_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_pse_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct 
ynl_parse_arg *yarg = data; - struct ethtool_pse_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PSE_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PODL_PSE_ADMIN_STATE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.admin_state = 1; - dst->admin_state = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PODL_PSE_ADMIN_CONTROL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.admin_control = 1; - dst->admin_control = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PODL_PSE_PW_D_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pw_d_status = 1; - dst->pw_d_status = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_pse_get_rsp * -ethtool_pse_get(struct ynl_sock *ys, struct ethtool_pse_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_pse_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PSE_GET, 1); - ys->req_policy = ðtool_pse_nest; - yrs.yarg.rsp_policy = ðtool_pse_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_pse_get_rsp_parse; - yrs.rsp_cmd = 37; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_pse_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PSE_GET - dump */ -void ethtool_pse_get_list_free(struct ethtool_pse_get_list *rsp) -{ - struct ethtool_pse_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_pse_get_list * -ethtool_pse_get_dump(struct ynl_sock *ys, struct ethtool_pse_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_pse_get_list); - yds.cb = ethtool_pse_get_rsp_parse; - yds.rsp_cmd = 37; - yds.rsp_policy = ðtool_pse_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PSE_GET, 1); - ys->req_policy = ðtool_pse_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_pse_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_PSE_SET ============== */ -/* ETHTOOL_MSG_PSE_SET - do */ -void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PSE_SET, 1); - ys->req_policy = ðtool_pse_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PSE_HEADER, &req->header); - if (req->_present.admin_state) - mnl_attr_put_u32(nlh, 
ETHTOOL_A_PODL_PSE_ADMIN_STATE, req->admin_state); - if (req->_present.admin_control) - mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, req->admin_control); - if (req->_present.pw_d_status) - mnl_attr_put_u32(nlh, ETHTOOL_A_PODL_PSE_PW_D_STATUS, req->pw_d_status); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_RSS_GET ============== */ -/* ETHTOOL_MSG_RSS_GET - do */ -void ethtool_rss_get_req_free(struct ethtool_rss_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_rss_get_rsp_free(struct ethtool_rss_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp->indir); - free(rsp->hkey); - free(rsp); -} - -int ethtool_rss_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_rss_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_RSS_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_RSS_CONTEXT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.context = 1; - dst->context = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RSS_HFUNC) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.hfunc = 1; - dst->hfunc = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_RSS_INDIR) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.indir_len = len; - dst->indir = malloc(len); - memcpy(dst->indir, mnl_attr_get_payload(attr), len); - } else if (type == ETHTOOL_A_RSS_HKEY) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.hkey_len = len; - dst->hkey = malloc(len); - memcpy(dst->hkey, mnl_attr_get_payload(attr), len); - } - } - - return MNL_CB_OK; -} - -struct ethtool_rss_get_rsp * -ethtool_rss_get(struct ynl_sock *ys, struct ethtool_rss_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_rss_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_RSS_GET, 1); - ys->req_policy = ðtool_rss_nest; - yrs.yarg.rsp_policy = ðtool_rss_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RSS_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_rss_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_RSS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_rss_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_RSS_GET - dump */ -void ethtool_rss_get_list_free(struct ethtool_rss_get_list *rsp) -{ - struct ethtool_rss_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp->obj.indir); - free(rsp->obj.hkey); - free(rsp); - } -} - -struct ethtool_rss_get_list * -ethtool_rss_get_dump(struct ynl_sock *ys, struct ethtool_rss_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - 
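The RSS reply above mixes fixed-size fields (context, hfunc) with raw byte buffers (indirection table, hash key); callers test the matching _present bits, and for the buffers the *_len members carry the payload size in bytes. A rough sketch, reusing the includes and the open ynl_sock from the previous example:

/* Print the RSS hash function and table/key sizes for one interface. */
static void show_rss(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_rss_get_req *req;
	struct ethtool_rss_get_rsp *rsp;

	req = calloc(1, sizeof(*req));
	req->_present.header = 1;
	req->header._present.dev_index = 1;
	req->header.dev_index = ifindex;

	rsp = ethtool_rss_get(ys, req);
	ethtool_rss_get_req_free(req);
	if (!rsp)
		return;

	if (rsp->_present.hfunc)
		printf("hfunc: %u\n", rsp->hfunc);
	printf("indir table: %u bytes, hash key: %u bytes\n",
	       rsp->_present.indir_len, rsp->_present.hkey_len);

	ethtool_rss_get_rsp_free(rsp);
}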
int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_rss_get_list); - yds.cb = ethtool_rss_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_RSS_GET; - yds.rsp_policy = ðtool_rss_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_RSS_GET, 1); - ys->req_policy = ðtool_rss_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_RSS_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_rss_get_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_PLCA_GET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_GET_CFG - do */ -void ethtool_plca_get_cfg_req_free(struct ethtool_plca_get_cfg_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_plca_get_cfg_rsp_free(struct ethtool_plca_get_cfg_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_plca_get_cfg_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_plca_get_cfg_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PLCA_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PLCA_VERSION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.version = 1; - dst->version = mnl_attr_get_u16(attr); - } else if (type == ETHTOOL_A_PLCA_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.enabled = 1; - dst->enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_cnt = 1; - dst->node_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_id = 1; - dst->node_id = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_TO_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.to_tmr = 1; - dst->to_tmr = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_cnt = 1; - dst->burst_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_tmr = 1; - dst->burst_tmr = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_plca_get_cfg_rsp * -ethtool_plca_get_cfg(struct ynl_sock *ys, struct ethtool_plca_get_cfg_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_plca_get_cfg_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_CFG, 1); - ys->req_policy = ðtool_plca_nest; - yrs.yarg.rsp_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - 
yrs.yarg.data = rsp; - yrs.cb = ethtool_plca_get_cfg_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_PLCA_GET_CFG; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_plca_get_cfg_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PLCA_GET_CFG - dump */ -void ethtool_plca_get_cfg_list_free(struct ethtool_plca_get_cfg_list *rsp) -{ - struct ethtool_plca_get_cfg_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_plca_get_cfg_list * -ethtool_plca_get_cfg_dump(struct ynl_sock *ys, - struct ethtool_plca_get_cfg_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_plca_get_cfg_list); - yds.cb = ethtool_plca_get_cfg_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_PLCA_GET_CFG; - yds.rsp_policy = ðtool_plca_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_CFG, 1); - ys->req_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_plca_get_cfg_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_PLCA_GET_CFG - notify */ -void ethtool_plca_get_cfg_ntf_free(struct ethtool_plca_get_cfg_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ============== ETHTOOL_MSG_PLCA_SET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_SET_CFG - do */ -void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_plca_set_cfg(struct ynl_sock *ys, - struct ethtool_plca_set_cfg_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_SET_CFG, 1); - ys->req_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - if (req->_present.version) - mnl_attr_put_u16(nlh, ETHTOOL_A_PLCA_VERSION, req->version); - if (req->_present.enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_PLCA_ENABLED, req->enabled); - if (req->_present.status) - mnl_attr_put_u8(nlh, ETHTOOL_A_PLCA_STATUS, req->status); - if (req->_present.node_cnt) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_NODE_CNT, req->node_cnt); - if (req->_present.node_id) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_NODE_ID, req->node_id); - if (req->_present.to_tmr) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_TO_TMR, req->to_tmr); - if (req->_present.burst_cnt) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_CNT, req->burst_cnt); - if (req->_present.burst_tmr) - mnl_attr_put_u32(nlh, ETHTOOL_A_PLCA_BURST_TMR, req->burst_tmr); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== ETHTOOL_MSG_PLCA_GET_STATUS ============== */ -/* ETHTOOL_MSG_PLCA_GET_STATUS - do */ -void ethtool_plca_get_status_req_free(struct ethtool_plca_get_status_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_plca_get_status_rsp_free(struct ethtool_plca_get_status_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - free(rsp); -} - -int ethtool_plca_get_status_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_plca_get_status_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct 
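Set requests such as ethtool_plca_set_cfg() are simpler still: the caller fills a field, flags it in _present so the corresponding mnl_attr_put_*() call above fires, and gets back 0 or -1. Roughly, for enabling PLCA (the node ID value, the ifindex and the already-open ynl_sock are example assumptions):

/* Enable PLCA on one interface and assign it node ID 1. */
static int plca_enable(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_plca_set_cfg_req *req;
	int ret;

	req = calloc(1, sizeof(*req));
	req->_present.header = 1;
	req->header._present.dev_index = 1;
	req->header.dev_index = ifindex;
	req->_present.enabled = 1;
	req->enabled = 1;
	req->_present.node_id = 1;
	req->node_id = 1;

	ret = ethtool_plca_set_cfg(ys, req);	/* 0 on success, -1 on error */
	ethtool_plca_set_cfg_req_free(req);
	return ret;
}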
ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_PLCA_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_PLCA_VERSION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.version = 1; - dst->version = mnl_attr_get_u16(attr); - } else if (type == ETHTOOL_A_PLCA_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.enabled = 1; - dst->enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_cnt = 1; - dst->node_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_NODE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.node_id = 1; - dst->node_id = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_TO_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.to_tmr = 1; - dst->to_tmr = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_CNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_cnt = 1; - dst->burst_cnt = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_PLCA_BURST_TMR) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.burst_tmr = 1; - dst->burst_tmr = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct ethtool_plca_get_status_rsp * -ethtool_plca_get_status(struct ynl_sock *ys, - struct ethtool_plca_get_status_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_plca_get_status_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_STATUS, 1); - ys->req_policy = ðtool_plca_nest; - yrs.yarg.rsp_policy = ðtool_plca_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = ethtool_plca_get_status_rsp_parse; - yrs.rsp_cmd = 40; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_plca_get_status_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_PLCA_GET_STATUS - dump */ -void -ethtool_plca_get_status_list_free(struct ethtool_plca_get_status_list *rsp) -{ - struct ethtool_plca_get_status_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - free(rsp); - } -} - -struct ethtool_plca_get_status_list * -ethtool_plca_get_status_dump(struct ynl_sock *ys, - struct ethtool_plca_get_status_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_plca_get_status_list); - yds.cb = ethtool_plca_get_status_rsp_parse; - yds.rsp_cmd = 40; - yds.rsp_policy = ðtool_plca_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_PLCA_GET_STATUS, 1); - ys->req_policy = ðtool_plca_nest; - - if (req->_present.header) - 
ethtool_header_put(nlh, ETHTOOL_A_PLCA_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_plca_get_status_list_free(yds.first); - return NULL; -} - -/* ============== ETHTOOL_MSG_MM_GET ============== */ -/* ETHTOOL_MSG_MM_GET - do */ -void ethtool_mm_get_req_free(struct ethtool_mm_get_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -void ethtool_mm_get_rsp_free(struct ethtool_mm_get_rsp *rsp) -{ - ethtool_header_free(&rsp->header); - ethtool_mm_stat_free(&rsp->stats); - free(rsp); -} - -int ethtool_mm_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct ethtool_mm_get_rsp *dst; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_MM_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_MM_PMAC_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pmac_enabled = 1; - dst->pmac_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_TX_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_enabled = 1; - dst->tx_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_TX_ACTIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_active = 1; - dst->tx_active = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_TX_MIN_FRAG_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.tx_min_frag_size = 1; - dst->tx_min_frag_size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_RX_MIN_FRAG_SIZE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.rx_min_frag_size = 1; - dst->rx_min_frag_size = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_VERIFY_ENABLED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.verify_enabled = 1; - dst->verify_enabled = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_MM_VERIFY_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.verify_time = 1; - dst->verify_time = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_MAX_VERIFY_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.max_verify_time = 1; - dst->max_verify_time = mnl_attr_get_u32(attr); - } else if (type == ETHTOOL_A_MM_STATS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.stats = 1; - - parg.rsp_policy = ðtool_mm_stat_nest; - parg.data = &dst->stats; - if (ethtool_mm_stat_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -struct ethtool_mm_get_rsp * -ethtool_mm_get(struct ynl_sock *ys, struct ethtool_mm_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct ethtool_mm_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MM_GET, 1); - ys->req_policy = ðtool_mm_nest; - yrs.yarg.rsp_policy = ðtool_mm_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header); - - rsp = calloc(1, sizeof(*rsp)); 
- yrs.yarg.data = rsp; - yrs.cb = ethtool_mm_get_rsp_parse; - yrs.rsp_cmd = ETHTOOL_MSG_MM_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - ethtool_mm_get_rsp_free(rsp); - return NULL; -} - -/* ETHTOOL_MSG_MM_GET - dump */ -void ethtool_mm_get_list_free(struct ethtool_mm_get_list *rsp) -{ - struct ethtool_mm_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - ethtool_header_free(&rsp->obj.header); - ethtool_mm_stat_free(&rsp->obj.stats); - free(rsp); - } -} - -struct ethtool_mm_get_list * -ethtool_mm_get_dump(struct ynl_sock *ys, struct ethtool_mm_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct ethtool_mm_get_list); - yds.cb = ethtool_mm_get_rsp_parse; - yds.rsp_cmd = ETHTOOL_MSG_MM_GET; - yds.rsp_policy = ðtool_mm_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, ETHTOOL_MSG_MM_GET, 1); - ys->req_policy = ðtool_mm_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - ethtool_mm_get_list_free(yds.first); - return NULL; -} - -/* ETHTOOL_MSG_MM_GET - notify */ -void ethtool_mm_get_ntf_free(struct ethtool_mm_get_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_mm_stat_free(&rsp->obj.stats); - free(rsp); -} - -/* ============== ETHTOOL_MSG_MM_SET ============== */ -/* ETHTOOL_MSG_MM_SET - do */ -void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req) -{ - ethtool_header_free(&req->header); - free(req); -} - -int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, ETHTOOL_MSG_MM_SET, 1); - ys->req_policy = ðtool_mm_nest; - - if (req->_present.header) - ethtool_header_put(nlh, ETHTOOL_A_MM_HEADER, &req->header); - if (req->_present.verify_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_MM_VERIFY_ENABLED, req->verify_enabled); - if (req->_present.verify_time) - mnl_attr_put_u32(nlh, ETHTOOL_A_MM_VERIFY_TIME, req->verify_time); - if (req->_present.tx_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_MM_TX_ENABLED, req->tx_enabled); - if (req->_present.pmac_enabled) - mnl_attr_put_u8(nlh, ETHTOOL_A_MM_PMAC_ENABLED, req->pmac_enabled); - if (req->_present.tx_min_frag_size) - mnl_attr_put_u32(nlh, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, req->tx_min_frag_size); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ETHTOOL_MSG_CABLE_TEST_NTF - event */ -int ethtool_cable_test_ntf_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ethtool_cable_test_ntf_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_TEST_NTF_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CABLE_TEST_NTF_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } 
- } - - return MNL_CB_OK; -} - -void ethtool_cable_test_ntf_free(struct ethtool_cable_test_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - free(rsp); -} - -/* ETHTOOL_MSG_CABLE_TEST_TDR_NTF - event */ -int ethtool_cable_test_tdr_ntf_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct ethtool_cable_test_tdr_ntf_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.header = 1; - - parg.rsp_policy = ðtool_header_nest; - parg.data = &dst->header; - if (ethtool_header_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.status = 1; - dst->status = mnl_attr_get_u8(attr); - } else if (type == ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.nest = 1; - - parg.rsp_policy = ðtool_cable_nest_nest; - parg.data = &dst->nest; - if (ethtool_cable_nest_parse(&parg, attr)) - return MNL_CB_ERROR; - } - } - - return MNL_CB_OK; -} - -void ethtool_cable_test_tdr_ntf_free(struct ethtool_cable_test_tdr_ntf *rsp) -{ - ethtool_header_free(&rsp->obj.header); - ethtool_cable_nest_free(&rsp->obj.nest); - free(rsp); -} - -static const struct ynl_ntf_info ethtool_ntf_info[] = { - [ETHTOOL_MSG_LINKINFO_NTF] = { - .alloc_sz = sizeof(struct ethtool_linkinfo_get_ntf), - .cb = ethtool_linkinfo_get_rsp_parse, - .policy = ðtool_linkinfo_nest, - .free = (void *)ethtool_linkinfo_get_ntf_free, - }, - [ETHTOOL_MSG_LINKMODES_NTF] = { - .alloc_sz = sizeof(struct ethtool_linkmodes_get_ntf), - .cb = ethtool_linkmodes_get_rsp_parse, - .policy = ðtool_linkmodes_nest, - .free = (void *)ethtool_linkmodes_get_ntf_free, - }, - [ETHTOOL_MSG_DEBUG_NTF] = { - .alloc_sz = sizeof(struct ethtool_debug_get_ntf), - .cb = ethtool_debug_get_rsp_parse, - .policy = ðtool_debug_nest, - .free = (void *)ethtool_debug_get_ntf_free, - }, - [ETHTOOL_MSG_WOL_NTF] = { - .alloc_sz = sizeof(struct ethtool_wol_get_ntf), - .cb = ethtool_wol_get_rsp_parse, - .policy = ðtool_wol_nest, - .free = (void *)ethtool_wol_get_ntf_free, - }, - [ETHTOOL_MSG_FEATURES_NTF] = { - .alloc_sz = sizeof(struct ethtool_features_get_ntf), - .cb = ethtool_features_get_rsp_parse, - .policy = ðtool_features_nest, - .free = (void *)ethtool_features_get_ntf_free, - }, - [ETHTOOL_MSG_PRIVFLAGS_NTF] = { - .alloc_sz = sizeof(struct ethtool_privflags_get_ntf), - .cb = ethtool_privflags_get_rsp_parse, - .policy = ðtool_privflags_nest, - .free = (void *)ethtool_privflags_get_ntf_free, - }, - [ETHTOOL_MSG_RINGS_NTF] = { - .alloc_sz = sizeof(struct ethtool_rings_get_ntf), - .cb = ethtool_rings_get_rsp_parse, - .policy = ðtool_rings_nest, - .free = (void *)ethtool_rings_get_ntf_free, - }, - [ETHTOOL_MSG_CHANNELS_NTF] = { - .alloc_sz = sizeof(struct ethtool_channels_get_ntf), - .cb = ethtool_channels_get_rsp_parse, - .policy = ðtool_channels_nest, - .free = (void *)ethtool_channels_get_ntf_free, - }, - [ETHTOOL_MSG_COALESCE_NTF] = { - .alloc_sz = sizeof(struct ethtool_coalesce_get_ntf), - .cb = ethtool_coalesce_get_rsp_parse, - .policy = ðtool_coalesce_nest, - .free = (void *)ethtool_coalesce_get_ntf_free, - }, - [ETHTOOL_MSG_PAUSE_NTF] = { - .alloc_sz = 
sizeof(struct ethtool_pause_get_ntf), - .cb = ethtool_pause_get_rsp_parse, - .policy = ðtool_pause_nest, - .free = (void *)ethtool_pause_get_ntf_free, - }, - [ETHTOOL_MSG_EEE_NTF] = { - .alloc_sz = sizeof(struct ethtool_eee_get_ntf), - .cb = ethtool_eee_get_rsp_parse, - .policy = ðtool_eee_nest, - .free = (void *)ethtool_eee_get_ntf_free, - }, - [ETHTOOL_MSG_CABLE_TEST_NTF] = { - .alloc_sz = sizeof(struct ethtool_cable_test_ntf), - .cb = ethtool_cable_test_ntf_rsp_parse, - .policy = ðtool_cable_test_ntf_nest, - .free = (void *)ethtool_cable_test_ntf_free, - }, - [ETHTOOL_MSG_CABLE_TEST_TDR_NTF] = { - .alloc_sz = sizeof(struct ethtool_cable_test_tdr_ntf), - .cb = ethtool_cable_test_tdr_ntf_rsp_parse, - .policy = ðtool_cable_test_tdr_ntf_nest, - .free = (void *)ethtool_cable_test_tdr_ntf_free, - }, - [ETHTOOL_MSG_FEC_NTF] = { - .alloc_sz = sizeof(struct ethtool_fec_get_ntf), - .cb = ethtool_fec_get_rsp_parse, - .policy = ðtool_fec_nest, - .free = (void *)ethtool_fec_get_ntf_free, - }, - [ETHTOOL_MSG_MODULE_NTF] = { - .alloc_sz = sizeof(struct ethtool_module_get_ntf), - .cb = ethtool_module_get_rsp_parse, - .policy = ðtool_module_nest, - .free = (void *)ethtool_module_get_ntf_free, - }, - [ETHTOOL_MSG_PLCA_NTF] = { - .alloc_sz = sizeof(struct ethtool_plca_get_cfg_ntf), - .cb = ethtool_plca_get_cfg_rsp_parse, - .policy = ðtool_plca_nest, - .free = (void *)ethtool_plca_get_cfg_ntf_free, - }, - [ETHTOOL_MSG_MM_NTF] = { - .alloc_sz = sizeof(struct ethtool_mm_get_ntf), - .cb = ethtool_mm_get_rsp_parse, - .policy = ðtool_mm_nest, - .free = (void *)ethtool_mm_get_ntf_free, - }, -}; - -const struct ynl_family ynl_ethtool_family = { - .name = "ethtool", - .ntf_info = ethtool_ntf_info, - .ntf_info_size = MNL_ARRAY_SIZE(ethtool_ntf_info), -}; diff --git a/tools/net/ynl/generated/ethtool-user.h b/tools/net/ynl/generated/ethtool-user.h deleted file mode 100644 index ca0ec5fd7798..000000000000 --- a/tools/net/ynl/generated/ethtool-user.h +++ /dev/null @@ -1,5535 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/ethtool.yaml */ -/* YNL-GEN user header */ -/* YNL-ARG --user-header linux/ethtool_netlink.h --exclude-op stats-get */ - -#ifndef _LINUX_ETHTOOL_GEN_H -#define _LINUX_ETHTOOL_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_ethtool_family; - -/* Enums */ -const char *ethtool_op_str(int op); -const char *ethtool_udp_tunnel_type_str(int value); -const char *ethtool_stringset_str(enum ethtool_stringset value); - -/* Common nested types */ -struct ethtool_header { - struct { - __u32 dev_index:1; - __u32 dev_name_len; - __u32 flags:1; - } _present; - - __u32 dev_index; - char *dev_name; - __u32 flags; -}; - -struct ethtool_pause_stat { - struct { - __u32 tx_frames:1; - __u32 rx_frames:1; - } _present; - - __u64 tx_frames; - __u64 rx_frames; -}; - -struct ethtool_cable_test_tdr_cfg { - struct { - __u32 first:1; - __u32 last:1; - __u32 step:1; - __u32 pair:1; - } _present; - - __u32 first; - __u32 last; - __u32 step; - __u8 pair; -}; - -struct ethtool_fec_stat { - struct { - __u32 corrected_len; - __u32 uncorr_len; - __u32 corr_bits_len; - } _present; - - void *corrected; - void *uncorr; - void *corr_bits; -}; - -struct ethtool_mm_stat { - struct { - __u32 reassembly_errors:1; - __u32 smd_errors:1; - __u32 reassembly_ok:1; - __u32 rx_frag_count:1; - __u32 tx_frag_count:1; - __u32 hold_count:1; - } _present; - - __u64 
reassembly_errors; - __u64 smd_errors; - __u64 reassembly_ok; - __u64 rx_frag_count; - __u64 tx_frag_count; - __u64 hold_count; -}; - -struct ethtool_cable_result { - struct { - __u32 pair:1; - __u32 code:1; - } _present; - - __u8 pair; - __u8 code; -}; - -struct ethtool_cable_fault_length { - struct { - __u32 pair:1; - __u32 cm:1; - } _present; - - __u8 pair; - __u32 cm; -}; - -struct ethtool_bitset_bit { - struct { - __u32 index:1; - __u32 name_len; - __u32 value:1; - } _present; - - __u32 index; - char *name; -}; - -struct ethtool_tunnel_udp_entry { - struct { - __u32 port:1; - __u32 type:1; - } _present; - - __u16 port /* big-endian */; - __u32 type; -}; - -struct ethtool_string { - struct { - __u32 index:1; - __u32 value_len; - } _present; - - __u32 index; - char *value; -}; - -struct ethtool_cable_nest { - struct { - __u32 result:1; - __u32 fault_length:1; - } _present; - - struct ethtool_cable_result result; - struct ethtool_cable_fault_length fault_length; -}; - -struct ethtool_bitset_bits { - unsigned int n_bit; - struct ethtool_bitset_bit *bit; -}; - -struct ethtool_strings { - unsigned int n_string; - struct ethtool_string *string; -}; - -struct ethtool_bitset { - struct { - __u32 nomask:1; - __u32 size:1; - __u32 bits:1; - } _present; - - __u32 size; - struct ethtool_bitset_bits bits; -}; - -struct ethtool_stringset_ { - struct { - __u32 id:1; - __u32 count:1; - } _present; - - __u32 id; - __u32 count; - unsigned int n_strings; - struct ethtool_strings *strings; -}; - -struct ethtool_tunnel_udp_table { - struct { - __u32 size:1; - __u32 types:1; - } _present; - - __u32 size; - struct ethtool_bitset types; - unsigned int n_entry; - struct ethtool_tunnel_udp_entry *entry; -}; - -struct ethtool_stringsets { - unsigned int n_stringset; - struct ethtool_stringset_ *stringset; -}; - -struct ethtool_tunnel_udp { - struct { - __u32 table:1; - } _present; - - struct ethtool_tunnel_udp_table table; -}; - -/* ============== ETHTOOL_MSG_STRSET_GET ============== */ -/* ETHTOOL_MSG_STRSET_GET - do */ -struct ethtool_strset_get_req { - struct { - __u32 header:1; - __u32 stringsets:1; - __u32 counts_only:1; - } _present; - - struct ethtool_header header; - struct ethtool_stringsets stringsets; -}; - -static inline struct ethtool_strset_get_req *ethtool_strset_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_strset_get_req)); -} -void ethtool_strset_get_req_free(struct ethtool_strset_get_req *req); - -static inline void -ethtool_strset_get_req_set_header_dev_index(struct ethtool_strset_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_strset_get_req_set_header_dev_name(struct ethtool_strset_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_strset_get_req_set_header_flags(struct ethtool_strset_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -__ethtool_strset_get_req_set_stringsets_stringset(struct ethtool_strset_get_req *req, - struct ethtool_stringset_ *stringset, - unsigned int n_stringset) -{ - free(req->stringsets.stringset); - 
req->stringsets.stringset = stringset; - req->stringsets.n_stringset = n_stringset; -} -static inline void -ethtool_strset_get_req_set_counts_only(struct ethtool_strset_get_req *req) -{ - req->_present.counts_only = 1; -} - -struct ethtool_strset_get_rsp { - struct { - __u32 header:1; - __u32 stringsets:1; - } _present; - - struct ethtool_header header; - struct ethtool_stringsets stringsets; -}; - -void ethtool_strset_get_rsp_free(struct ethtool_strset_get_rsp *rsp); - -/* - * Get string set from the kernel. - */ -struct ethtool_strset_get_rsp * -ethtool_strset_get(struct ynl_sock *ys, struct ethtool_strset_get_req *req); - -/* ETHTOOL_MSG_STRSET_GET - dump */ -struct ethtool_strset_get_req_dump { - struct { - __u32 header:1; - __u32 stringsets:1; - __u32 counts_only:1; - } _present; - - struct ethtool_header header; - struct ethtool_stringsets stringsets; -}; - -static inline struct ethtool_strset_get_req_dump * -ethtool_strset_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_strset_get_req_dump)); -} -void ethtool_strset_get_req_dump_free(struct ethtool_strset_get_req_dump *req); - -static inline void -ethtool_strset_get_req_dump_set_header_dev_index(struct ethtool_strset_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_strset_get_req_dump_set_header_dev_name(struct ethtool_strset_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_strset_get_req_dump_set_header_flags(struct ethtool_strset_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -__ethtool_strset_get_req_dump_set_stringsets_stringset(struct ethtool_strset_get_req_dump *req, - struct ethtool_stringset_ *stringset, - unsigned int n_stringset) -{ - free(req->stringsets.stringset); - req->stringsets.stringset = stringset; - req->stringsets.n_stringset = n_stringset; -} -static inline void -ethtool_strset_get_req_dump_set_counts_only(struct ethtool_strset_get_req_dump *req) -{ - req->_present.counts_only = 1; -} - -struct ethtool_strset_get_list { - struct ethtool_strset_get_list *next; - struct ethtool_strset_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_strset_get_list_free(struct ethtool_strset_get_list *rsp); - -struct ethtool_strset_get_list * -ethtool_strset_get_dump(struct ynl_sock *ys, - struct ethtool_strset_get_req_dump *req); - -/* ============== ETHTOOL_MSG_LINKINFO_GET ============== */ -/* ETHTOOL_MSG_LINKINFO_GET - do */ -struct ethtool_linkinfo_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkinfo_get_req * -ethtool_linkinfo_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkinfo_get_req)); -} -void ethtool_linkinfo_get_req_free(struct ethtool_linkinfo_get_req *req); - -static inline void -ethtool_linkinfo_get_req_set_header_dev_index(struct ethtool_linkinfo_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void 
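On the header side, each request type pairs an inline *_req_alloc() with setters like the ones above, so a typical caller never touches the _present bits by hand; a linkinfo query, for example, reduces to roughly the following sketch (the ifindex argument and the open ynl_sock are assumed as before):

/* Fetch link info for one interface using the inline request helpers. */
static void show_linkinfo(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_linkinfo_get_req *req;
	struct ethtool_linkinfo_get_rsp *rsp;

	req = ethtool_linkinfo_get_req_alloc();
	ethtool_linkinfo_get_req_set_header_dev_index(req, ifindex);

	rsp = ethtool_linkinfo_get(ys, req);
	ethtool_linkinfo_get_req_free(req);
	if (!rsp)
		return;

	if (rsp->_present.port)
		printf("ifindex %u: port type %u\n", ifindex, rsp->port);

	ethtool_linkinfo_get_rsp_free(rsp);
}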
-ethtool_linkinfo_get_req_set_header_dev_name(struct ethtool_linkinfo_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkinfo_get_req_set_header_flags(struct ethtool_linkinfo_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkinfo_get_rsp { - struct { - __u32 header:1; - __u32 port:1; - __u32 phyaddr:1; - __u32 tp_mdix:1; - __u32 tp_mdix_ctrl:1; - __u32 transceiver:1; - } _present; - - struct ethtool_header header; - __u8 port; - __u8 phyaddr; - __u8 tp_mdix; - __u8 tp_mdix_ctrl; - __u8 transceiver; -}; - -void ethtool_linkinfo_get_rsp_free(struct ethtool_linkinfo_get_rsp *rsp); - -/* - * Get link info. - */ -struct ethtool_linkinfo_get_rsp * -ethtool_linkinfo_get(struct ynl_sock *ys, struct ethtool_linkinfo_get_req *req); - -/* ETHTOOL_MSG_LINKINFO_GET - dump */ -struct ethtool_linkinfo_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkinfo_get_req_dump * -ethtool_linkinfo_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkinfo_get_req_dump)); -} -void -ethtool_linkinfo_get_req_dump_free(struct ethtool_linkinfo_get_req_dump *req); - -static inline void -ethtool_linkinfo_get_req_dump_set_header_dev_index(struct ethtool_linkinfo_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkinfo_get_req_dump_set_header_dev_name(struct ethtool_linkinfo_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkinfo_get_req_dump_set_header_flags(struct ethtool_linkinfo_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkinfo_get_list { - struct ethtool_linkinfo_get_list *next; - struct ethtool_linkinfo_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkinfo_get_list_free(struct ethtool_linkinfo_get_list *rsp); - -struct ethtool_linkinfo_get_list * -ethtool_linkinfo_get_dump(struct ynl_sock *ys, - struct ethtool_linkinfo_get_req_dump *req); - -/* ETHTOOL_MSG_LINKINFO_GET - notify */ -struct ethtool_linkinfo_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_linkinfo_get_ntf *ntf); - struct ethtool_linkinfo_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkinfo_get_ntf_free(struct ethtool_linkinfo_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_LINKINFO_SET ============== */ -/* ETHTOOL_MSG_LINKINFO_SET - do */ -struct ethtool_linkinfo_set_req { - struct { - __u32 header:1; - __u32 port:1; - __u32 phyaddr:1; - __u32 tp_mdix:1; - __u32 tp_mdix_ctrl:1; - __u32 transceiver:1; - } _present; - - struct ethtool_header header; - __u8 port; - __u8 phyaddr; - __u8 tp_mdix; - __u8 
tp_mdix_ctrl; - __u8 transceiver; -}; - -static inline struct ethtool_linkinfo_set_req * -ethtool_linkinfo_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkinfo_set_req)); -} -void ethtool_linkinfo_set_req_free(struct ethtool_linkinfo_set_req *req); - -static inline void -ethtool_linkinfo_set_req_set_header_dev_index(struct ethtool_linkinfo_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkinfo_set_req_set_header_dev_name(struct ethtool_linkinfo_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkinfo_set_req_set_header_flags(struct ethtool_linkinfo_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_linkinfo_set_req_set_port(struct ethtool_linkinfo_set_req *req, - __u8 port) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -ethtool_linkinfo_set_req_set_phyaddr(struct ethtool_linkinfo_set_req *req, - __u8 phyaddr) -{ - req->_present.phyaddr = 1; - req->phyaddr = phyaddr; -} -static inline void -ethtool_linkinfo_set_req_set_tp_mdix(struct ethtool_linkinfo_set_req *req, - __u8 tp_mdix) -{ - req->_present.tp_mdix = 1; - req->tp_mdix = tp_mdix; -} -static inline void -ethtool_linkinfo_set_req_set_tp_mdix_ctrl(struct ethtool_linkinfo_set_req *req, - __u8 tp_mdix_ctrl) -{ - req->_present.tp_mdix_ctrl = 1; - req->tp_mdix_ctrl = tp_mdix_ctrl; -} -static inline void -ethtool_linkinfo_set_req_set_transceiver(struct ethtool_linkinfo_set_req *req, - __u8 transceiver) -{ - req->_present.transceiver = 1; - req->transceiver = transceiver; -} - -/* - * Set link info. 
- */ -int ethtool_linkinfo_set(struct ynl_sock *ys, - struct ethtool_linkinfo_set_req *req); - -/* ============== ETHTOOL_MSG_LINKMODES_GET ============== */ -/* ETHTOOL_MSG_LINKMODES_GET - do */ -struct ethtool_linkmodes_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkmodes_get_req * -ethtool_linkmodes_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkmodes_get_req)); -} -void ethtool_linkmodes_get_req_free(struct ethtool_linkmodes_get_req *req); - -static inline void -ethtool_linkmodes_get_req_set_header_dev_index(struct ethtool_linkmodes_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkmodes_get_req_set_header_dev_name(struct ethtool_linkmodes_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkmodes_get_req_set_header_flags(struct ethtool_linkmodes_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkmodes_get_rsp { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 ours:1; - __u32 peer:1; - __u32 speed:1; - __u32 duplex:1; - __u32 master_slave_cfg:1; - __u32 master_slave_state:1; - __u32 lanes:1; - __u32 rate_matching:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - struct ethtool_bitset ours; - struct ethtool_bitset peer; - __u32 speed; - __u8 duplex; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u32 lanes; - __u8 rate_matching; -}; - -void ethtool_linkmodes_get_rsp_free(struct ethtool_linkmodes_get_rsp *rsp); - -/* - * Get link modes. 
- */ -struct ethtool_linkmodes_get_rsp * -ethtool_linkmodes_get(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req *req); - -/* ETHTOOL_MSG_LINKMODES_GET - dump */ -struct ethtool_linkmodes_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkmodes_get_req_dump * -ethtool_linkmodes_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkmodes_get_req_dump)); -} -void -ethtool_linkmodes_get_req_dump_free(struct ethtool_linkmodes_get_req_dump *req); - -static inline void -ethtool_linkmodes_get_req_dump_set_header_dev_index(struct ethtool_linkmodes_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkmodes_get_req_dump_set_header_dev_name(struct ethtool_linkmodes_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkmodes_get_req_dump_set_header_flags(struct ethtool_linkmodes_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkmodes_get_list { - struct ethtool_linkmodes_get_list *next; - struct ethtool_linkmodes_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkmodes_get_list_free(struct ethtool_linkmodes_get_list *rsp); - -struct ethtool_linkmodes_get_list * -ethtool_linkmodes_get_dump(struct ynl_sock *ys, - struct ethtool_linkmodes_get_req_dump *req); - -/* ETHTOOL_MSG_LINKMODES_GET - notify */ -struct ethtool_linkmodes_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_linkmodes_get_ntf *ntf); - struct ethtool_linkmodes_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkmodes_get_ntf_free(struct ethtool_linkmodes_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_LINKMODES_SET ============== */ -/* ETHTOOL_MSG_LINKMODES_SET - do */ -struct ethtool_linkmodes_set_req { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 ours:1; - __u32 peer:1; - __u32 speed:1; - __u32 duplex:1; - __u32 master_slave_cfg:1; - __u32 master_slave_state:1; - __u32 lanes:1; - __u32 rate_matching:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - struct ethtool_bitset ours; - struct ethtool_bitset peer; - __u32 speed; - __u8 duplex; - __u8 master_slave_cfg; - __u8 master_slave_state; - __u32 lanes; - __u8 rate_matching; -}; - -static inline struct ethtool_linkmodes_set_req * -ethtool_linkmodes_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkmodes_set_req)); -} -void ethtool_linkmodes_set_req_free(struct ethtool_linkmodes_set_req *req); - -static inline void -ethtool_linkmodes_set_req_set_header_dev_index(struct ethtool_linkmodes_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkmodes_set_req_set_header_dev_name(struct ethtool_linkmodes_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - 
memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkmodes_set_req_set_header_flags(struct ethtool_linkmodes_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_linkmodes_set_req_set_autoneg(struct ethtool_linkmodes_set_req *req, - __u8 autoneg) -{ - req->_present.autoneg = 1; - req->autoneg = autoneg; -} -static inline void -ethtool_linkmodes_set_req_set_ours_nomask(struct ethtool_linkmodes_set_req *req) -{ - req->_present.ours = 1; - req->ours._present.nomask = 1; -} -static inline void -ethtool_linkmodes_set_req_set_ours_size(struct ethtool_linkmodes_set_req *req, - __u32 size) -{ - req->_present.ours = 1; - req->ours._present.size = 1; - req->ours.size = size; -} -static inline void -__ethtool_linkmodes_set_req_set_ours_bits_bit(struct ethtool_linkmodes_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->ours.bits.bit); - req->ours.bits.bit = bit; - req->ours.bits.n_bit = n_bit; -} -static inline void -ethtool_linkmodes_set_req_set_peer_nomask(struct ethtool_linkmodes_set_req *req) -{ - req->_present.peer = 1; - req->peer._present.nomask = 1; -} -static inline void -ethtool_linkmodes_set_req_set_peer_size(struct ethtool_linkmodes_set_req *req, - __u32 size) -{ - req->_present.peer = 1; - req->peer._present.size = 1; - req->peer.size = size; -} -static inline void -__ethtool_linkmodes_set_req_set_peer_bits_bit(struct ethtool_linkmodes_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->peer.bits.bit); - req->peer.bits.bit = bit; - req->peer.bits.n_bit = n_bit; -} -static inline void -ethtool_linkmodes_set_req_set_speed(struct ethtool_linkmodes_set_req *req, - __u32 speed) -{ - req->_present.speed = 1; - req->speed = speed; -} -static inline void -ethtool_linkmodes_set_req_set_duplex(struct ethtool_linkmodes_set_req *req, - __u8 duplex) -{ - req->_present.duplex = 1; - req->duplex = duplex; -} -static inline void -ethtool_linkmodes_set_req_set_master_slave_cfg(struct ethtool_linkmodes_set_req *req, - __u8 master_slave_cfg) -{ - req->_present.master_slave_cfg = 1; - req->master_slave_cfg = master_slave_cfg; -} -static inline void -ethtool_linkmodes_set_req_set_master_slave_state(struct ethtool_linkmodes_set_req *req, - __u8 master_slave_state) -{ - req->_present.master_slave_state = 1; - req->master_slave_state = master_slave_state; -} -static inline void -ethtool_linkmodes_set_req_set_lanes(struct ethtool_linkmodes_set_req *req, - __u32 lanes) -{ - req->_present.lanes = 1; - req->lanes = lanes; -} -static inline void -ethtool_linkmodes_set_req_set_rate_matching(struct ethtool_linkmodes_set_req *req, - __u8 rate_matching) -{ - req->_present.rate_matching = 1; - req->rate_matching = rate_matching; -} - -/* - * Set link modes. 
- */ -int ethtool_linkmodes_set(struct ynl_sock *ys, - struct ethtool_linkmodes_set_req *req); - -/* ============== ETHTOOL_MSG_LINKSTATE_GET ============== */ -/* ETHTOOL_MSG_LINKSTATE_GET - do */ -struct ethtool_linkstate_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkstate_get_req * -ethtool_linkstate_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkstate_get_req)); -} -void ethtool_linkstate_get_req_free(struct ethtool_linkstate_get_req *req); - -static inline void -ethtool_linkstate_get_req_set_header_dev_index(struct ethtool_linkstate_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkstate_get_req_set_header_dev_name(struct ethtool_linkstate_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkstate_get_req_set_header_flags(struct ethtool_linkstate_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkstate_get_rsp { - struct { - __u32 header:1; - __u32 link:1; - __u32 sqi:1; - __u32 sqi_max:1; - __u32 ext_state:1; - __u32 ext_substate:1; - __u32 ext_down_cnt:1; - } _present; - - struct ethtool_header header; - __u8 link; - __u32 sqi; - __u32 sqi_max; - __u8 ext_state; - __u8 ext_substate; - __u32 ext_down_cnt; -}; - -void ethtool_linkstate_get_rsp_free(struct ethtool_linkstate_get_rsp *rsp); - -/* - * Get link state. 
- */ -struct ethtool_linkstate_get_rsp * -ethtool_linkstate_get(struct ynl_sock *ys, - struct ethtool_linkstate_get_req *req); - -/* ETHTOOL_MSG_LINKSTATE_GET - dump */ -struct ethtool_linkstate_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_linkstate_get_req_dump * -ethtool_linkstate_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_linkstate_get_req_dump)); -} -void -ethtool_linkstate_get_req_dump_free(struct ethtool_linkstate_get_req_dump *req); - -static inline void -ethtool_linkstate_get_req_dump_set_header_dev_index(struct ethtool_linkstate_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_linkstate_get_req_dump_set_header_dev_name(struct ethtool_linkstate_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_linkstate_get_req_dump_set_header_flags(struct ethtool_linkstate_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_linkstate_get_list { - struct ethtool_linkstate_get_list *next; - struct ethtool_linkstate_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_linkstate_get_list_free(struct ethtool_linkstate_get_list *rsp); - -struct ethtool_linkstate_get_list * -ethtool_linkstate_get_dump(struct ynl_sock *ys, - struct ethtool_linkstate_get_req_dump *req); - -/* ============== ETHTOOL_MSG_DEBUG_GET ============== */ -/* ETHTOOL_MSG_DEBUG_GET - do */ -struct ethtool_debug_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_debug_get_req *ethtool_debug_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_debug_get_req)); -} -void ethtool_debug_get_req_free(struct ethtool_debug_get_req *req); - -static inline void -ethtool_debug_get_req_set_header_dev_index(struct ethtool_debug_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_debug_get_req_set_header_dev_name(struct ethtool_debug_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_debug_get_req_set_header_flags(struct ethtool_debug_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_debug_get_rsp { - struct { - __u32 header:1; - __u32 msgmask:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset msgmask; -}; - -void ethtool_debug_get_rsp_free(struct ethtool_debug_get_rsp *rsp); - -/* - * Get debug message mask. 
- */ -struct ethtool_debug_get_rsp * -ethtool_debug_get(struct ynl_sock *ys, struct ethtool_debug_get_req *req); - -/* ETHTOOL_MSG_DEBUG_GET - dump */ -struct ethtool_debug_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_debug_get_req_dump * -ethtool_debug_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_debug_get_req_dump)); -} -void ethtool_debug_get_req_dump_free(struct ethtool_debug_get_req_dump *req); - -static inline void -ethtool_debug_get_req_dump_set_header_dev_index(struct ethtool_debug_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_debug_get_req_dump_set_header_dev_name(struct ethtool_debug_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_debug_get_req_dump_set_header_flags(struct ethtool_debug_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_debug_get_list { - struct ethtool_debug_get_list *next; - struct ethtool_debug_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_debug_get_list_free(struct ethtool_debug_get_list *rsp); - -struct ethtool_debug_get_list * -ethtool_debug_get_dump(struct ynl_sock *ys, - struct ethtool_debug_get_req_dump *req); - -/* ETHTOOL_MSG_DEBUG_GET - notify */ -struct ethtool_debug_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_debug_get_ntf *ntf); - struct ethtool_debug_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_debug_get_ntf_free(struct ethtool_debug_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_DEBUG_SET ============== */ -/* ETHTOOL_MSG_DEBUG_SET - do */ -struct ethtool_debug_set_req { - struct { - __u32 header:1; - __u32 msgmask:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset msgmask; -}; - -static inline struct ethtool_debug_set_req *ethtool_debug_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_debug_set_req)); -} -void ethtool_debug_set_req_free(struct ethtool_debug_set_req *req); - -static inline void -ethtool_debug_set_req_set_header_dev_index(struct ethtool_debug_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_debug_set_req_set_header_dev_name(struct ethtool_debug_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_debug_set_req_set_header_flags(struct ethtool_debug_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_debug_set_req_set_msgmask_nomask(struct ethtool_debug_set_req *req) -{ - req->_present.msgmask = 1; - 
req->msgmask._present.nomask = 1; -} -static inline void -ethtool_debug_set_req_set_msgmask_size(struct ethtool_debug_set_req *req, - __u32 size) -{ - req->_present.msgmask = 1; - req->msgmask._present.size = 1; - req->msgmask.size = size; -} -static inline void -__ethtool_debug_set_req_set_msgmask_bits_bit(struct ethtool_debug_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->msgmask.bits.bit); - req->msgmask.bits.bit = bit; - req->msgmask.bits.n_bit = n_bit; -} - -/* - * Set debug message mask. - */ -int ethtool_debug_set(struct ynl_sock *ys, struct ethtool_debug_set_req *req); - -/* ============== ETHTOOL_MSG_WOL_GET ============== */ -/* ETHTOOL_MSG_WOL_GET - do */ -struct ethtool_wol_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_wol_get_req *ethtool_wol_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_wol_get_req)); -} -void ethtool_wol_get_req_free(struct ethtool_wol_get_req *req); - -static inline void -ethtool_wol_get_req_set_header_dev_index(struct ethtool_wol_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_wol_get_req_set_header_dev_name(struct ethtool_wol_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_wol_get_req_set_header_flags(struct ethtool_wol_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_wol_get_rsp { - struct { - __u32 header:1; - __u32 modes:1; - __u32 sopass_len; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - void *sopass; -}; - -void ethtool_wol_get_rsp_free(struct ethtool_wol_get_rsp *rsp); - -/* - * Get WOL params. 
- */ -struct ethtool_wol_get_rsp * -ethtool_wol_get(struct ynl_sock *ys, struct ethtool_wol_get_req *req); - -/* ETHTOOL_MSG_WOL_GET - dump */ -struct ethtool_wol_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_wol_get_req_dump * -ethtool_wol_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_wol_get_req_dump)); -} -void ethtool_wol_get_req_dump_free(struct ethtool_wol_get_req_dump *req); - -static inline void -ethtool_wol_get_req_dump_set_header_dev_index(struct ethtool_wol_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_wol_get_req_dump_set_header_dev_name(struct ethtool_wol_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_wol_get_req_dump_set_header_flags(struct ethtool_wol_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_wol_get_list { - struct ethtool_wol_get_list *next; - struct ethtool_wol_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_wol_get_list_free(struct ethtool_wol_get_list *rsp); - -struct ethtool_wol_get_list * -ethtool_wol_get_dump(struct ynl_sock *ys, struct ethtool_wol_get_req_dump *req); - -/* ETHTOOL_MSG_WOL_GET - notify */ -struct ethtool_wol_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_wol_get_ntf *ntf); - struct ethtool_wol_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_wol_get_ntf_free(struct ethtool_wol_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_WOL_SET ============== */ -/* ETHTOOL_MSG_WOL_SET - do */ -struct ethtool_wol_set_req { - struct { - __u32 header:1; - __u32 modes:1; - __u32 sopass_len; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - void *sopass; -}; - -static inline struct ethtool_wol_set_req *ethtool_wol_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_wol_set_req)); -} -void ethtool_wol_set_req_free(struct ethtool_wol_set_req *req); - -static inline void -ethtool_wol_set_req_set_header_dev_index(struct ethtool_wol_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_wol_set_req_set_header_dev_name(struct ethtool_wol_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_wol_set_req_set_header_flags(struct ethtool_wol_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_wol_set_req_set_modes_nomask(struct ethtool_wol_set_req *req) -{ - req->_present.modes = 1; - req->modes._present.nomask = 1; -} -static inline void 
-ethtool_wol_set_req_set_modes_size(struct ethtool_wol_set_req *req, __u32 size) -{ - req->_present.modes = 1; - req->modes._present.size = 1; - req->modes.size = size; -} -static inline void -__ethtool_wol_set_req_set_modes_bits_bit(struct ethtool_wol_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes.bits.bit); - req->modes.bits.bit = bit; - req->modes.bits.n_bit = n_bit; -} -static inline void -ethtool_wol_set_req_set_sopass(struct ethtool_wol_set_req *req, - const void *sopass, size_t len) -{ - free(req->sopass); - req->_present.sopass_len = len; - req->sopass = malloc(req->_present.sopass_len); - memcpy(req->sopass, sopass, req->_present.sopass_len); -} - -/* - * Set WOL params. - */ -int ethtool_wol_set(struct ynl_sock *ys, struct ethtool_wol_set_req *req); - -/* ============== ETHTOOL_MSG_FEATURES_GET ============== */ -/* ETHTOOL_MSG_FEATURES_GET - do */ -struct ethtool_features_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_features_get_req * -ethtool_features_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_features_get_req)); -} -void ethtool_features_get_req_free(struct ethtool_features_get_req *req); - -static inline void -ethtool_features_get_req_set_header_dev_index(struct ethtool_features_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_features_get_req_set_header_dev_name(struct ethtool_features_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_features_get_req_set_header_flags(struct ethtool_features_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_features_get_rsp { - struct { - __u32 header:1; - __u32 hw:1; - __u32 wanted:1; - __u32 active:1; - __u32 nochange:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset hw; - struct ethtool_bitset wanted; - struct ethtool_bitset active; - struct ethtool_bitset nochange; -}; - -void ethtool_features_get_rsp_free(struct ethtool_features_get_rsp *rsp); - -/* - * Get features. 
- */ -struct ethtool_features_get_rsp * -ethtool_features_get(struct ynl_sock *ys, struct ethtool_features_get_req *req); - -/* ETHTOOL_MSG_FEATURES_GET - dump */ -struct ethtool_features_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_features_get_req_dump * -ethtool_features_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_features_get_req_dump)); -} -void -ethtool_features_get_req_dump_free(struct ethtool_features_get_req_dump *req); - -static inline void -ethtool_features_get_req_dump_set_header_dev_index(struct ethtool_features_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_features_get_req_dump_set_header_dev_name(struct ethtool_features_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_features_get_req_dump_set_header_flags(struct ethtool_features_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_features_get_list { - struct ethtool_features_get_list *next; - struct ethtool_features_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_features_get_list_free(struct ethtool_features_get_list *rsp); - -struct ethtool_features_get_list * -ethtool_features_get_dump(struct ynl_sock *ys, - struct ethtool_features_get_req_dump *req); - -/* ETHTOOL_MSG_FEATURES_GET - notify */ -struct ethtool_features_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_features_get_ntf *ntf); - struct ethtool_features_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_features_get_ntf_free(struct ethtool_features_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_FEATURES_SET ============== */ -/* ETHTOOL_MSG_FEATURES_SET - do */ -struct ethtool_features_set_req { - struct { - __u32 header:1; - __u32 hw:1; - __u32 wanted:1; - __u32 active:1; - __u32 nochange:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset hw; - struct ethtool_bitset wanted; - struct ethtool_bitset active; - struct ethtool_bitset nochange; -}; - -static inline struct ethtool_features_set_req * -ethtool_features_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_features_set_req)); -} -void ethtool_features_set_req_free(struct ethtool_features_set_req *req); - -static inline void -ethtool_features_set_req_set_header_dev_index(struct ethtool_features_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_features_set_req_set_header_dev_name(struct ethtool_features_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_features_set_req_set_header_flags(struct 
ethtool_features_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_features_set_req_set_hw_nomask(struct ethtool_features_set_req *req) -{ - req->_present.hw = 1; - req->hw._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_hw_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.hw = 1; - req->hw._present.size = 1; - req->hw.size = size; -} -static inline void -__ethtool_features_set_req_set_hw_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->hw.bits.bit); - req->hw.bits.bit = bit; - req->hw.bits.n_bit = n_bit; -} -static inline void -ethtool_features_set_req_set_wanted_nomask(struct ethtool_features_set_req *req) -{ - req->_present.wanted = 1; - req->wanted._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_wanted_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.wanted = 1; - req->wanted._present.size = 1; - req->wanted.size = size; -} -static inline void -__ethtool_features_set_req_set_wanted_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->wanted.bits.bit); - req->wanted.bits.bit = bit; - req->wanted.bits.n_bit = n_bit; -} -static inline void -ethtool_features_set_req_set_active_nomask(struct ethtool_features_set_req *req) -{ - req->_present.active = 1; - req->active._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_active_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.active = 1; - req->active._present.size = 1; - req->active.size = size; -} -static inline void -__ethtool_features_set_req_set_active_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->active.bits.bit); - req->active.bits.bit = bit; - req->active.bits.n_bit = n_bit; -} -static inline void -ethtool_features_set_req_set_nochange_nomask(struct ethtool_features_set_req *req) -{ - req->_present.nochange = 1; - req->nochange._present.nomask = 1; -} -static inline void -ethtool_features_set_req_set_nochange_size(struct ethtool_features_set_req *req, - __u32 size) -{ - req->_present.nochange = 1; - req->nochange._present.size = 1; - req->nochange.size = size; -} -static inline void -__ethtool_features_set_req_set_nochange_bits_bit(struct ethtool_features_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->nochange.bits.bit); - req->nochange.bits.bit = bit; - req->nochange.bits.n_bit = n_bit; -} - -struct ethtool_features_set_rsp { - struct { - __u32 header:1; - __u32 hw:1; - __u32 wanted:1; - __u32 active:1; - __u32 nochange:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset hw; - struct ethtool_bitset wanted; - struct ethtool_bitset active; - struct ethtool_bitset nochange; -}; - -void ethtool_features_set_rsp_free(struct ethtool_features_set_rsp *rsp); - -/* - * Set features. 
- */ -struct ethtool_features_set_rsp * -ethtool_features_set(struct ynl_sock *ys, struct ethtool_features_set_req *req); - -/* ============== ETHTOOL_MSG_PRIVFLAGS_GET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_GET - do */ -struct ethtool_privflags_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_privflags_get_req * -ethtool_privflags_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_privflags_get_req)); -} -void ethtool_privflags_get_req_free(struct ethtool_privflags_get_req *req); - -static inline void -ethtool_privflags_get_req_set_header_dev_index(struct ethtool_privflags_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_privflags_get_req_set_header_dev_name(struct ethtool_privflags_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_privflags_get_req_set_header_flags(struct ethtool_privflags_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_privflags_get_rsp { - struct { - __u32 header:1; - __u32 flags:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset flags; -}; - -void ethtool_privflags_get_rsp_free(struct ethtool_privflags_get_rsp *rsp); - -/* - * Get device private flags. - */ -struct ethtool_privflags_get_rsp * -ethtool_privflags_get(struct ynl_sock *ys, - struct ethtool_privflags_get_req *req); - -/* ETHTOOL_MSG_PRIVFLAGS_GET - dump */ -struct ethtool_privflags_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_privflags_get_req_dump * -ethtool_privflags_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_privflags_get_req_dump)); -} -void -ethtool_privflags_get_req_dump_free(struct ethtool_privflags_get_req_dump *req); - -static inline void -ethtool_privflags_get_req_dump_set_header_dev_index(struct ethtool_privflags_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_privflags_get_req_dump_set_header_dev_name(struct ethtool_privflags_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_privflags_get_req_dump_set_header_flags(struct ethtool_privflags_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_privflags_get_list { - struct ethtool_privflags_get_list *next; - struct ethtool_privflags_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_privflags_get_list_free(struct ethtool_privflags_get_list *rsp); - -struct ethtool_privflags_get_list * -ethtool_privflags_get_dump(struct ynl_sock *ys, - struct 
ethtool_privflags_get_req_dump *req); - -/* ETHTOOL_MSG_PRIVFLAGS_GET - notify */ -struct ethtool_privflags_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_privflags_get_ntf *ntf); - struct ethtool_privflags_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_privflags_get_ntf_free(struct ethtool_privflags_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_PRIVFLAGS_SET ============== */ -/* ETHTOOL_MSG_PRIVFLAGS_SET - do */ -struct ethtool_privflags_set_req { - struct { - __u32 header:1; - __u32 flags:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset flags; -}; - -static inline struct ethtool_privflags_set_req * -ethtool_privflags_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_privflags_set_req)); -} -void ethtool_privflags_set_req_free(struct ethtool_privflags_set_req *req); - -static inline void -ethtool_privflags_set_req_set_header_dev_index(struct ethtool_privflags_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_privflags_set_req_set_header_dev_name(struct ethtool_privflags_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_privflags_set_req_set_header_flags(struct ethtool_privflags_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_privflags_set_req_set_flags_nomask(struct ethtool_privflags_set_req *req) -{ - req->_present.flags = 1; - req->flags._present.nomask = 1; -} -static inline void -ethtool_privflags_set_req_set_flags_size(struct ethtool_privflags_set_req *req, - __u32 size) -{ - req->_present.flags = 1; - req->flags._present.size = 1; - req->flags.size = size; -} -static inline void -__ethtool_privflags_set_req_set_flags_bits_bit(struct ethtool_privflags_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->flags.bits.bit); - req->flags.bits.bit = bit; - req->flags.bits.n_bit = n_bit; -} - -/* - * Set device private flags. 
- */ -int ethtool_privflags_set(struct ynl_sock *ys, - struct ethtool_privflags_set_req *req); - -/* ============== ETHTOOL_MSG_RINGS_GET ============== */ -/* ETHTOOL_MSG_RINGS_GET - do */ -struct ethtool_rings_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rings_get_req *ethtool_rings_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rings_get_req)); -} -void ethtool_rings_get_req_free(struct ethtool_rings_get_req *req); - -static inline void -ethtool_rings_get_req_set_header_dev_index(struct ethtool_rings_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rings_get_req_set_header_dev_name(struct ethtool_rings_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rings_get_req_set_header_flags(struct ethtool_rings_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rings_get_rsp { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 rx_mini_max:1; - __u32 rx_jumbo_max:1; - __u32 tx_max:1; - __u32 rx:1; - __u32 rx_mini:1; - __u32 rx_jumbo:1; - __u32 tx:1; - __u32 rx_buf_len:1; - __u32 tcp_data_split:1; - __u32 cqe_size:1; - __u32 tx_push:1; - __u32 rx_push:1; - __u32 tx_push_buf_len:1; - __u32 tx_push_buf_len_max:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 rx_mini_max; - __u32 rx_jumbo_max; - __u32 tx_max; - __u32 rx; - __u32 rx_mini; - __u32 rx_jumbo; - __u32 tx; - __u32 rx_buf_len; - __u8 tcp_data_split; - __u32 cqe_size; - __u8 tx_push; - __u8 rx_push; - __u32 tx_push_buf_len; - __u32 tx_push_buf_len_max; -}; - -void ethtool_rings_get_rsp_free(struct ethtool_rings_get_rsp *rsp); - -/* - * Get ring params. 
- */ -struct ethtool_rings_get_rsp * -ethtool_rings_get(struct ynl_sock *ys, struct ethtool_rings_get_req *req); - -/* ETHTOOL_MSG_RINGS_GET - dump */ -struct ethtool_rings_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rings_get_req_dump * -ethtool_rings_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rings_get_req_dump)); -} -void ethtool_rings_get_req_dump_free(struct ethtool_rings_get_req_dump *req); - -static inline void -ethtool_rings_get_req_dump_set_header_dev_index(struct ethtool_rings_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rings_get_req_dump_set_header_dev_name(struct ethtool_rings_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rings_get_req_dump_set_header_flags(struct ethtool_rings_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rings_get_list { - struct ethtool_rings_get_list *next; - struct ethtool_rings_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_rings_get_list_free(struct ethtool_rings_get_list *rsp); - -struct ethtool_rings_get_list * -ethtool_rings_get_dump(struct ynl_sock *ys, - struct ethtool_rings_get_req_dump *req); - -/* ETHTOOL_MSG_RINGS_GET - notify */ -struct ethtool_rings_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_rings_get_ntf *ntf); - struct ethtool_rings_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_rings_get_ntf_free(struct ethtool_rings_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_RINGS_SET ============== */ -/* ETHTOOL_MSG_RINGS_SET - do */ -struct ethtool_rings_set_req { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 rx_mini_max:1; - __u32 rx_jumbo_max:1; - __u32 tx_max:1; - __u32 rx:1; - __u32 rx_mini:1; - __u32 rx_jumbo:1; - __u32 tx:1; - __u32 rx_buf_len:1; - __u32 tcp_data_split:1; - __u32 cqe_size:1; - __u32 tx_push:1; - __u32 rx_push:1; - __u32 tx_push_buf_len:1; - __u32 tx_push_buf_len_max:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 rx_mini_max; - __u32 rx_jumbo_max; - __u32 tx_max; - __u32 rx; - __u32 rx_mini; - __u32 rx_jumbo; - __u32 tx; - __u32 rx_buf_len; - __u8 tcp_data_split; - __u32 cqe_size; - __u8 tx_push; - __u8 rx_push; - __u32 tx_push_buf_len; - __u32 tx_push_buf_len_max; -}; - -static inline struct ethtool_rings_set_req *ethtool_rings_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rings_set_req)); -} -void ethtool_rings_set_req_free(struct ethtool_rings_set_req *req); - -static inline void -ethtool_rings_set_req_set_header_dev_index(struct ethtool_rings_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rings_set_req_set_header_dev_name(struct ethtool_rings_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = 
malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rings_set_req_set_header_flags(struct ethtool_rings_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_rings_set_req_set_rx_max(struct ethtool_rings_set_req *req, - __u32 rx_max) -{ - req->_present.rx_max = 1; - req->rx_max = rx_max; -} -static inline void -ethtool_rings_set_req_set_rx_mini_max(struct ethtool_rings_set_req *req, - __u32 rx_mini_max) -{ - req->_present.rx_mini_max = 1; - req->rx_mini_max = rx_mini_max; -} -static inline void -ethtool_rings_set_req_set_rx_jumbo_max(struct ethtool_rings_set_req *req, - __u32 rx_jumbo_max) -{ - req->_present.rx_jumbo_max = 1; - req->rx_jumbo_max = rx_jumbo_max; -} -static inline void -ethtool_rings_set_req_set_tx_max(struct ethtool_rings_set_req *req, - __u32 tx_max) -{ - req->_present.tx_max = 1; - req->tx_max = tx_max; -} -static inline void -ethtool_rings_set_req_set_rx(struct ethtool_rings_set_req *req, __u32 rx) -{ - req->_present.rx = 1; - req->rx = rx; -} -static inline void -ethtool_rings_set_req_set_rx_mini(struct ethtool_rings_set_req *req, - __u32 rx_mini) -{ - req->_present.rx_mini = 1; - req->rx_mini = rx_mini; -} -static inline void -ethtool_rings_set_req_set_rx_jumbo(struct ethtool_rings_set_req *req, - __u32 rx_jumbo) -{ - req->_present.rx_jumbo = 1; - req->rx_jumbo = rx_jumbo; -} -static inline void -ethtool_rings_set_req_set_tx(struct ethtool_rings_set_req *req, __u32 tx) -{ - req->_present.tx = 1; - req->tx = tx; -} -static inline void -ethtool_rings_set_req_set_rx_buf_len(struct ethtool_rings_set_req *req, - __u32 rx_buf_len) -{ - req->_present.rx_buf_len = 1; - req->rx_buf_len = rx_buf_len; -} -static inline void -ethtool_rings_set_req_set_tcp_data_split(struct ethtool_rings_set_req *req, - __u8 tcp_data_split) -{ - req->_present.tcp_data_split = 1; - req->tcp_data_split = tcp_data_split; -} -static inline void -ethtool_rings_set_req_set_cqe_size(struct ethtool_rings_set_req *req, - __u32 cqe_size) -{ - req->_present.cqe_size = 1; - req->cqe_size = cqe_size; -} -static inline void -ethtool_rings_set_req_set_tx_push(struct ethtool_rings_set_req *req, - __u8 tx_push) -{ - req->_present.tx_push = 1; - req->tx_push = tx_push; -} -static inline void -ethtool_rings_set_req_set_rx_push(struct ethtool_rings_set_req *req, - __u8 rx_push) -{ - req->_present.rx_push = 1; - req->rx_push = rx_push; -} -static inline void -ethtool_rings_set_req_set_tx_push_buf_len(struct ethtool_rings_set_req *req, - __u32 tx_push_buf_len) -{ - req->_present.tx_push_buf_len = 1; - req->tx_push_buf_len = tx_push_buf_len; -} -static inline void -ethtool_rings_set_req_set_tx_push_buf_len_max(struct ethtool_rings_set_req *req, - __u32 tx_push_buf_len_max) -{ - req->_present.tx_push_buf_len_max = 1; - req->tx_push_buf_len_max = tx_push_buf_len_max; -} - -/* - * Set ring params. 
- */ -int ethtool_rings_set(struct ynl_sock *ys, struct ethtool_rings_set_req *req); - -/* ============== ETHTOOL_MSG_CHANNELS_GET ============== */ -/* ETHTOOL_MSG_CHANNELS_GET - do */ -struct ethtool_channels_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_channels_get_req * -ethtool_channels_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_channels_get_req)); -} -void ethtool_channels_get_req_free(struct ethtool_channels_get_req *req); - -static inline void -ethtool_channels_get_req_set_header_dev_index(struct ethtool_channels_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_channels_get_req_set_header_dev_name(struct ethtool_channels_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_channels_get_req_set_header_flags(struct ethtool_channels_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_channels_get_rsp { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 tx_max:1; - __u32 other_max:1; - __u32 combined_max:1; - __u32 rx_count:1; - __u32 tx_count:1; - __u32 other_count:1; - __u32 combined_count:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 tx_max; - __u32 other_max; - __u32 combined_max; - __u32 rx_count; - __u32 tx_count; - __u32 other_count; - __u32 combined_count; -}; - -void ethtool_channels_get_rsp_free(struct ethtool_channels_get_rsp *rsp); - -/* - * Get channel params. 
- */ -struct ethtool_channels_get_rsp * -ethtool_channels_get(struct ynl_sock *ys, struct ethtool_channels_get_req *req); - -/* ETHTOOL_MSG_CHANNELS_GET - dump */ -struct ethtool_channels_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_channels_get_req_dump * -ethtool_channels_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_channels_get_req_dump)); -} -void -ethtool_channels_get_req_dump_free(struct ethtool_channels_get_req_dump *req); - -static inline void -ethtool_channels_get_req_dump_set_header_dev_index(struct ethtool_channels_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_channels_get_req_dump_set_header_dev_name(struct ethtool_channels_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_channels_get_req_dump_set_header_flags(struct ethtool_channels_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_channels_get_list { - struct ethtool_channels_get_list *next; - struct ethtool_channels_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_channels_get_list_free(struct ethtool_channels_get_list *rsp); - -struct ethtool_channels_get_list * -ethtool_channels_get_dump(struct ynl_sock *ys, - struct ethtool_channels_get_req_dump *req); - -/* ETHTOOL_MSG_CHANNELS_GET - notify */ -struct ethtool_channels_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_channels_get_ntf *ntf); - struct ethtool_channels_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_channels_get_ntf_free(struct ethtool_channels_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_CHANNELS_SET ============== */ -/* ETHTOOL_MSG_CHANNELS_SET - do */ -struct ethtool_channels_set_req { - struct { - __u32 header:1; - __u32 rx_max:1; - __u32 tx_max:1; - __u32 other_max:1; - __u32 combined_max:1; - __u32 rx_count:1; - __u32 tx_count:1; - __u32 other_count:1; - __u32 combined_count:1; - } _present; - - struct ethtool_header header; - __u32 rx_max; - __u32 tx_max; - __u32 other_max; - __u32 combined_max; - __u32 rx_count; - __u32 tx_count; - __u32 other_count; - __u32 combined_count; -}; - -static inline struct ethtool_channels_set_req * -ethtool_channels_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_channels_set_req)); -} -void ethtool_channels_set_req_free(struct ethtool_channels_set_req *req); - -static inline void -ethtool_channels_set_req_set_header_dev_index(struct ethtool_channels_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_channels_set_req_set_header_dev_name(struct ethtool_channels_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - 
req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_channels_set_req_set_header_flags(struct ethtool_channels_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_channels_set_req_set_rx_max(struct ethtool_channels_set_req *req, - __u32 rx_max) -{ - req->_present.rx_max = 1; - req->rx_max = rx_max; -} -static inline void -ethtool_channels_set_req_set_tx_max(struct ethtool_channels_set_req *req, - __u32 tx_max) -{ - req->_present.tx_max = 1; - req->tx_max = tx_max; -} -static inline void -ethtool_channels_set_req_set_other_max(struct ethtool_channels_set_req *req, - __u32 other_max) -{ - req->_present.other_max = 1; - req->other_max = other_max; -} -static inline void -ethtool_channels_set_req_set_combined_max(struct ethtool_channels_set_req *req, - __u32 combined_max) -{ - req->_present.combined_max = 1; - req->combined_max = combined_max; -} -static inline void -ethtool_channels_set_req_set_rx_count(struct ethtool_channels_set_req *req, - __u32 rx_count) -{ - req->_present.rx_count = 1; - req->rx_count = rx_count; -} -static inline void -ethtool_channels_set_req_set_tx_count(struct ethtool_channels_set_req *req, - __u32 tx_count) -{ - req->_present.tx_count = 1; - req->tx_count = tx_count; -} -static inline void -ethtool_channels_set_req_set_other_count(struct ethtool_channels_set_req *req, - __u32 other_count) -{ - req->_present.other_count = 1; - req->other_count = other_count; -} -static inline void -ethtool_channels_set_req_set_combined_count(struct ethtool_channels_set_req *req, - __u32 combined_count) -{ - req->_present.combined_count = 1; - req->combined_count = combined_count; -} - -/* - * Set channel params. 
- */ -int ethtool_channels_set(struct ynl_sock *ys, - struct ethtool_channels_set_req *req); - -/* ============== ETHTOOL_MSG_COALESCE_GET ============== */ -/* ETHTOOL_MSG_COALESCE_GET - do */ -struct ethtool_coalesce_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_coalesce_get_req * -ethtool_coalesce_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_coalesce_get_req)); -} -void ethtool_coalesce_get_req_free(struct ethtool_coalesce_get_req *req); - -static inline void -ethtool_coalesce_get_req_set_header_dev_index(struct ethtool_coalesce_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_coalesce_get_req_set_header_dev_name(struct ethtool_coalesce_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_coalesce_get_req_set_header_flags(struct ethtool_coalesce_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_coalesce_get_rsp { - struct { - __u32 header:1; - __u32 rx_usecs:1; - __u32 rx_max_frames:1; - __u32 rx_usecs_irq:1; - __u32 rx_max_frames_irq:1; - __u32 tx_usecs:1; - __u32 tx_max_frames:1; - __u32 tx_usecs_irq:1; - __u32 tx_max_frames_irq:1; - __u32 stats_block_usecs:1; - __u32 use_adaptive_rx:1; - __u32 use_adaptive_tx:1; - __u32 pkt_rate_low:1; - __u32 rx_usecs_low:1; - __u32 rx_max_frames_low:1; - __u32 tx_usecs_low:1; - __u32 tx_max_frames_low:1; - __u32 pkt_rate_high:1; - __u32 rx_usecs_high:1; - __u32 rx_max_frames_high:1; - __u32 tx_usecs_high:1; - __u32 tx_max_frames_high:1; - __u32 rate_sample_interval:1; - __u32 use_cqe_mode_tx:1; - __u32 use_cqe_mode_rx:1; - __u32 tx_aggr_max_bytes:1; - __u32 tx_aggr_max_frames:1; - __u32 tx_aggr_time_usecs:1; - } _present; - - struct ethtool_header header; - __u32 rx_usecs; - __u32 rx_max_frames; - __u32 rx_usecs_irq; - __u32 rx_max_frames_irq; - __u32 tx_usecs; - __u32 tx_max_frames; - __u32 tx_usecs_irq; - __u32 tx_max_frames_irq; - __u32 stats_block_usecs; - __u8 use_adaptive_rx; - __u8 use_adaptive_tx; - __u32 pkt_rate_low; - __u32 rx_usecs_low; - __u32 rx_max_frames_low; - __u32 tx_usecs_low; - __u32 tx_max_frames_low; - __u32 pkt_rate_high; - __u32 rx_usecs_high; - __u32 rx_max_frames_high; - __u32 tx_usecs_high; - __u32 tx_max_frames_high; - __u32 rate_sample_interval; - __u8 use_cqe_mode_tx; - __u8 use_cqe_mode_rx; - __u32 tx_aggr_max_bytes; - __u32 tx_aggr_max_frames; - __u32 tx_aggr_time_usecs; -}; - -void ethtool_coalesce_get_rsp_free(struct ethtool_coalesce_get_rsp *rsp); - -/* - * Get coalesce params. 
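For reference, a minimal usage sketch of the channel get API declared above. The ynl_sock_create()/ynl_sock_destroy() helpers, the ynl_ethtool_family descriptor, struct ynl_error and ys->err come from the YNL C library rather than from this header, "eth0" is a placeholder device name, and the NULL-on-failure return is the usual YNL convention for "do" requests; this is an illustrative sketch, not part of the patch itself.

#include <stdio.h>
#include <ynl.h>
#include "ethtool-user.h"

int main(void)
{
	struct ethtool_channels_get_req *req;
	struct ethtool_channels_get_rsp *rsp;
	struct ynl_error yerr;
	struct ynl_sock *ys;

	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
	if (!ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return 1;
	}

	req = ethtool_channels_get_req_alloc();
	ethtool_channels_get_req_set_header_dev_name(req, "eth0");

	rsp = ethtool_channels_get(ys, req);	/* NULL on failure */
	ethtool_channels_get_req_free(req);
	if (!rsp) {
		fprintf(stderr, "YNL: %s\n", ys->err.msg);
		ynl_sock_destroy(ys);
		return 1;
	}

	if (rsp->_present.combined_count)
		printf("combined channels: %u\n", rsp->combined_count);

	ethtool_channels_get_rsp_free(rsp);
	ynl_sock_destroy(ys);
	return 0;
}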
- */ -struct ethtool_coalesce_get_rsp * -ethtool_coalesce_get(struct ynl_sock *ys, struct ethtool_coalesce_get_req *req); - -/* ETHTOOL_MSG_COALESCE_GET - dump */ -struct ethtool_coalesce_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_coalesce_get_req_dump * -ethtool_coalesce_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_coalesce_get_req_dump)); -} -void -ethtool_coalesce_get_req_dump_free(struct ethtool_coalesce_get_req_dump *req); - -static inline void -ethtool_coalesce_get_req_dump_set_header_dev_index(struct ethtool_coalesce_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_coalesce_get_req_dump_set_header_dev_name(struct ethtool_coalesce_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_coalesce_get_req_dump_set_header_flags(struct ethtool_coalesce_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_coalesce_get_list { - struct ethtool_coalesce_get_list *next; - struct ethtool_coalesce_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_coalesce_get_list_free(struct ethtool_coalesce_get_list *rsp); - -struct ethtool_coalesce_get_list * -ethtool_coalesce_get_dump(struct ynl_sock *ys, - struct ethtool_coalesce_get_req_dump *req); - -/* ETHTOOL_MSG_COALESCE_GET - notify */ -struct ethtool_coalesce_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_coalesce_get_ntf *ntf); - struct ethtool_coalesce_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_coalesce_get_ntf_free(struct ethtool_coalesce_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_COALESCE_SET ============== */ -/* ETHTOOL_MSG_COALESCE_SET - do */ -struct ethtool_coalesce_set_req { - struct { - __u32 header:1; - __u32 rx_usecs:1; - __u32 rx_max_frames:1; - __u32 rx_usecs_irq:1; - __u32 rx_max_frames_irq:1; - __u32 tx_usecs:1; - __u32 tx_max_frames:1; - __u32 tx_usecs_irq:1; - __u32 tx_max_frames_irq:1; - __u32 stats_block_usecs:1; - __u32 use_adaptive_rx:1; - __u32 use_adaptive_tx:1; - __u32 pkt_rate_low:1; - __u32 rx_usecs_low:1; - __u32 rx_max_frames_low:1; - __u32 tx_usecs_low:1; - __u32 tx_max_frames_low:1; - __u32 pkt_rate_high:1; - __u32 rx_usecs_high:1; - __u32 rx_max_frames_high:1; - __u32 tx_usecs_high:1; - __u32 tx_max_frames_high:1; - __u32 rate_sample_interval:1; - __u32 use_cqe_mode_tx:1; - __u32 use_cqe_mode_rx:1; - __u32 tx_aggr_max_bytes:1; - __u32 tx_aggr_max_frames:1; - __u32 tx_aggr_time_usecs:1; - } _present; - - struct ethtool_header header; - __u32 rx_usecs; - __u32 rx_max_frames; - __u32 rx_usecs_irq; - __u32 rx_max_frames_irq; - __u32 tx_usecs; - __u32 tx_max_frames; - __u32 tx_usecs_irq; - __u32 tx_max_frames_irq; - __u32 stats_block_usecs; - __u8 use_adaptive_rx; - __u8 use_adaptive_tx; - __u32 pkt_rate_low; - __u32 rx_usecs_low; - __u32 rx_max_frames_low; - __u32 tx_usecs_low; - __u32 tx_max_frames_low; - __u32 pkt_rate_high; - __u32 rx_usecs_high; - __u32 rx_max_frames_high; - __u32 
tx_usecs_high; - __u32 tx_max_frames_high; - __u32 rate_sample_interval; - __u8 use_cqe_mode_tx; - __u8 use_cqe_mode_rx; - __u32 tx_aggr_max_bytes; - __u32 tx_aggr_max_frames; - __u32 tx_aggr_time_usecs; -}; - -static inline struct ethtool_coalesce_set_req * -ethtool_coalesce_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_coalesce_set_req)); -} -void ethtool_coalesce_set_req_free(struct ethtool_coalesce_set_req *req); - -static inline void -ethtool_coalesce_set_req_set_header_dev_index(struct ethtool_coalesce_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_coalesce_set_req_set_header_dev_name(struct ethtool_coalesce_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_coalesce_set_req_set_header_flags(struct ethtool_coalesce_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs) -{ - req->_present.rx_usecs = 1; - req->rx_usecs = rx_usecs; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames) -{ - req->_present.rx_max_frames = 1; - req->rx_max_frames = rx_max_frames; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs_irq(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs_irq) -{ - req->_present.rx_usecs_irq = 1; - req->rx_usecs_irq = rx_usecs_irq; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames_irq(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames_irq) -{ - req->_present.rx_max_frames_irq = 1; - req->rx_max_frames_irq = rx_max_frames_irq; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs) -{ - req->_present.tx_usecs = 1; - req->tx_usecs = tx_usecs; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames) -{ - req->_present.tx_max_frames = 1; - req->tx_max_frames = tx_max_frames; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs_irq(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs_irq) -{ - req->_present.tx_usecs_irq = 1; - req->tx_usecs_irq = tx_usecs_irq; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames_irq(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames_irq) -{ - req->_present.tx_max_frames_irq = 1; - req->tx_max_frames_irq = tx_max_frames_irq; -} -static inline void -ethtool_coalesce_set_req_set_stats_block_usecs(struct ethtool_coalesce_set_req *req, - __u32 stats_block_usecs) -{ - req->_present.stats_block_usecs = 1; - req->stats_block_usecs = stats_block_usecs; -} -static inline void -ethtool_coalesce_set_req_set_use_adaptive_rx(struct ethtool_coalesce_set_req *req, - __u8 use_adaptive_rx) -{ - req->_present.use_adaptive_rx = 1; - req->use_adaptive_rx = use_adaptive_rx; -} -static inline void -ethtool_coalesce_set_req_set_use_adaptive_tx(struct ethtool_coalesce_set_req *req, - __u8 use_adaptive_tx) -{ - req->_present.use_adaptive_tx = 1; - 
req->use_adaptive_tx = use_adaptive_tx; -} -static inline void -ethtool_coalesce_set_req_set_pkt_rate_low(struct ethtool_coalesce_set_req *req, - __u32 pkt_rate_low) -{ - req->_present.pkt_rate_low = 1; - req->pkt_rate_low = pkt_rate_low; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs_low(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs_low) -{ - req->_present.rx_usecs_low = 1; - req->rx_usecs_low = rx_usecs_low; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames_low(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames_low) -{ - req->_present.rx_max_frames_low = 1; - req->rx_max_frames_low = rx_max_frames_low; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs_low(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs_low) -{ - req->_present.tx_usecs_low = 1; - req->tx_usecs_low = tx_usecs_low; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames_low(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames_low) -{ - req->_present.tx_max_frames_low = 1; - req->tx_max_frames_low = tx_max_frames_low; -} -static inline void -ethtool_coalesce_set_req_set_pkt_rate_high(struct ethtool_coalesce_set_req *req, - __u32 pkt_rate_high) -{ - req->_present.pkt_rate_high = 1; - req->pkt_rate_high = pkt_rate_high; -} -static inline void -ethtool_coalesce_set_req_set_rx_usecs_high(struct ethtool_coalesce_set_req *req, - __u32 rx_usecs_high) -{ - req->_present.rx_usecs_high = 1; - req->rx_usecs_high = rx_usecs_high; -} -static inline void -ethtool_coalesce_set_req_set_rx_max_frames_high(struct ethtool_coalesce_set_req *req, - __u32 rx_max_frames_high) -{ - req->_present.rx_max_frames_high = 1; - req->rx_max_frames_high = rx_max_frames_high; -} -static inline void -ethtool_coalesce_set_req_set_tx_usecs_high(struct ethtool_coalesce_set_req *req, - __u32 tx_usecs_high) -{ - req->_present.tx_usecs_high = 1; - req->tx_usecs_high = tx_usecs_high; -} -static inline void -ethtool_coalesce_set_req_set_tx_max_frames_high(struct ethtool_coalesce_set_req *req, - __u32 tx_max_frames_high) -{ - req->_present.tx_max_frames_high = 1; - req->tx_max_frames_high = tx_max_frames_high; -} -static inline void -ethtool_coalesce_set_req_set_rate_sample_interval(struct ethtool_coalesce_set_req *req, - __u32 rate_sample_interval) -{ - req->_present.rate_sample_interval = 1; - req->rate_sample_interval = rate_sample_interval; -} -static inline void -ethtool_coalesce_set_req_set_use_cqe_mode_tx(struct ethtool_coalesce_set_req *req, - __u8 use_cqe_mode_tx) -{ - req->_present.use_cqe_mode_tx = 1; - req->use_cqe_mode_tx = use_cqe_mode_tx; -} -static inline void -ethtool_coalesce_set_req_set_use_cqe_mode_rx(struct ethtool_coalesce_set_req *req, - __u8 use_cqe_mode_rx) -{ - req->_present.use_cqe_mode_rx = 1; - req->use_cqe_mode_rx = use_cqe_mode_rx; -} -static inline void -ethtool_coalesce_set_req_set_tx_aggr_max_bytes(struct ethtool_coalesce_set_req *req, - __u32 tx_aggr_max_bytes) -{ - req->_present.tx_aggr_max_bytes = 1; - req->tx_aggr_max_bytes = tx_aggr_max_bytes; -} -static inline void -ethtool_coalesce_set_req_set_tx_aggr_max_frames(struct ethtool_coalesce_set_req *req, - __u32 tx_aggr_max_frames) -{ - req->_present.tx_aggr_max_frames = 1; - req->tx_aggr_max_frames = tx_aggr_max_frames; -} -static inline void -ethtool_coalesce_set_req_set_tx_aggr_time_usecs(struct ethtool_coalesce_set_req *req, - __u32 tx_aggr_time_usecs) -{ - req->_present.tx_aggr_time_usecs = 1; - req->tx_aggr_time_usecs = tx_aggr_time_usecs; -} - -/* - * Set coalesce params. 
- */ -int ethtool_coalesce_set(struct ynl_sock *ys, - struct ethtool_coalesce_set_req *req); - -/* ============== ETHTOOL_MSG_PAUSE_GET ============== */ -/* ETHTOOL_MSG_PAUSE_GET - do */ -struct ethtool_pause_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pause_get_req *ethtool_pause_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pause_get_req)); -} -void ethtool_pause_get_req_free(struct ethtool_pause_get_req *req); - -static inline void -ethtool_pause_get_req_set_header_dev_index(struct ethtool_pause_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pause_get_req_set_header_dev_name(struct ethtool_pause_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pause_get_req_set_header_flags(struct ethtool_pause_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pause_get_rsp { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 rx:1; - __u32 tx:1; - __u32 stats:1; - __u32 stats_src:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - __u8 rx; - __u8 tx; - struct ethtool_pause_stat stats; - __u32 stats_src; -}; - -void ethtool_pause_get_rsp_free(struct ethtool_pause_get_rsp *rsp); - -/* - * Get pause params. - */ -struct ethtool_pause_get_rsp * -ethtool_pause_get(struct ynl_sock *ys, struct ethtool_pause_get_req *req); - -/* ETHTOOL_MSG_PAUSE_GET - dump */ -struct ethtool_pause_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pause_get_req_dump * -ethtool_pause_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pause_get_req_dump)); -} -void ethtool_pause_get_req_dump_free(struct ethtool_pause_get_req_dump *req); - -static inline void -ethtool_pause_get_req_dump_set_header_dev_index(struct ethtool_pause_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pause_get_req_dump_set_header_dev_name(struct ethtool_pause_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pause_get_req_dump_set_header_flags(struct ethtool_pause_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pause_get_list { - struct ethtool_pause_get_list *next; - struct ethtool_pause_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_pause_get_list_free(struct ethtool_pause_get_list *rsp); - -struct ethtool_pause_get_list * -ethtool_pause_get_dump(struct ynl_sock *ys, - struct ethtool_pause_get_req_dump *req); - -/* ETHTOOL_MSG_PAUSE_GET - notify */ 
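As an illustration of the coalesce-set path above, a small sketch that enables adaptive RX coalescing on an interface. The socket is assumed to have been opened with ynl_sock_create(&ynl_ethtool_family, ...) as in the earlier sketch; enable_adaptive_rx(), the ifindex argument and the 50 usec value are hypothetical, and a negative return from ethtool_coalesce_set() is taken to mean failure (details in ys->err).

/* Sketch: request adaptive RX coalescing plus a 50 usec static fallback. */
static int enable_adaptive_rx(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_coalesce_set_req *req;
	int ret;

	req = ethtool_coalesce_set_req_alloc();
	ethtool_coalesce_set_req_set_header_dev_index(req, ifindex);
	ethtool_coalesce_set_req_set_use_adaptive_rx(req, 1);
	ethtool_coalesce_set_req_set_rx_usecs(req, 50);

	ret = ethtool_coalesce_set(ys, req);
	ethtool_coalesce_set_req_free(req);
	return ret;
}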
-struct ethtool_pause_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_pause_get_ntf *ntf); - struct ethtool_pause_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_pause_get_ntf_free(struct ethtool_pause_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_PAUSE_SET ============== */ -/* ETHTOOL_MSG_PAUSE_SET - do */ -struct ethtool_pause_set_req { - struct { - __u32 header:1; - __u32 autoneg:1; - __u32 rx:1; - __u32 tx:1; - __u32 stats:1; - __u32 stats_src:1; - } _present; - - struct ethtool_header header; - __u8 autoneg; - __u8 rx; - __u8 tx; - struct ethtool_pause_stat stats; - __u32 stats_src; -}; - -static inline struct ethtool_pause_set_req *ethtool_pause_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pause_set_req)); -} -void ethtool_pause_set_req_free(struct ethtool_pause_set_req *req); - -static inline void -ethtool_pause_set_req_set_header_dev_index(struct ethtool_pause_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pause_set_req_set_header_dev_name(struct ethtool_pause_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pause_set_req_set_header_flags(struct ethtool_pause_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_pause_set_req_set_autoneg(struct ethtool_pause_set_req *req, - __u8 autoneg) -{ - req->_present.autoneg = 1; - req->autoneg = autoneg; -} -static inline void -ethtool_pause_set_req_set_rx(struct ethtool_pause_set_req *req, __u8 rx) -{ - req->_present.rx = 1; - req->rx = rx; -} -static inline void -ethtool_pause_set_req_set_tx(struct ethtool_pause_set_req *req, __u8 tx) -{ - req->_present.tx = 1; - req->tx = tx; -} -static inline void -ethtool_pause_set_req_set_stats_tx_frames(struct ethtool_pause_set_req *req, - __u64 tx_frames) -{ - req->_present.stats = 1; - req->stats._present.tx_frames = 1; - req->stats.tx_frames = tx_frames; -} -static inline void -ethtool_pause_set_req_set_stats_rx_frames(struct ethtool_pause_set_req *req, - __u64 rx_frames) -{ - req->_present.stats = 1; - req->stats._present.rx_frames = 1; - req->stats.rx_frames = rx_frames; -} -static inline void -ethtool_pause_set_req_set_stats_src(struct ethtool_pause_set_req *req, - __u32 stats_src) -{ - req->_present.stats_src = 1; - req->stats_src = stats_src; -} - -/* - * Set pause params. 
- */ -int ethtool_pause_set(struct ynl_sock *ys, struct ethtool_pause_set_req *req); - -/* ============== ETHTOOL_MSG_EEE_GET ============== */ -/* ETHTOOL_MSG_EEE_GET - do */ -struct ethtool_eee_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_eee_get_req *ethtool_eee_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_eee_get_req)); -} -void ethtool_eee_get_req_free(struct ethtool_eee_get_req *req); - -static inline void -ethtool_eee_get_req_set_header_dev_index(struct ethtool_eee_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_eee_get_req_set_header_dev_name(struct ethtool_eee_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_eee_get_req_set_header_flags(struct ethtool_eee_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_eee_get_rsp { - struct { - __u32 header:1; - __u32 modes_ours:1; - __u32 modes_peer:1; - __u32 active:1; - __u32 enabled:1; - __u32 tx_lpi_enabled:1; - __u32 tx_lpi_timer:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes_ours; - struct ethtool_bitset modes_peer; - __u8 active; - __u8 enabled; - __u8 tx_lpi_enabled; - __u32 tx_lpi_timer; -}; - -void ethtool_eee_get_rsp_free(struct ethtool_eee_get_rsp *rsp); - -/* - * Get eee params. 
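The dump variant of the pause request above returns a linked list of ethtool_pause_get_list nodes. A sketch of walking that list follows; print_pause_all() is hypothetical, and the includes and socket setup are as in the first sketch.

/* Sketch: dump pause settings for all interfaces and print them. */
static void print_pause_all(struct ynl_sock *ys)
{
	struct ethtool_pause_get_req_dump *req;
	struct ethtool_pause_get_list *rsp, *pos;

	req = ethtool_pause_get_req_dump_alloc();
	rsp = ethtool_pause_get_dump(ys, req);	/* NULL on failure */
	ethtool_pause_get_req_dump_free(req);
	if (!rsp)
		return;

	for (pos = rsp; pos; pos = pos->next) {
		if (!pos->obj._present.rx || !pos->obj._present.tx)
			continue;
		printf("%s: rx %u tx %u autoneg %u\n",
		       pos->obj.header.dev_name ? pos->obj.header.dev_name : "?",
		       pos->obj.rx, pos->obj.tx, pos->obj.autoneg);
	}
	ethtool_pause_get_list_free(rsp);
}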
- */ -struct ethtool_eee_get_rsp * -ethtool_eee_get(struct ynl_sock *ys, struct ethtool_eee_get_req *req); - -/* ETHTOOL_MSG_EEE_GET - dump */ -struct ethtool_eee_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_eee_get_req_dump * -ethtool_eee_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_eee_get_req_dump)); -} -void ethtool_eee_get_req_dump_free(struct ethtool_eee_get_req_dump *req); - -static inline void -ethtool_eee_get_req_dump_set_header_dev_index(struct ethtool_eee_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_eee_get_req_dump_set_header_dev_name(struct ethtool_eee_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_eee_get_req_dump_set_header_flags(struct ethtool_eee_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_eee_get_list { - struct ethtool_eee_get_list *next; - struct ethtool_eee_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_eee_get_list_free(struct ethtool_eee_get_list *rsp); - -struct ethtool_eee_get_list * -ethtool_eee_get_dump(struct ynl_sock *ys, struct ethtool_eee_get_req_dump *req); - -/* ETHTOOL_MSG_EEE_GET - notify */ -struct ethtool_eee_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_eee_get_ntf *ntf); - struct ethtool_eee_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_eee_get_ntf_free(struct ethtool_eee_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_EEE_SET ============== */ -/* ETHTOOL_MSG_EEE_SET - do */ -struct ethtool_eee_set_req { - struct { - __u32 header:1; - __u32 modes_ours:1; - __u32 modes_peer:1; - __u32 active:1; - __u32 enabled:1; - __u32 tx_lpi_enabled:1; - __u32 tx_lpi_timer:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes_ours; - struct ethtool_bitset modes_peer; - __u8 active; - __u8 enabled; - __u8 tx_lpi_enabled; - __u32 tx_lpi_timer; -}; - -static inline struct ethtool_eee_set_req *ethtool_eee_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_eee_set_req)); -} -void ethtool_eee_set_req_free(struct ethtool_eee_set_req *req); - -static inline void -ethtool_eee_set_req_set_header_dev_index(struct ethtool_eee_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_eee_set_req_set_header_dev_name(struct ethtool_eee_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_eee_set_req_set_header_flags(struct ethtool_eee_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline 
void -ethtool_eee_set_req_set_modes_ours_nomask(struct ethtool_eee_set_req *req) -{ - req->_present.modes_ours = 1; - req->modes_ours._present.nomask = 1; -} -static inline void -ethtool_eee_set_req_set_modes_ours_size(struct ethtool_eee_set_req *req, - __u32 size) -{ - req->_present.modes_ours = 1; - req->modes_ours._present.size = 1; - req->modes_ours.size = size; -} -static inline void -__ethtool_eee_set_req_set_modes_ours_bits_bit(struct ethtool_eee_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes_ours.bits.bit); - req->modes_ours.bits.bit = bit; - req->modes_ours.bits.n_bit = n_bit; -} -static inline void -ethtool_eee_set_req_set_modes_peer_nomask(struct ethtool_eee_set_req *req) -{ - req->_present.modes_peer = 1; - req->modes_peer._present.nomask = 1; -} -static inline void -ethtool_eee_set_req_set_modes_peer_size(struct ethtool_eee_set_req *req, - __u32 size) -{ - req->_present.modes_peer = 1; - req->modes_peer._present.size = 1; - req->modes_peer.size = size; -} -static inline void -__ethtool_eee_set_req_set_modes_peer_bits_bit(struct ethtool_eee_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes_peer.bits.bit); - req->modes_peer.bits.bit = bit; - req->modes_peer.bits.n_bit = n_bit; -} -static inline void -ethtool_eee_set_req_set_active(struct ethtool_eee_set_req *req, __u8 active) -{ - req->_present.active = 1; - req->active = active; -} -static inline void -ethtool_eee_set_req_set_enabled(struct ethtool_eee_set_req *req, __u8 enabled) -{ - req->_present.enabled = 1; - req->enabled = enabled; -} -static inline void -ethtool_eee_set_req_set_tx_lpi_enabled(struct ethtool_eee_set_req *req, - __u8 tx_lpi_enabled) -{ - req->_present.tx_lpi_enabled = 1; - req->tx_lpi_enabled = tx_lpi_enabled; -} -static inline void -ethtool_eee_set_req_set_tx_lpi_timer(struct ethtool_eee_set_req *req, - __u32 tx_lpi_timer) -{ - req->_present.tx_lpi_timer = 1; - req->tx_lpi_timer = tx_lpi_timer; -} - -/* - * Set eee params. 
- */ -int ethtool_eee_set(struct ynl_sock *ys, struct ethtool_eee_set_req *req); - -/* ============== ETHTOOL_MSG_TSINFO_GET ============== */ -/* ETHTOOL_MSG_TSINFO_GET - do */ -struct ethtool_tsinfo_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tsinfo_get_req *ethtool_tsinfo_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tsinfo_get_req)); -} -void ethtool_tsinfo_get_req_free(struct ethtool_tsinfo_get_req *req); - -static inline void -ethtool_tsinfo_get_req_set_header_dev_index(struct ethtool_tsinfo_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tsinfo_get_req_set_header_dev_name(struct ethtool_tsinfo_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tsinfo_get_req_set_header_flags(struct ethtool_tsinfo_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tsinfo_get_rsp { - struct { - __u32 header:1; - __u32 timestamping:1; - __u32 tx_types:1; - __u32 rx_filters:1; - __u32 phc_index:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset timestamping; - struct ethtool_bitset tx_types; - struct ethtool_bitset rx_filters; - __u32 phc_index; -}; - -void ethtool_tsinfo_get_rsp_free(struct ethtool_tsinfo_get_rsp *rsp); - -/* - * Get tsinfo params. 
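A sketch of the EEE control path above, enabling EEE and transmit LPI on an interface. enable_eee() and the 100 usec timer are illustrative only; socket setup is as in the first sketch.

/* Sketch: enable EEE with a 100 usec LPI timer. */
static int enable_eee(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_eee_set_req *req;
	int ret;

	req = ethtool_eee_set_req_alloc();
	ethtool_eee_set_req_set_header_dev_index(req, ifindex);
	ethtool_eee_set_req_set_enabled(req, 1);
	ethtool_eee_set_req_set_tx_lpi_enabled(req, 1);
	ethtool_eee_set_req_set_tx_lpi_timer(req, 100);

	ret = ethtool_eee_set(ys, req);
	ethtool_eee_set_req_free(req);
	return ret;
}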
- */ -struct ethtool_tsinfo_get_rsp * -ethtool_tsinfo_get(struct ynl_sock *ys, struct ethtool_tsinfo_get_req *req); - -/* ETHTOOL_MSG_TSINFO_GET - dump */ -struct ethtool_tsinfo_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tsinfo_get_req_dump * -ethtool_tsinfo_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tsinfo_get_req_dump)); -} -void ethtool_tsinfo_get_req_dump_free(struct ethtool_tsinfo_get_req_dump *req); - -static inline void -ethtool_tsinfo_get_req_dump_set_header_dev_index(struct ethtool_tsinfo_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tsinfo_get_req_dump_set_header_dev_name(struct ethtool_tsinfo_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tsinfo_get_req_dump_set_header_flags(struct ethtool_tsinfo_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tsinfo_get_list { - struct ethtool_tsinfo_get_list *next; - struct ethtool_tsinfo_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_tsinfo_get_list_free(struct ethtool_tsinfo_get_list *rsp); - -struct ethtool_tsinfo_get_list * -ethtool_tsinfo_get_dump(struct ynl_sock *ys, - struct ethtool_tsinfo_get_req_dump *req); - -/* ============== ETHTOOL_MSG_CABLE_TEST_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_ACT - do */ -struct ethtool_cable_test_act_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_cable_test_act_req * -ethtool_cable_test_act_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_cable_test_act_req)); -} -void ethtool_cable_test_act_req_free(struct ethtool_cable_test_act_req *req); - -static inline void -ethtool_cable_test_act_req_set_header_dev_index(struct ethtool_cable_test_act_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_cable_test_act_req_set_header_dev_name(struct ethtool_cable_test_act_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_cable_test_act_req_set_header_flags(struct ethtool_cable_test_act_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -/* - * Cable test. 
- */ -int ethtool_cable_test_act(struct ynl_sock *ys, - struct ethtool_cable_test_act_req *req); - -/* ============== ETHTOOL_MSG_CABLE_TEST_TDR_ACT ============== */ -/* ETHTOOL_MSG_CABLE_TEST_TDR_ACT - do */ -struct ethtool_cable_test_tdr_act_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_cable_test_tdr_act_req * -ethtool_cable_test_tdr_act_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_cable_test_tdr_act_req)); -} -void -ethtool_cable_test_tdr_act_req_free(struct ethtool_cable_test_tdr_act_req *req); - -static inline void -ethtool_cable_test_tdr_act_req_set_header_dev_index(struct ethtool_cable_test_tdr_act_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_cable_test_tdr_act_req_set_header_dev_name(struct ethtool_cable_test_tdr_act_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_cable_test_tdr_act_req_set_header_flags(struct ethtool_cable_test_tdr_act_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -/* - * Cable test TDR. - */ -int ethtool_cable_test_tdr_act(struct ynl_sock *ys, - struct ethtool_cable_test_tdr_act_req *req); - -/* ============== ETHTOOL_MSG_TUNNEL_INFO_GET ============== */ -/* ETHTOOL_MSG_TUNNEL_INFO_GET - do */ -struct ethtool_tunnel_info_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tunnel_info_get_req * -ethtool_tunnel_info_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tunnel_info_get_req)); -} -void ethtool_tunnel_info_get_req_free(struct ethtool_tunnel_info_get_req *req); - -static inline void -ethtool_tunnel_info_get_req_set_header_dev_index(struct ethtool_tunnel_info_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tunnel_info_get_req_set_header_dev_name(struct ethtool_tunnel_info_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tunnel_info_get_req_set_header_flags(struct ethtool_tunnel_info_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tunnel_info_get_rsp { - struct { - __u32 header:1; - __u32 udp_ports:1; - } _present; - - struct ethtool_header header; - struct ethtool_tunnel_udp udp_ports; -}; - -void ethtool_tunnel_info_get_rsp_free(struct ethtool_tunnel_info_get_rsp *rsp); - -/* - * Get tsinfo params. 
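Both cable-test actions above take only a header. A sketch of triggering a basic cable test follows; start_cable_test() is hypothetical, and the test results themselves are delivered asynchronously as cable-test notifications, which are out of scope here.

/* Sketch: kick off a cable test on the given interface. */
static int start_cable_test(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_cable_test_act_req *req;
	int ret;

	req = ethtool_cable_test_act_req_alloc();
	ethtool_cable_test_act_req_set_header_dev_index(req, ifindex);

	ret = ethtool_cable_test_act(ys, req);
	ethtool_cable_test_act_req_free(req);
	return ret;
}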
- */ -struct ethtool_tunnel_info_get_rsp * -ethtool_tunnel_info_get(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req *req); - -/* ETHTOOL_MSG_TUNNEL_INFO_GET - dump */ -struct ethtool_tunnel_info_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_tunnel_info_get_req_dump * -ethtool_tunnel_info_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_tunnel_info_get_req_dump)); -} -void -ethtool_tunnel_info_get_req_dump_free(struct ethtool_tunnel_info_get_req_dump *req); - -static inline void -ethtool_tunnel_info_get_req_dump_set_header_dev_index(struct ethtool_tunnel_info_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_tunnel_info_get_req_dump_set_header_dev_name(struct ethtool_tunnel_info_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_tunnel_info_get_req_dump_set_header_flags(struct ethtool_tunnel_info_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_tunnel_info_get_list { - struct ethtool_tunnel_info_get_list *next; - struct ethtool_tunnel_info_get_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_tunnel_info_get_list_free(struct ethtool_tunnel_info_get_list *rsp); - -struct ethtool_tunnel_info_get_list * -ethtool_tunnel_info_get_dump(struct ynl_sock *ys, - struct ethtool_tunnel_info_get_req_dump *req); - -/* ============== ETHTOOL_MSG_FEC_GET ============== */ -/* ETHTOOL_MSG_FEC_GET - do */ -struct ethtool_fec_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_fec_get_req *ethtool_fec_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_fec_get_req)); -} -void ethtool_fec_get_req_free(struct ethtool_fec_get_req *req); - -static inline void -ethtool_fec_get_req_set_header_dev_index(struct ethtool_fec_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_fec_get_req_set_header_dev_name(struct ethtool_fec_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_fec_get_req_set_header_flags(struct ethtool_fec_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_fec_get_rsp { - struct { - __u32 header:1; - __u32 modes:1; - __u32 auto_:1; - __u32 active:1; - __u32 stats:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - __u8 auto_; - __u32 active; - struct ethtool_fec_stat stats; -}; - -void ethtool_fec_get_rsp_free(struct ethtool_fec_get_rsp *rsp); - -/* - * Get FEC params. 
- */ -struct ethtool_fec_get_rsp * -ethtool_fec_get(struct ynl_sock *ys, struct ethtool_fec_get_req *req); - -/* ETHTOOL_MSG_FEC_GET - dump */ -struct ethtool_fec_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_fec_get_req_dump * -ethtool_fec_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_fec_get_req_dump)); -} -void ethtool_fec_get_req_dump_free(struct ethtool_fec_get_req_dump *req); - -static inline void -ethtool_fec_get_req_dump_set_header_dev_index(struct ethtool_fec_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_fec_get_req_dump_set_header_dev_name(struct ethtool_fec_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_fec_get_req_dump_set_header_flags(struct ethtool_fec_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_fec_get_list { - struct ethtool_fec_get_list *next; - struct ethtool_fec_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_fec_get_list_free(struct ethtool_fec_get_list *rsp); - -struct ethtool_fec_get_list * -ethtool_fec_get_dump(struct ynl_sock *ys, struct ethtool_fec_get_req_dump *req); - -/* ETHTOOL_MSG_FEC_GET - notify */ -struct ethtool_fec_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_fec_get_ntf *ntf); - struct ethtool_fec_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_fec_get_ntf_free(struct ethtool_fec_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_FEC_SET ============== */ -/* ETHTOOL_MSG_FEC_SET - do */ -struct ethtool_fec_set_req { - struct { - __u32 header:1; - __u32 modes:1; - __u32 auto_:1; - __u32 active:1; - __u32 stats:1; - } _present; - - struct ethtool_header header; - struct ethtool_bitset modes; - __u8 auto_; - __u32 active; - struct ethtool_fec_stat stats; -}; - -static inline struct ethtool_fec_set_req *ethtool_fec_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_fec_set_req)); -} -void ethtool_fec_set_req_free(struct ethtool_fec_set_req *req); - -static inline void -ethtool_fec_set_req_set_header_dev_index(struct ethtool_fec_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_fec_set_req_set_header_dev_name(struct ethtool_fec_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_fec_set_req_set_header_flags(struct ethtool_fec_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_fec_set_req_set_modes_nomask(struct ethtool_fec_set_req *req) -{ - req->_present.modes = 1; - 
req->modes._present.nomask = 1; -} -static inline void -ethtool_fec_set_req_set_modes_size(struct ethtool_fec_set_req *req, __u32 size) -{ - req->_present.modes = 1; - req->modes._present.size = 1; - req->modes.size = size; -} -static inline void -__ethtool_fec_set_req_set_modes_bits_bit(struct ethtool_fec_set_req *req, - struct ethtool_bitset_bit *bit, - unsigned int n_bit) -{ - free(req->modes.bits.bit); - req->modes.bits.bit = bit; - req->modes.bits.n_bit = n_bit; -} -static inline void -ethtool_fec_set_req_set_auto_(struct ethtool_fec_set_req *req, __u8 auto_) -{ - req->_present.auto_ = 1; - req->auto_ = auto_; -} -static inline void -ethtool_fec_set_req_set_active(struct ethtool_fec_set_req *req, __u32 active) -{ - req->_present.active = 1; - req->active = active; -} -static inline void -ethtool_fec_set_req_set_stats_corrected(struct ethtool_fec_set_req *req, - const void *corrected, size_t len) -{ - free(req->stats.corrected); - req->stats._present.corrected_len = len; - req->stats.corrected = malloc(req->stats._present.corrected_len); - memcpy(req->stats.corrected, corrected, req->stats._present.corrected_len); -} -static inline void -ethtool_fec_set_req_set_stats_uncorr(struct ethtool_fec_set_req *req, - const void *uncorr, size_t len) -{ - free(req->stats.uncorr); - req->stats._present.uncorr_len = len; - req->stats.uncorr = malloc(req->stats._present.uncorr_len); - memcpy(req->stats.uncorr, uncorr, req->stats._present.uncorr_len); -} -static inline void -ethtool_fec_set_req_set_stats_corr_bits(struct ethtool_fec_set_req *req, - const void *corr_bits, size_t len) -{ - free(req->stats.corr_bits); - req->stats._present.corr_bits_len = len; - req->stats.corr_bits = malloc(req->stats._present.corr_bits_len); - memcpy(req->stats.corr_bits, corr_bits, req->stats._present.corr_bits_len); -} - -/* - * Set FEC params. 
- */ -int ethtool_fec_set(struct ynl_sock *ys, struct ethtool_fec_set_req *req); - -/* ============== ETHTOOL_MSG_MODULE_EEPROM_GET ============== */ -/* ETHTOOL_MSG_MODULE_EEPROM_GET - do */ -struct ethtool_module_eeprom_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_eeprom_get_req * -ethtool_module_eeprom_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_eeprom_get_req)); -} -void -ethtool_module_eeprom_get_req_free(struct ethtool_module_eeprom_get_req *req); - -static inline void -ethtool_module_eeprom_get_req_set_header_dev_index(struct ethtool_module_eeprom_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_eeprom_get_req_set_header_dev_name(struct ethtool_module_eeprom_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_eeprom_get_req_set_header_flags(struct ethtool_module_eeprom_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_eeprom_get_rsp { - struct { - __u32 header:1; - __u32 offset:1; - __u32 length:1; - __u32 page:1; - __u32 bank:1; - __u32 i2c_address:1; - __u32 data_len; - } _present; - - struct ethtool_header header; - __u32 offset; - __u32 length; - __u8 page; - __u8 bank; - __u8 i2c_address; - void *data; -}; - -void -ethtool_module_eeprom_get_rsp_free(struct ethtool_module_eeprom_get_rsp *rsp); - -/* - * Get module EEPROM params. 
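A sketch of the FEC-set path above, switching the interface to automatic FEC selection via the auto_ attribute; set_fec_auto() is hypothetical and the socket setup is as before.

/* Sketch: let the driver pick the FEC mode automatically. */
static int set_fec_auto(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_fec_set_req *req;
	int ret;

	req = ethtool_fec_set_req_alloc();
	ethtool_fec_set_req_set_header_dev_index(req, ifindex);
	ethtool_fec_set_req_set_auto_(req, 1);

	ret = ethtool_fec_set(ys, req);
	ethtool_fec_set_req_free(req);
	return ret;
}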
- */ -struct ethtool_module_eeprom_get_rsp * -ethtool_module_eeprom_get(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req *req); - -/* ETHTOOL_MSG_MODULE_EEPROM_GET - dump */ -struct ethtool_module_eeprom_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_eeprom_get_req_dump * -ethtool_module_eeprom_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_eeprom_get_req_dump)); -} -void -ethtool_module_eeprom_get_req_dump_free(struct ethtool_module_eeprom_get_req_dump *req); - -static inline void -ethtool_module_eeprom_get_req_dump_set_header_dev_index(struct ethtool_module_eeprom_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_eeprom_get_req_dump_set_header_dev_name(struct ethtool_module_eeprom_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_eeprom_get_req_dump_set_header_flags(struct ethtool_module_eeprom_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_eeprom_get_list { - struct ethtool_module_eeprom_get_list *next; - struct ethtool_module_eeprom_get_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_module_eeprom_get_list_free(struct ethtool_module_eeprom_get_list *rsp); - -struct ethtool_module_eeprom_get_list * -ethtool_module_eeprom_get_dump(struct ynl_sock *ys, - struct ethtool_module_eeprom_get_req_dump *req); - -/* ============== ETHTOOL_MSG_PHC_VCLOCKS_GET ============== */ -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - do */ -struct ethtool_phc_vclocks_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_phc_vclocks_get_req * -ethtool_phc_vclocks_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_phc_vclocks_get_req)); -} -void ethtool_phc_vclocks_get_req_free(struct ethtool_phc_vclocks_get_req *req); - -static inline void -ethtool_phc_vclocks_get_req_set_header_dev_index(struct ethtool_phc_vclocks_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_phc_vclocks_get_req_set_header_dev_name(struct ethtool_phc_vclocks_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_phc_vclocks_get_req_set_header_flags(struct ethtool_phc_vclocks_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_phc_vclocks_get_rsp { - struct { - __u32 header:1; - __u32 num:1; - } _present; - - struct ethtool_header header; - __u32 num; -}; - -void ethtool_phc_vclocks_get_rsp_free(struct ethtool_phc_vclocks_get_rsp *rsp); - -/* - * Get 
PHC VCLOCKs. - */ -struct ethtool_phc_vclocks_get_rsp * -ethtool_phc_vclocks_get(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req *req); - -/* ETHTOOL_MSG_PHC_VCLOCKS_GET - dump */ -struct ethtool_phc_vclocks_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_phc_vclocks_get_req_dump * -ethtool_phc_vclocks_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_phc_vclocks_get_req_dump)); -} -void -ethtool_phc_vclocks_get_req_dump_free(struct ethtool_phc_vclocks_get_req_dump *req); - -static inline void -ethtool_phc_vclocks_get_req_dump_set_header_dev_index(struct ethtool_phc_vclocks_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_phc_vclocks_get_req_dump_set_header_dev_name(struct ethtool_phc_vclocks_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_phc_vclocks_get_req_dump_set_header_flags(struct ethtool_phc_vclocks_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_phc_vclocks_get_list { - struct ethtool_phc_vclocks_get_list *next; - struct ethtool_phc_vclocks_get_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_phc_vclocks_get_list_free(struct ethtool_phc_vclocks_get_list *rsp); - -struct ethtool_phc_vclocks_get_list * -ethtool_phc_vclocks_get_dump(struct ynl_sock *ys, - struct ethtool_phc_vclocks_get_req_dump *req); - -/* ============== ETHTOOL_MSG_MODULE_GET ============== */ -/* ETHTOOL_MSG_MODULE_GET - do */ -struct ethtool_module_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_get_req *ethtool_module_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_get_req)); -} -void ethtool_module_get_req_free(struct ethtool_module_get_req *req); - -static inline void -ethtool_module_get_req_set_header_dev_index(struct ethtool_module_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_get_req_set_header_dev_name(struct ethtool_module_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_get_req_set_header_flags(struct ethtool_module_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_get_rsp { - struct { - __u32 header:1; - __u32 power_mode_policy:1; - __u32 power_mode:1; - } _present; - - struct ethtool_header header; - __u8 power_mode_policy; - __u8 power_mode; -}; - -void ethtool_module_get_rsp_free(struct ethtool_module_get_rsp *rsp); - -/* - * Get module params. 
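The PHC-vclocks response above carries a single count. A sketch of reading it follows; print_num_vclocks() is hypothetical, and the includes and socket setup are as in the first sketch.

/* Sketch: query how many PTP virtual clocks sit on top of the device PHC. */
static void print_num_vclocks(struct ynl_sock *ys, __u32 ifindex)
{
	struct ethtool_phc_vclocks_get_req *req;
	struct ethtool_phc_vclocks_get_rsp *rsp;

	req = ethtool_phc_vclocks_get_req_alloc();
	ethtool_phc_vclocks_get_req_set_header_dev_index(req, ifindex);

	rsp = ethtool_phc_vclocks_get(ys, req);	/* NULL on failure */
	ethtool_phc_vclocks_get_req_free(req);
	if (!rsp)
		return;

	if (rsp->_present.num)
		printf("PHC vclocks: %u\n", rsp->num);
	ethtool_phc_vclocks_get_rsp_free(rsp);
}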
- */ -struct ethtool_module_get_rsp * -ethtool_module_get(struct ynl_sock *ys, struct ethtool_module_get_req *req); - -/* ETHTOOL_MSG_MODULE_GET - dump */ -struct ethtool_module_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_module_get_req_dump * -ethtool_module_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_get_req_dump)); -} -void ethtool_module_get_req_dump_free(struct ethtool_module_get_req_dump *req); - -static inline void -ethtool_module_get_req_dump_set_header_dev_index(struct ethtool_module_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_get_req_dump_set_header_dev_name(struct ethtool_module_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_get_req_dump_set_header_flags(struct ethtool_module_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_module_get_list { - struct ethtool_module_get_list *next; - struct ethtool_module_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_module_get_list_free(struct ethtool_module_get_list *rsp); - -struct ethtool_module_get_list * -ethtool_module_get_dump(struct ynl_sock *ys, - struct ethtool_module_get_req_dump *req); - -/* ETHTOOL_MSG_MODULE_GET - notify */ -struct ethtool_module_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_module_get_ntf *ntf); - struct ethtool_module_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_module_get_ntf_free(struct ethtool_module_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_MODULE_SET ============== */ -/* ETHTOOL_MSG_MODULE_SET - do */ -struct ethtool_module_set_req { - struct { - __u32 header:1; - __u32 power_mode_policy:1; - __u32 power_mode:1; - } _present; - - struct ethtool_header header; - __u8 power_mode_policy; - __u8 power_mode; -}; - -static inline struct ethtool_module_set_req *ethtool_module_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_module_set_req)); -} -void ethtool_module_set_req_free(struct ethtool_module_set_req *req); - -static inline void -ethtool_module_set_req_set_header_dev_index(struct ethtool_module_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_module_set_req_set_header_dev_name(struct ethtool_module_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_module_set_req_set_header_flags(struct ethtool_module_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void 
-ethtool_module_set_req_set_power_mode_policy(struct ethtool_module_set_req *req, - __u8 power_mode_policy) -{ - req->_present.power_mode_policy = 1; - req->power_mode_policy = power_mode_policy; -} -static inline void -ethtool_module_set_req_set_power_mode(struct ethtool_module_set_req *req, - __u8 power_mode) -{ - req->_present.power_mode = 1; - req->power_mode = power_mode; -} - -/* - * Set module params. - */ -int ethtool_module_set(struct ynl_sock *ys, struct ethtool_module_set_req *req); - -/* ============== ETHTOOL_MSG_PSE_GET ============== */ -/* ETHTOOL_MSG_PSE_GET - do */ -struct ethtool_pse_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pse_get_req *ethtool_pse_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pse_get_req)); -} -void ethtool_pse_get_req_free(struct ethtool_pse_get_req *req); - -static inline void -ethtool_pse_get_req_set_header_dev_index(struct ethtool_pse_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pse_get_req_set_header_dev_name(struct ethtool_pse_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pse_get_req_set_header_flags(struct ethtool_pse_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pse_get_rsp { - struct { - __u32 header:1; - __u32 admin_state:1; - __u32 admin_control:1; - __u32 pw_d_status:1; - } _present; - - struct ethtool_header header; - __u32 admin_state; - __u32 admin_control; - __u32 pw_d_status; -}; - -void ethtool_pse_get_rsp_free(struct ethtool_pse_get_rsp *rsp); - -/* - * Get Power Sourcing Equipment params. 
- */ -struct ethtool_pse_get_rsp * -ethtool_pse_get(struct ynl_sock *ys, struct ethtool_pse_get_req *req); - -/* ETHTOOL_MSG_PSE_GET - dump */ -struct ethtool_pse_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_pse_get_req_dump * -ethtool_pse_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pse_get_req_dump)); -} -void ethtool_pse_get_req_dump_free(struct ethtool_pse_get_req_dump *req); - -static inline void -ethtool_pse_get_req_dump_set_header_dev_index(struct ethtool_pse_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pse_get_req_dump_set_header_dev_name(struct ethtool_pse_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pse_get_req_dump_set_header_flags(struct ethtool_pse_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_pse_get_list { - struct ethtool_pse_get_list *next; - struct ethtool_pse_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_pse_get_list_free(struct ethtool_pse_get_list *rsp); - -struct ethtool_pse_get_list * -ethtool_pse_get_dump(struct ynl_sock *ys, struct ethtool_pse_get_req_dump *req); - -/* ============== ETHTOOL_MSG_PSE_SET ============== */ -/* ETHTOOL_MSG_PSE_SET - do */ -struct ethtool_pse_set_req { - struct { - __u32 header:1; - __u32 admin_state:1; - __u32 admin_control:1; - __u32 pw_d_status:1; - } _present; - - struct ethtool_header header; - __u32 admin_state; - __u32 admin_control; - __u32 pw_d_status; -}; - -static inline struct ethtool_pse_set_req *ethtool_pse_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_pse_set_req)); -} -void ethtool_pse_set_req_free(struct ethtool_pse_set_req *req); - -static inline void -ethtool_pse_set_req_set_header_dev_index(struct ethtool_pse_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_pse_set_req_set_header_dev_name(struct ethtool_pse_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_pse_set_req_set_header_flags(struct ethtool_pse_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_pse_set_req_set_admin_state(struct ethtool_pse_set_req *req, - __u32 admin_state) -{ - req->_present.admin_state = 1; - req->admin_state = admin_state; -} -static inline void -ethtool_pse_set_req_set_admin_control(struct ethtool_pse_set_req *req, - __u32 admin_control) -{ - req->_present.admin_control = 1; - req->admin_control = admin_control; -} -static inline void -ethtool_pse_set_req_set_pw_d_status(struct ethtool_pse_set_req *req, - 
__u32 pw_d_status) -{ - req->_present.pw_d_status = 1; - req->pw_d_status = pw_d_status; -} - -/* - * Set Power Sourcing Equipment params. - */ -int ethtool_pse_set(struct ynl_sock *ys, struct ethtool_pse_set_req *req); - -/* ============== ETHTOOL_MSG_RSS_GET ============== */ -/* ETHTOOL_MSG_RSS_GET - do */ -struct ethtool_rss_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rss_get_req *ethtool_rss_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rss_get_req)); -} -void ethtool_rss_get_req_free(struct ethtool_rss_get_req *req); - -static inline void -ethtool_rss_get_req_set_header_dev_index(struct ethtool_rss_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rss_get_req_set_header_dev_name(struct ethtool_rss_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rss_get_req_set_header_flags(struct ethtool_rss_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rss_get_rsp { - struct { - __u32 header:1; - __u32 context:1; - __u32 hfunc:1; - __u32 indir_len; - __u32 hkey_len; - } _present; - - struct ethtool_header header; - __u32 context; - __u32 hfunc; - void *indir; - void *hkey; -}; - -void ethtool_rss_get_rsp_free(struct ethtool_rss_get_rsp *rsp); - -/* - * Get RSS params. 
- */ -struct ethtool_rss_get_rsp * -ethtool_rss_get(struct ynl_sock *ys, struct ethtool_rss_get_req *req); - -/* ETHTOOL_MSG_RSS_GET - dump */ -struct ethtool_rss_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_rss_get_req_dump * -ethtool_rss_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_rss_get_req_dump)); -} -void ethtool_rss_get_req_dump_free(struct ethtool_rss_get_req_dump *req); - -static inline void -ethtool_rss_get_req_dump_set_header_dev_index(struct ethtool_rss_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_rss_get_req_dump_set_header_dev_name(struct ethtool_rss_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_rss_get_req_dump_set_header_flags(struct ethtool_rss_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_rss_get_list { - struct ethtool_rss_get_list *next; - struct ethtool_rss_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_rss_get_list_free(struct ethtool_rss_get_list *rsp); - -struct ethtool_rss_get_list * -ethtool_rss_get_dump(struct ynl_sock *ys, struct ethtool_rss_get_req_dump *req); - -/* ============== ETHTOOL_MSG_PLCA_GET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_GET_CFG - do */ -struct ethtool_plca_get_cfg_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_cfg_req * -ethtool_plca_get_cfg_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_cfg_req)); -} -void ethtool_plca_get_cfg_req_free(struct ethtool_plca_get_cfg_req *req); - -static inline void -ethtool_plca_get_cfg_req_set_header_dev_index(struct ethtool_plca_get_cfg_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_cfg_req_set_header_dev_name(struct ethtool_plca_get_cfg_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_cfg_req_set_header_flags(struct ethtool_plca_get_cfg_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_cfg_rsp { - struct { - __u32 header:1; - __u32 version:1; - __u32 enabled:1; - __u32 status:1; - __u32 node_cnt:1; - __u32 node_id:1; - __u32 to_tmr:1; - __u32 burst_cnt:1; - __u32 burst_tmr:1; - } _present; - - struct ethtool_header header; - __u16 version; - __u8 enabled; - __u8 status; - __u32 node_cnt; - __u32 node_id; - __u32 to_tmr; - __u32 burst_cnt; - __u32 burst_tmr; -}; - -void ethtool_plca_get_cfg_rsp_free(struct ethtool_plca_get_cfg_rsp *rsp); - -/* - * Get PLCA params. 
- */ -struct ethtool_plca_get_cfg_rsp * -ethtool_plca_get_cfg(struct ynl_sock *ys, struct ethtool_plca_get_cfg_req *req); - -/* ETHTOOL_MSG_PLCA_GET_CFG - dump */ -struct ethtool_plca_get_cfg_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_cfg_req_dump * -ethtool_plca_get_cfg_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_cfg_req_dump)); -} -void -ethtool_plca_get_cfg_req_dump_free(struct ethtool_plca_get_cfg_req_dump *req); - -static inline void -ethtool_plca_get_cfg_req_dump_set_header_dev_index(struct ethtool_plca_get_cfg_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_cfg_req_dump_set_header_dev_name(struct ethtool_plca_get_cfg_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_cfg_req_dump_set_header_flags(struct ethtool_plca_get_cfg_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_cfg_list { - struct ethtool_plca_get_cfg_list *next; - struct ethtool_plca_get_cfg_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_plca_get_cfg_list_free(struct ethtool_plca_get_cfg_list *rsp); - -struct ethtool_plca_get_cfg_list * -ethtool_plca_get_cfg_dump(struct ynl_sock *ys, - struct ethtool_plca_get_cfg_req_dump *req); - -/* ETHTOOL_MSG_PLCA_GET_CFG - notify */ -struct ethtool_plca_get_cfg_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_plca_get_cfg_ntf *ntf); - struct ethtool_plca_get_cfg_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_plca_get_cfg_ntf_free(struct ethtool_plca_get_cfg_ntf *rsp); - -/* ============== ETHTOOL_MSG_PLCA_SET_CFG ============== */ -/* ETHTOOL_MSG_PLCA_SET_CFG - do */ -struct ethtool_plca_set_cfg_req { - struct { - __u32 header:1; - __u32 version:1; - __u32 enabled:1; - __u32 status:1; - __u32 node_cnt:1; - __u32 node_id:1; - __u32 to_tmr:1; - __u32 burst_cnt:1; - __u32 burst_tmr:1; - } _present; - - struct ethtool_header header; - __u16 version; - __u8 enabled; - __u8 status; - __u32 node_cnt; - __u32 node_id; - __u32 to_tmr; - __u32 burst_cnt; - __u32 burst_tmr; -}; - -static inline struct ethtool_plca_set_cfg_req * -ethtool_plca_set_cfg_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_set_cfg_req)); -} -void ethtool_plca_set_cfg_req_free(struct ethtool_plca_set_cfg_req *req); - -static inline void -ethtool_plca_set_cfg_req_set_header_dev_index(struct ethtool_plca_set_cfg_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_set_cfg_req_set_header_dev_name(struct ethtool_plca_set_cfg_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] 
= 0; -} -static inline void -ethtool_plca_set_cfg_req_set_header_flags(struct ethtool_plca_set_cfg_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_plca_set_cfg_req_set_version(struct ethtool_plca_set_cfg_req *req, - __u16 version) -{ - req->_present.version = 1; - req->version = version; -} -static inline void -ethtool_plca_set_cfg_req_set_enabled(struct ethtool_plca_set_cfg_req *req, - __u8 enabled) -{ - req->_present.enabled = 1; - req->enabled = enabled; -} -static inline void -ethtool_plca_set_cfg_req_set_status(struct ethtool_plca_set_cfg_req *req, - __u8 status) -{ - req->_present.status = 1; - req->status = status; -} -static inline void -ethtool_plca_set_cfg_req_set_node_cnt(struct ethtool_plca_set_cfg_req *req, - __u32 node_cnt) -{ - req->_present.node_cnt = 1; - req->node_cnt = node_cnt; -} -static inline void -ethtool_plca_set_cfg_req_set_node_id(struct ethtool_plca_set_cfg_req *req, - __u32 node_id) -{ - req->_present.node_id = 1; - req->node_id = node_id; -} -static inline void -ethtool_plca_set_cfg_req_set_to_tmr(struct ethtool_plca_set_cfg_req *req, - __u32 to_tmr) -{ - req->_present.to_tmr = 1; - req->to_tmr = to_tmr; -} -static inline void -ethtool_plca_set_cfg_req_set_burst_cnt(struct ethtool_plca_set_cfg_req *req, - __u32 burst_cnt) -{ - req->_present.burst_cnt = 1; - req->burst_cnt = burst_cnt; -} -static inline void -ethtool_plca_set_cfg_req_set_burst_tmr(struct ethtool_plca_set_cfg_req *req, - __u32 burst_tmr) -{ - req->_present.burst_tmr = 1; - req->burst_tmr = burst_tmr; -} - -/* - * Set PLCA params. - */ -int ethtool_plca_set_cfg(struct ynl_sock *ys, - struct ethtool_plca_set_cfg_req *req); - -/* ============== ETHTOOL_MSG_PLCA_GET_STATUS ============== */ -/* ETHTOOL_MSG_PLCA_GET_STATUS - do */ -struct ethtool_plca_get_status_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_status_req * -ethtool_plca_get_status_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_status_req)); -} -void ethtool_plca_get_status_req_free(struct ethtool_plca_get_status_req *req); - -static inline void -ethtool_plca_get_status_req_set_header_dev_index(struct ethtool_plca_get_status_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_status_req_set_header_dev_name(struct ethtool_plca_get_status_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_status_req_set_header_flags(struct ethtool_plca_get_status_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_status_rsp { - struct { - __u32 header:1; - __u32 version:1; - __u32 enabled:1; - __u32 status:1; - __u32 node_cnt:1; - __u32 node_id:1; - __u32 to_tmr:1; - __u32 burst_cnt:1; - __u32 burst_tmr:1; - } _present; - - struct ethtool_header header; - __u16 version; - __u8 enabled; - __u8 status; - __u32 node_cnt; - __u32 node_id; - __u32 to_tmr; - __u32 burst_cnt; - __u32 burst_tmr; -}; - -void 
ethtool_plca_get_status_rsp_free(struct ethtool_plca_get_status_rsp *rsp); - -/* - * Get PLCA status params. - */ -struct ethtool_plca_get_status_rsp * -ethtool_plca_get_status(struct ynl_sock *ys, - struct ethtool_plca_get_status_req *req); - -/* ETHTOOL_MSG_PLCA_GET_STATUS - dump */ -struct ethtool_plca_get_status_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_plca_get_status_req_dump * -ethtool_plca_get_status_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_plca_get_status_req_dump)); -} -void -ethtool_plca_get_status_req_dump_free(struct ethtool_plca_get_status_req_dump *req); - -static inline void -ethtool_plca_get_status_req_dump_set_header_dev_index(struct ethtool_plca_get_status_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_plca_get_status_req_dump_set_header_dev_name(struct ethtool_plca_get_status_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_plca_get_status_req_dump_set_header_flags(struct ethtool_plca_get_status_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_plca_get_status_list { - struct ethtool_plca_get_status_list *next; - struct ethtool_plca_get_status_rsp obj __attribute__((aligned(8))); -}; - -void -ethtool_plca_get_status_list_free(struct ethtool_plca_get_status_list *rsp); - -struct ethtool_plca_get_status_list * -ethtool_plca_get_status_dump(struct ynl_sock *ys, - struct ethtool_plca_get_status_req_dump *req); - -/* ============== ETHTOOL_MSG_MM_GET ============== */ -/* ETHTOOL_MSG_MM_GET - do */ -struct ethtool_mm_get_req { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_mm_get_req *ethtool_mm_get_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_mm_get_req)); -} -void ethtool_mm_get_req_free(struct ethtool_mm_get_req *req); - -static inline void -ethtool_mm_get_req_set_header_dev_index(struct ethtool_mm_get_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_mm_get_req_set_header_dev_name(struct ethtool_mm_get_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_mm_get_req_set_header_flags(struct ethtool_mm_get_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_mm_get_rsp { - struct { - __u32 header:1; - __u32 pmac_enabled:1; - __u32 tx_enabled:1; - __u32 tx_active:1; - __u32 tx_min_frag_size:1; - __u32 rx_min_frag_size:1; - __u32 verify_enabled:1; - __u32 verify_time:1; - __u32 max_verify_time:1; - __u32 stats:1; - } _present; - - struct ethtool_header 
header; - __u8 pmac_enabled; - __u8 tx_enabled; - __u8 tx_active; - __u32 tx_min_frag_size; - __u32 rx_min_frag_size; - __u8 verify_enabled; - __u32 verify_time; - __u32 max_verify_time; - struct ethtool_mm_stat stats; -}; - -void ethtool_mm_get_rsp_free(struct ethtool_mm_get_rsp *rsp); - -/* - * Get MAC Merge configuration and state - */ -struct ethtool_mm_get_rsp * -ethtool_mm_get(struct ynl_sock *ys, struct ethtool_mm_get_req *req); - -/* ETHTOOL_MSG_MM_GET - dump */ -struct ethtool_mm_get_req_dump { - struct { - __u32 header:1; - } _present; - - struct ethtool_header header; -}; - -static inline struct ethtool_mm_get_req_dump * -ethtool_mm_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_mm_get_req_dump)); -} -void ethtool_mm_get_req_dump_free(struct ethtool_mm_get_req_dump *req); - -static inline void -ethtool_mm_get_req_dump_set_header_dev_index(struct ethtool_mm_get_req_dump *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_mm_get_req_dump_set_header_dev_name(struct ethtool_mm_get_req_dump *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_mm_get_req_dump_set_header_flags(struct ethtool_mm_get_req_dump *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} - -struct ethtool_mm_get_list { - struct ethtool_mm_get_list *next; - struct ethtool_mm_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_mm_get_list_free(struct ethtool_mm_get_list *rsp); - -struct ethtool_mm_get_list * -ethtool_mm_get_dump(struct ynl_sock *ys, struct ethtool_mm_get_req_dump *req); - -/* ETHTOOL_MSG_MM_GET - notify */ -struct ethtool_mm_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_mm_get_ntf *ntf); - struct ethtool_mm_get_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_mm_get_ntf_free(struct ethtool_mm_get_ntf *rsp); - -/* ============== ETHTOOL_MSG_MM_SET ============== */ -/* ETHTOOL_MSG_MM_SET - do */ -struct ethtool_mm_set_req { - struct { - __u32 header:1; - __u32 verify_enabled:1; - __u32 verify_time:1; - __u32 tx_enabled:1; - __u32 pmac_enabled:1; - __u32 tx_min_frag_size:1; - } _present; - - struct ethtool_header header; - __u8 verify_enabled; - __u32 verify_time; - __u8 tx_enabled; - __u8 pmac_enabled; - __u32 tx_min_frag_size; -}; - -static inline struct ethtool_mm_set_req *ethtool_mm_set_req_alloc(void) -{ - return calloc(1, sizeof(struct ethtool_mm_set_req)); -} -void ethtool_mm_set_req_free(struct ethtool_mm_set_req *req); - -static inline void -ethtool_mm_set_req_set_header_dev_index(struct ethtool_mm_set_req *req, - __u32 dev_index) -{ - req->_present.header = 1; - req->header._present.dev_index = 1; - req->header.dev_index = dev_index; -} -static inline void -ethtool_mm_set_req_set_header_dev_name(struct ethtool_mm_set_req *req, - const char *dev_name) -{ - free(req->header.dev_name); - req->header._present.dev_name_len = strlen(dev_name); - req->header.dev_name = malloc(req->header._present.dev_name_len + 1); - memcpy(req->header.dev_name, dev_name, req->header._present.dev_name_len); - 
req->header.dev_name[req->header._present.dev_name_len] = 0; -} -static inline void -ethtool_mm_set_req_set_header_flags(struct ethtool_mm_set_req *req, - __u32 flags) -{ - req->_present.header = 1; - req->header._present.flags = 1; - req->header.flags = flags; -} -static inline void -ethtool_mm_set_req_set_verify_enabled(struct ethtool_mm_set_req *req, - __u8 verify_enabled) -{ - req->_present.verify_enabled = 1; - req->verify_enabled = verify_enabled; -} -static inline void -ethtool_mm_set_req_set_verify_time(struct ethtool_mm_set_req *req, - __u32 verify_time) -{ - req->_present.verify_time = 1; - req->verify_time = verify_time; -} -static inline void -ethtool_mm_set_req_set_tx_enabled(struct ethtool_mm_set_req *req, - __u8 tx_enabled) -{ - req->_present.tx_enabled = 1; - req->tx_enabled = tx_enabled; -} -static inline void -ethtool_mm_set_req_set_pmac_enabled(struct ethtool_mm_set_req *req, - __u8 pmac_enabled) -{ - req->_present.pmac_enabled = 1; - req->pmac_enabled = pmac_enabled; -} -static inline void -ethtool_mm_set_req_set_tx_min_frag_size(struct ethtool_mm_set_req *req, - __u32 tx_min_frag_size) -{ - req->_present.tx_min_frag_size = 1; - req->tx_min_frag_size = tx_min_frag_size; -} - -/* - * Set MAC Merge configuration - */ -int ethtool_mm_set(struct ynl_sock *ys, struct ethtool_mm_set_req *req); - -/* ETHTOOL_MSG_CABLE_TEST_NTF - event */ -struct ethtool_cable_test_ntf_rsp { - struct { - __u32 header:1; - __u32 status:1; - } _present; - - struct ethtool_header header; - __u8 status; -}; - -struct ethtool_cable_test_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_cable_test_ntf *ntf); - struct ethtool_cable_test_ntf_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_cable_test_ntf_free(struct ethtool_cable_test_ntf *rsp); - -/* ETHTOOL_MSG_CABLE_TEST_TDR_NTF - event */ -struct ethtool_cable_test_tdr_ntf_rsp { - struct { - __u32 header:1; - __u32 status:1; - __u32 nest:1; - } _present; - - struct ethtool_header header; - __u8 status; - struct ethtool_cable_nest nest; -}; - -struct ethtool_cable_test_tdr_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ethtool_cable_test_tdr_ntf *ntf); - struct ethtool_cable_test_tdr_ntf_rsp obj __attribute__((aligned(8))); -}; - -void ethtool_cable_test_tdr_ntf_free(struct ethtool_cable_test_tdr_ntf *rsp); - -#endif /* _LINUX_ETHTOOL_GEN_H */ diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c deleted file mode 100644 index f30bef23bc31..000000000000 --- a/tools/net/ynl/generated/fou-user.c +++ /dev/null @@ -1,330 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/fou.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "fou-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const fou_op_strmap[] = { - [FOU_CMD_ADD] = "add", - [FOU_CMD_DEL] = "del", - [FOU_CMD_GET] = "get", -}; - -const char *fou_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(fou_op_strmap)) - return NULL; - return fou_op_strmap[op]; -} - -static const char * const fou_encap_type_strmap[] = { - [0] = "unspec", - [1] = "direct", - [2] = "gue", -}; - -const char *fou_encap_type_str(int value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(fou_encap_type_strmap)) - return NULL; - return fou_encap_type_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr 
fou_policy[FOU_ATTR_MAX + 1] = { - [FOU_ATTR_UNSPEC] = { .name = "unspec", .type = YNL_PT_REJECT, }, - [FOU_ATTR_PORT] = { .name = "port", .type = YNL_PT_U16, }, - [FOU_ATTR_AF] = { .name = "af", .type = YNL_PT_U8, }, - [FOU_ATTR_IPPROTO] = { .name = "ipproto", .type = YNL_PT_U8, }, - [FOU_ATTR_TYPE] = { .name = "type", .type = YNL_PT_U8, }, - [FOU_ATTR_REMCSUM_NOPARTIAL] = { .name = "remcsum_nopartial", .type = YNL_PT_FLAG, }, - [FOU_ATTR_LOCAL_V4] = { .name = "local_v4", .type = YNL_PT_U32, }, - [FOU_ATTR_LOCAL_V6] = { .name = "local_v6", .type = YNL_PT_BINARY,}, - [FOU_ATTR_PEER_V4] = { .name = "peer_v4", .type = YNL_PT_U32, }, - [FOU_ATTR_PEER_V6] = { .name = "peer_v6", .type = YNL_PT_BINARY,}, - [FOU_ATTR_PEER_PORT] = { .name = "peer_port", .type = YNL_PT_U16, }, - [FOU_ATTR_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest fou_nest = { - .max_attr = FOU_ATTR_MAX, - .table = fou_policy, -}; - -/* Common nested types */ -/* ============== FOU_CMD_ADD ============== */ -/* FOU_CMD_ADD - do */ -void fou_add_req_free(struct fou_add_req *req) -{ - free(req->local_v6); - free(req->peer_v6); - free(req); -} - -int fou_add(struct ynl_sock *ys, struct fou_add_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_ADD, 1); - ys->req_policy = &fou_nest; - - if (req->_present.port) - mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); - if (req->_present.ipproto) - mnl_attr_put_u8(nlh, FOU_ATTR_IPPROTO, req->ipproto); - if (req->_present.type) - mnl_attr_put_u8(nlh, FOU_ATTR_TYPE, req->type); - if (req->_present.remcsum_nopartial) - mnl_attr_put(nlh, FOU_ATTR_REMCSUM_NOPARTIAL, 0, NULL); - if (req->_present.local_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); - if (req->_present.peer_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); - if (req->_present.local_v6_len) - mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); - if (req->_present.peer_v6_len) - mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); - if (req->_present.peer_port) - mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== FOU_CMD_DEL ============== */ -/* FOU_CMD_DEL - do */ -void fou_del_req_free(struct fou_del_req *req) -{ - free(req->local_v6); - free(req->peer_v6); - free(req); -} - -int fou_del(struct ynl_sock *ys, struct fou_del_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_DEL, 1); - ys->req_policy = &fou_nest; - - if (req->_present.af) - mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af); - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - if (req->_present.port) - mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); - if (req->_present.peer_port) - mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); - if (req->_present.local_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); - if (req->_present.peer_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); - if (req->_present.local_v6_len) - mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); - if (req->_present.peer_v6_len) - mnl_attr_put(nlh, FOU_ATTR_PEER_V6, 
req->_present.peer_v6_len, req->peer_v6); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -/* ============== FOU_CMD_GET ============== */ -/* FOU_CMD_GET - do */ -void fou_get_req_free(struct fou_get_req *req) -{ - free(req->local_v6); - free(req->peer_v6); - free(req); -} - -void fou_get_rsp_free(struct fou_get_rsp *rsp) -{ - free(rsp->local_v6); - free(rsp->peer_v6); - free(rsp); -} - -int fou_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct fou_get_rsp *dst; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == FOU_ATTR_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.port = 1; - dst->port = mnl_attr_get_u16(attr); - } else if (type == FOU_ATTR_IPPROTO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ipproto = 1; - dst->ipproto = mnl_attr_get_u8(attr); - } else if (type == FOU_ATTR_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.type = 1; - dst->type = mnl_attr_get_u8(attr); - } else if (type == FOU_ATTR_REMCSUM_NOPARTIAL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.remcsum_nopartial = 1; - } else if (type == FOU_ATTR_LOCAL_V4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.local_v4 = 1; - dst->local_v4 = mnl_attr_get_u32(attr); - } else if (type == FOU_ATTR_PEER_V4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.peer_v4 = 1; - dst->peer_v4 = mnl_attr_get_u32(attr); - } else if (type == FOU_ATTR_LOCAL_V6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.local_v6_len = len; - dst->local_v6 = malloc(len); - memcpy(dst->local_v6, mnl_attr_get_payload(attr), len); - } else if (type == FOU_ATTR_PEER_V6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.peer_v6_len = len; - dst->peer_v6 = malloc(len); - memcpy(dst->peer_v6, mnl_attr_get_payload(attr), len); - } else if (type == FOU_ATTR_PEER_PORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.peer_port = 1; - dst->peer_port = mnl_attr_get_u16(attr); - } else if (type == FOU_ATTR_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct fou_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_GET, 1); - ys->req_policy = &fou_nest; - yrs.yarg.rsp_policy = &fou_nest; - - if (req->_present.af) - mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af); - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); - if (req->_present.port) - mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); - if (req->_present.peer_port) - mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); - if (req->_present.local_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); - if (req->_present.peer_v4) - mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); - if (req->_present.local_v6_len) - mnl_attr_put(nlh, 
FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); - if (req->_present.peer_v6_len) - mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = fou_get_rsp_parse; - yrs.rsp_cmd = FOU_CMD_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - fou_get_rsp_free(rsp); - return NULL; -} - -/* FOU_CMD_GET - dump */ -void fou_get_list_free(struct fou_get_list *rsp) -{ - struct fou_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.local_v6); - free(rsp->obj.peer_v6); - free(rsp); - } -} - -struct fou_get_list *fou_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct fou_get_list); - yds.cb = fou_get_rsp_parse; - yds.rsp_cmd = FOU_CMD_GET; - yds.rsp_policy = &fou_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, FOU_CMD_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - fou_get_list_free(yds.first); - return NULL; -} - -const struct ynl_family ynl_fou_family = { - .name = "fou", -}; diff --git a/tools/net/ynl/generated/fou-user.h b/tools/net/ynl/generated/fou-user.h deleted file mode 100644 index fd566716ddd6..000000000000 --- a/tools/net/ynl/generated/fou-user.h +++ /dev/null @@ -1,343 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/fou.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_FOU_GEN_H -#define _LINUX_FOU_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_fou_family; - -/* Enums */ -const char *fou_op_str(int op); -const char *fou_encap_type_str(int value); - -/* Common nested types */ -/* ============== FOU_CMD_ADD ============== */ -/* FOU_CMD_ADD - do */ -struct fou_add_req { - struct { - __u32 port:1; - __u32 ipproto:1; - __u32 type:1; - __u32 remcsum_nopartial:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - __u32 peer_port:1; - __u32 ifindex:1; - } _present; - - __u16 port /* big-endian */; - __u8 ipproto; - __u8 type; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; - __u16 peer_port /* big-endian */; - __s32 ifindex; -}; - -static inline struct fou_add_req *fou_add_req_alloc(void) -{ - return calloc(1, sizeof(struct fou_add_req)); -} -void fou_add_req_free(struct fou_add_req *req); - -static inline void -fou_add_req_set_port(struct fou_add_req *req, __u16 port /* big-endian */) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -fou_add_req_set_ipproto(struct fou_add_req *req, __u8 ipproto) -{ - req->_present.ipproto = 1; - req->ipproto = ipproto; -} -static inline void fou_add_req_set_type(struct fou_add_req *req, __u8 type) -{ - req->_present.type = 1; - req->type = type; -} -static inline void fou_add_req_set_remcsum_nopartial(struct fou_add_req *req) -{ - req->_present.remcsum_nopartial = 1; -} -static inline void -fou_add_req_set_local_v4(struct fou_add_req *req, __u32 local_v4) -{ - req->_present.local_v4 = 1; - req->local_v4 = local_v4; -} -static inline void -fou_add_req_set_peer_v4(struct fou_add_req *req, __u32 peer_v4) -{ - req->_present.peer_v4 = 1; - req->peer_v4 = peer_v4; -} -static inline void 
-fou_add_req_set_local_v6(struct fou_add_req *req, const void *local_v6, - size_t len) -{ - free(req->local_v6); - req->_present.local_v6_len = len; - req->local_v6 = malloc(req->_present.local_v6_len); - memcpy(req->local_v6, local_v6, req->_present.local_v6_len); -} -static inline void -fou_add_req_set_peer_v6(struct fou_add_req *req, const void *peer_v6, - size_t len) -{ - free(req->peer_v6); - req->_present.peer_v6_len = len; - req->peer_v6 = malloc(req->_present.peer_v6_len); - memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); -} -static inline void -fou_add_req_set_peer_port(struct fou_add_req *req, - __u16 peer_port /* big-endian */) -{ - req->_present.peer_port = 1; - req->peer_port = peer_port; -} -static inline void -fou_add_req_set_ifindex(struct fou_add_req *req, __s32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -/* - * Add port. - */ -int fou_add(struct ynl_sock *ys, struct fou_add_req *req); - -/* ============== FOU_CMD_DEL ============== */ -/* FOU_CMD_DEL - do */ -struct fou_del_req { - struct { - __u32 af:1; - __u32 ifindex:1; - __u32 port:1; - __u32 peer_port:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - } _present; - - __u8 af; - __s32 ifindex; - __u16 port /* big-endian */; - __u16 peer_port /* big-endian */; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; -}; - -static inline struct fou_del_req *fou_del_req_alloc(void) -{ - return calloc(1, sizeof(struct fou_del_req)); -} -void fou_del_req_free(struct fou_del_req *req); - -static inline void fou_del_req_set_af(struct fou_del_req *req, __u8 af) -{ - req->_present.af = 1; - req->af = af; -} -static inline void -fou_del_req_set_ifindex(struct fou_del_req *req, __s32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} -static inline void -fou_del_req_set_port(struct fou_del_req *req, __u16 port /* big-endian */) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -fou_del_req_set_peer_port(struct fou_del_req *req, - __u16 peer_port /* big-endian */) -{ - req->_present.peer_port = 1; - req->peer_port = peer_port; -} -static inline void -fou_del_req_set_local_v4(struct fou_del_req *req, __u32 local_v4) -{ - req->_present.local_v4 = 1; - req->local_v4 = local_v4; -} -static inline void -fou_del_req_set_peer_v4(struct fou_del_req *req, __u32 peer_v4) -{ - req->_present.peer_v4 = 1; - req->peer_v4 = peer_v4; -} -static inline void -fou_del_req_set_local_v6(struct fou_del_req *req, const void *local_v6, - size_t len) -{ - free(req->local_v6); - req->_present.local_v6_len = len; - req->local_v6 = malloc(req->_present.local_v6_len); - memcpy(req->local_v6, local_v6, req->_present.local_v6_len); -} -static inline void -fou_del_req_set_peer_v6(struct fou_del_req *req, const void *peer_v6, - size_t len) -{ - free(req->peer_v6); - req->_present.peer_v6_len = len; - req->peer_v6 = malloc(req->_present.peer_v6_len); - memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); -} - -/* - * Delete port. 
- */ -int fou_del(struct ynl_sock *ys, struct fou_del_req *req); - -/* ============== FOU_CMD_GET ============== */ -/* FOU_CMD_GET - do */ -struct fou_get_req { - struct { - __u32 af:1; - __u32 ifindex:1; - __u32 port:1; - __u32 peer_port:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - } _present; - - __u8 af; - __s32 ifindex; - __u16 port /* big-endian */; - __u16 peer_port /* big-endian */; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; -}; - -static inline struct fou_get_req *fou_get_req_alloc(void) -{ - return calloc(1, sizeof(struct fou_get_req)); -} -void fou_get_req_free(struct fou_get_req *req); - -static inline void fou_get_req_set_af(struct fou_get_req *req, __u8 af) -{ - req->_present.af = 1; - req->af = af; -} -static inline void -fou_get_req_set_ifindex(struct fou_get_req *req, __s32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} -static inline void -fou_get_req_set_port(struct fou_get_req *req, __u16 port /* big-endian */) -{ - req->_present.port = 1; - req->port = port; -} -static inline void -fou_get_req_set_peer_port(struct fou_get_req *req, - __u16 peer_port /* big-endian */) -{ - req->_present.peer_port = 1; - req->peer_port = peer_port; -} -static inline void -fou_get_req_set_local_v4(struct fou_get_req *req, __u32 local_v4) -{ - req->_present.local_v4 = 1; - req->local_v4 = local_v4; -} -static inline void -fou_get_req_set_peer_v4(struct fou_get_req *req, __u32 peer_v4) -{ - req->_present.peer_v4 = 1; - req->peer_v4 = peer_v4; -} -static inline void -fou_get_req_set_local_v6(struct fou_get_req *req, const void *local_v6, - size_t len) -{ - free(req->local_v6); - req->_present.local_v6_len = len; - req->local_v6 = malloc(req->_present.local_v6_len); - memcpy(req->local_v6, local_v6, req->_present.local_v6_len); -} -static inline void -fou_get_req_set_peer_v6(struct fou_get_req *req, const void *peer_v6, - size_t len) -{ - free(req->peer_v6); - req->_present.peer_v6_len = len; - req->peer_v6 = malloc(req->_present.peer_v6_len); - memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); -} - -struct fou_get_rsp { - struct { - __u32 port:1; - __u32 ipproto:1; - __u32 type:1; - __u32 remcsum_nopartial:1; - __u32 local_v4:1; - __u32 peer_v4:1; - __u32 local_v6_len; - __u32 peer_v6_len; - __u32 peer_port:1; - __u32 ifindex:1; - } _present; - - __u16 port /* big-endian */; - __u8 ipproto; - __u8 type; - __u32 local_v4; - __u32 peer_v4; - void *local_v6; - void *peer_v6; - __u16 peer_port /* big-endian */; - __s32 ifindex; -}; - -void fou_get_rsp_free(struct fou_get_rsp *rsp); - -/* - * Get tunnel info. 
- */ -struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req); - -/* FOU_CMD_GET - dump */ -struct fou_get_list { - struct fou_get_list *next; - struct fou_get_rsp obj __attribute__((aligned(8))); -}; - -void fou_get_list_free(struct fou_get_list *rsp); - -struct fou_get_list *fou_get_dump(struct ynl_sock *ys); - -#endif /* _LINUX_FOU_GEN_H */ diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c deleted file mode 100644 index 6901f8462cca..000000000000 --- a/tools/net/ynl/generated/handshake-user.c +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/handshake.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "handshake-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const handshake_op_strmap[] = { - [HANDSHAKE_CMD_READY] = "ready", - [HANDSHAKE_CMD_ACCEPT] = "accept", - [HANDSHAKE_CMD_DONE] = "done", -}; - -const char *handshake_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(handshake_op_strmap)) - return NULL; - return handshake_op_strmap[op]; -} - -static const char * const handshake_handler_class_strmap[] = { - [0] = "none", - [1] = "tlshd", - [2] = "max", -}; - -const char *handshake_handler_class_str(enum handshake_handler_class value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_handler_class_strmap)) - return NULL; - return handshake_handler_class_strmap[value]; -} - -static const char * const handshake_msg_type_strmap[] = { - [0] = "unspec", - [1] = "clienthello", - [2] = "serverhello", -}; - -const char *handshake_msg_type_str(enum handshake_msg_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_msg_type_strmap)) - return NULL; - return handshake_msg_type_strmap[value]; -} - -static const char * const handshake_auth_strmap[] = { - [0] = "unspec", - [1] = "unauth", - [2] = "psk", - [3] = "x509", -}; - -const char *handshake_auth_str(enum handshake_auth value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_auth_strmap)) - return NULL; - return handshake_auth_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr handshake_x509_policy[HANDSHAKE_A_X509_MAX + 1] = { - [HANDSHAKE_A_X509_CERT] = { .name = "cert", .type = YNL_PT_U32, }, - [HANDSHAKE_A_X509_PRIVKEY] = { .name = "privkey", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest handshake_x509_nest = { - .max_attr = HANDSHAKE_A_X509_MAX, - .table = handshake_x509_policy, -}; - -struct ynl_policy_attr handshake_accept_policy[HANDSHAKE_A_ACCEPT_MAX + 1] = { - [HANDSHAKE_A_ACCEPT_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = { .name = "handler-class", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_MESSAGE_TYPE] = { .name = "message-type", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_TIMEOUT] = { .name = "timeout", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_AUTH_MODE] = { .name = "auth-mode", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_PEER_IDENTITY] = { .name = "peer-identity", .type = YNL_PT_U32, }, - [HANDSHAKE_A_ACCEPT_CERTIFICATE] = { .name = "certificate", .type = YNL_PT_NEST, .nest = &handshake_x509_nest, }, - [HANDSHAKE_A_ACCEPT_PEERNAME] = { .name = "peername", .type = YNL_PT_NUL_STR, }, -}; - -struct ynl_policy_nest handshake_accept_nest = { - .max_attr = HANDSHAKE_A_ACCEPT_MAX, - .table = handshake_accept_policy, -}; - -struct ynl_policy_attr 
handshake_done_policy[HANDSHAKE_A_DONE_MAX + 1] = { - [HANDSHAKE_A_DONE_STATUS] = { .name = "status", .type = YNL_PT_U32, }, - [HANDSHAKE_A_DONE_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, }, - [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .name = "remote-auth", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest handshake_done_nest = { - .max_attr = HANDSHAKE_A_DONE_MAX, - .table = handshake_done_policy, -}; - -/* Common nested types */ -void handshake_x509_free(struct handshake_x509 *obj) -{ -} - -int handshake_x509_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct handshake_x509 *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == HANDSHAKE_A_X509_CERT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.cert = 1; - dst->cert = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_X509_PRIVKEY) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.privkey = 1; - dst->privkey = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -/* ============== HANDSHAKE_CMD_ACCEPT ============== */ -/* HANDSHAKE_CMD_ACCEPT - do */ -void handshake_accept_req_free(struct handshake_accept_req *req) -{ - free(req); -} - -void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp) -{ - unsigned int i; - - free(rsp->peer_identity); - for (i = 0; i < rsp->n_certificate; i++) - handshake_x509_free(&rsp->certificate[i]); - free(rsp->certificate); - free(rsp->peername); - free(rsp); -} - -int handshake_accept_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct handshake_accept_rsp *dst; - unsigned int n_peer_identity = 0; - unsigned int n_certificate = 0; - const struct nlattr *attr; - struct ynl_parse_arg parg; - int i; - - dst = yarg->data; - parg.ys = yarg->ys; - - if (dst->certificate) - return ynl_error_parse(yarg, "attribute already present (accept.certificate)"); - if (dst->peer_identity) - return ynl_error_parse(yarg, "attribute already present (accept.peer-identity)"); - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == HANDSHAKE_A_ACCEPT_SOCKFD) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sockfd = 1; - dst->sockfd = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_MESSAGE_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.message_type = 1; - dst->message_type = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_TIMEOUT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.timeout = 1; - dst->timeout = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_AUTH_MODE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.auth_mode = 1; - dst->auth_mode = mnl_attr_get_u32(attr); - } else if (type == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) { - n_peer_identity++; - } else if (type == HANDSHAKE_A_ACCEPT_CERTIFICATE) { - n_certificate++; - } else if (type == HANDSHAKE_A_ACCEPT_PEERNAME) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); - dst->_present.peername_len = len; - dst->peername = malloc(len + 1); - memcpy(dst->peername, mnl_attr_get_str(attr), len); - dst->peername[len] = 0; - } - } - - if (n_certificate) { - dst->certificate = calloc(n_certificate, 
sizeof(*dst->certificate)); - dst->n_certificate = n_certificate; - i = 0; - parg.rsp_policy = &handshake_x509_nest; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_CERTIFICATE) { - parg.data = &dst->certificate[i]; - if (handshake_x509_parse(&parg, attr)) - return MNL_CB_ERROR; - i++; - } - } - } - if (n_peer_identity) { - dst->peer_identity = calloc(n_peer_identity, sizeof(*dst->peer_identity)); - dst->n_peer_identity = n_peer_identity; - i = 0; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) { - dst->peer_identity[i] = mnl_attr_get_u32(attr); - i++; - } - } - } - - return MNL_CB_OK; -} - -struct handshake_accept_rsp * -handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct handshake_accept_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_ACCEPT, 1); - ys->req_policy = &handshake_accept_nest; - yrs.yarg.rsp_policy = &handshake_accept_nest; - - if (req->_present.handler_class) - mnl_attr_put_u32(nlh, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, req->handler_class); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = handshake_accept_rsp_parse; - yrs.rsp_cmd = HANDSHAKE_CMD_ACCEPT; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - handshake_accept_rsp_free(rsp); - return NULL; -} - -/* HANDSHAKE_CMD_ACCEPT - notify */ -void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp) -{ - unsigned int i; - - free(rsp->obj.peer_identity); - for (i = 0; i < rsp->obj.n_certificate; i++) - handshake_x509_free(&rsp->obj.certificate[i]); - free(rsp->obj.certificate); - free(rsp->obj.peername); - free(rsp); -} - -/* ============== HANDSHAKE_CMD_DONE ============== */ -/* HANDSHAKE_CMD_DONE - do */ -void handshake_done_req_free(struct handshake_done_req *req) -{ - free(req->remote_auth); - free(req); -} - -int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_DONE, 1); - ys->req_policy = &handshake_done_nest; - - if (req->_present.status) - mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_STATUS, req->status); - if (req->_present.sockfd) - mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_SOCKFD, req->sockfd); - for (unsigned int i = 0; i < req->n_remote_auth; i++) - mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]); - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - return -1; - - return 0; -} - -static const struct ynl_ntf_info handshake_ntf_info[] = { - [HANDSHAKE_CMD_READY] = { - .alloc_sz = sizeof(struct handshake_accept_ntf), - .cb = handshake_accept_rsp_parse, - .policy = &handshake_accept_nest, - .free = (void *)handshake_accept_ntf_free, - }, -}; - -const struct ynl_family ynl_handshake_family = { - .name = "handshake", - .ntf_info = handshake_ntf_info, - .ntf_info_size = MNL_ARRAY_SIZE(handshake_ntf_info), -}; diff --git a/tools/net/ynl/generated/handshake-user.h b/tools/net/ynl/generated/handshake-user.h deleted file mode 100644 index bce537d8b8cc..000000000000 --- a/tools/net/ynl/generated/handshake-user.h +++ /dev/null @@ -1,145 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* 
Documentation/netlink/specs/handshake.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_HANDSHAKE_GEN_H -#define _LINUX_HANDSHAKE_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_handshake_family; - -/* Enums */ -const char *handshake_op_str(int op); -const char *handshake_handler_class_str(enum handshake_handler_class value); -const char *handshake_msg_type_str(enum handshake_msg_type value); -const char *handshake_auth_str(enum handshake_auth value); - -/* Common nested types */ -struct handshake_x509 { - struct { - __u32 cert:1; - __u32 privkey:1; - } _present; - - __s32 cert; - __s32 privkey; -}; - -/* ============== HANDSHAKE_CMD_ACCEPT ============== */ -/* HANDSHAKE_CMD_ACCEPT - do */ -struct handshake_accept_req { - struct { - __u32 handler_class:1; - } _present; - - enum handshake_handler_class handler_class; -}; - -static inline struct handshake_accept_req *handshake_accept_req_alloc(void) -{ - return calloc(1, sizeof(struct handshake_accept_req)); -} -void handshake_accept_req_free(struct handshake_accept_req *req); - -static inline void -handshake_accept_req_set_handler_class(struct handshake_accept_req *req, - enum handshake_handler_class handler_class) -{ - req->_present.handler_class = 1; - req->handler_class = handler_class; -} - -struct handshake_accept_rsp { - struct { - __u32 sockfd:1; - __u32 message_type:1; - __u32 timeout:1; - __u32 auth_mode:1; - __u32 peername_len; - } _present; - - __s32 sockfd; - enum handshake_msg_type message_type; - __u32 timeout; - enum handshake_auth auth_mode; - unsigned int n_peer_identity; - __u32 *peer_identity; - unsigned int n_certificate; - struct handshake_x509 *certificate; - char *peername; -}; - -void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp); - -/* - * Handler retrieves next queued handshake request - */ -struct handshake_accept_rsp * -handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req); - -/* HANDSHAKE_CMD_ACCEPT - notify */ -struct handshake_accept_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct handshake_accept_ntf *ntf); - struct handshake_accept_rsp obj __attribute__((aligned(8))); -}; - -void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp); - -/* ============== HANDSHAKE_CMD_DONE ============== */ -/* HANDSHAKE_CMD_DONE - do */ -struct handshake_done_req { - struct { - __u32 status:1; - __u32 sockfd:1; - } _present; - - __u32 status; - __s32 sockfd; - unsigned int n_remote_auth; - __u32 *remote_auth; -}; - -static inline struct handshake_done_req *handshake_done_req_alloc(void) -{ - return calloc(1, sizeof(struct handshake_done_req)); -} -void handshake_done_req_free(struct handshake_done_req *req); - -static inline void -handshake_done_req_set_status(struct handshake_done_req *req, __u32 status) -{ - req->_present.status = 1; - req->status = status; -} -static inline void -handshake_done_req_set_sockfd(struct handshake_done_req *req, __s32 sockfd) -{ - req->_present.sockfd = 1; - req->sockfd = sockfd; -} -static inline void -__handshake_done_req_set_remote_auth(struct handshake_done_req *req, - __u32 *remote_auth, - unsigned int n_remote_auth) -{ - free(req->remote_auth); - req->remote_auth = remote_auth; - req->n_remote_auth = n_remote_auth; -} - -/* - * Handler reports handshake completion - */ -int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req); - -#endif /* _LINUX_HANDSHAKE_GEN_H */ diff --git a/tools/net/ynl/generated/netdev-user.c 
b/tools/net/ynl/generated/netdev-user.c deleted file mode 100644 index ed8bcb855a1d..000000000000 --- a/tools/net/ynl/generated/netdev-user.c +++ /dev/null @@ -1,952 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/netdev.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "netdev-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const netdev_op_strmap[] = { - [NETDEV_CMD_DEV_GET] = "dev-get", - [NETDEV_CMD_DEV_ADD_NTF] = "dev-add-ntf", - [NETDEV_CMD_DEV_DEL_NTF] = "dev-del-ntf", - [NETDEV_CMD_DEV_CHANGE_NTF] = "dev-change-ntf", - [NETDEV_CMD_PAGE_POOL_GET] = "page-pool-get", - [NETDEV_CMD_PAGE_POOL_ADD_NTF] = "page-pool-add-ntf", - [NETDEV_CMD_PAGE_POOL_DEL_NTF] = "page-pool-del-ntf", - [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", - [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", - [NETDEV_CMD_QUEUE_GET] = "queue-get", - [NETDEV_CMD_NAPI_GET] = "napi-get", -}; - -const char *netdev_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(netdev_op_strmap)) - return NULL; - return netdev_op_strmap[op]; -} - -static const char * const netdev_xdp_act_strmap[] = { - [0] = "basic", - [1] = "redirect", - [2] = "ndo-xmit", - [3] = "xsk-zerocopy", - [4] = "hw-offload", - [5] = "rx-sg", - [6] = "ndo-xmit-sg", -}; - -const char *netdev_xdp_act_str(enum netdev_xdp_act value) -{ - value = ffs(value) - 1; - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_act_strmap)) - return NULL; - return netdev_xdp_act_strmap[value]; -} - -static const char * const netdev_xdp_rx_metadata_strmap[] = { - [0] = "timestamp", - [1] = "hash", -}; - -const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) -{ - value = ffs(value) - 1; - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_rx_metadata_strmap)) - return NULL; - return netdev_xdp_rx_metadata_strmap[value]; -} - -static const char * const netdev_xsk_flags_strmap[] = { - [0] = "tx-timestamp", - [1] = "tx-checksum", -}; - -const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) -{ - value = ffs(value) - 1; - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xsk_flags_strmap)) - return NULL; - return netdev_xsk_flags_strmap[value]; -} - -static const char * const netdev_queue_type_strmap[] = { - [0] = "rx", - [1] = "tx", -}; - -const char *netdev_queue_type_str(enum netdev_queue_type value) -{ - if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_queue_type_strmap)) - return NULL; - return netdev_queue_type_strmap[value]; -} - -/* Policies */ -struct ynl_policy_attr netdev_page_pool_info_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { - [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest netdev_page_pool_info_nest = { - .max_attr = NETDEV_A_PAGE_POOL_MAX, - .table = netdev_page_pool_info_policy, -}; - -struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { - [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, }, - [NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, }, - [NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, }, - [NETDEV_A_DEV_XSK_FEATURES] = { 
.name = "xsk-features", .type = YNL_PT_U64, }, -}; - -struct ynl_policy_nest netdev_dev_nest = { - .max_attr = NETDEV_A_DEV_MAX, - .table = netdev_dev_policy, -}; - -struct ynl_policy_attr netdev_page_pool_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { - [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_PAGE_POOL_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_INFLIGHT] = { .name = "inflight", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_INFLIGHT_MEM] = { .name = "inflight-mem", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_DETACH_TIME] = { .name = "detach-time", .type = YNL_PT_UINT, }, -}; - -struct ynl_policy_nest netdev_page_pool_nest = { - .max_attr = NETDEV_A_PAGE_POOL_MAX, - .table = netdev_page_pool_policy, -}; - -struct ynl_policy_attr netdev_page_pool_stats_policy[NETDEV_A_PAGE_POOL_STATS_MAX + 1] = { - [NETDEV_A_PAGE_POOL_STATS_INFO] = { .name = "info", .type = YNL_PT_NEST, .nest = &netdev_page_pool_info_nest, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST] = { .name = "alloc-fast", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW] = { .name = "alloc-slow", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER] = { .name = "alloc-slow-high-order", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY] = { .name = "alloc-empty", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL] = { .name = "alloc-refill", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE] = { .name = "alloc-waive", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED] = { .name = "recycle-cached", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL] = { .name = "recycle-cache-full", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING] = { .name = "recycle-ring", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL] = { .name = "recycle-ring-full", .type = YNL_PT_UINT, }, - [NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT] = { .name = "recycle-released-refcnt", .type = YNL_PT_UINT, }, -}; - -struct ynl_policy_nest netdev_page_pool_stats_nest = { - .max_attr = NETDEV_A_PAGE_POOL_STATS_MAX, - .table = netdev_page_pool_stats_policy, -}; - -struct ynl_policy_attr netdev_queue_policy[NETDEV_A_QUEUE_MAX + 1] = { - [NETDEV_A_QUEUE_ID] = { .name = "id", .type = YNL_PT_U32, }, - [NETDEV_A_QUEUE_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_QUEUE_TYPE] = { .name = "type", .type = YNL_PT_U32, }, - [NETDEV_A_QUEUE_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest netdev_queue_nest = { - .max_attr = NETDEV_A_QUEUE_MAX, - .table = netdev_queue_policy, -}; - -struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { - [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, - [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, - [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, - [NETDEV_A_NAPI_PID] = { .name = "pid", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest netdev_napi_nest = { - .max_attr = NETDEV_A_NAPI_MAX, - .table = netdev_napi_policy, -}; - -/* Common nested types */ -void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) -{ -} - -int netdev_page_pool_info_put(struct nlmsghdr *nlh, unsigned int attr_type, - struct netdev_page_pool_info *obj) -{ - struct nlattr *nest; - - nest = mnl_attr_nest_start(nlh, attr_type); - if (obj->_present.id) - 
mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, obj->id); - if (obj->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_PAGE_POOL_IFINDEX, obj->ifindex); - mnl_attr_nest_end(nlh, nest); - - return 0; -} - -int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, - const struct nlattr *nested) -{ - struct netdev_page_pool_info *dst = yarg->data; - const struct nlattr *attr; - - mnl_attr_for_each_nested(attr, nested) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_PAGE_POOL_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_NAPI_IRQ) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.irq = 1; - dst->irq = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_NAPI_PID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.pid = 1; - dst->pid = mnl_attr_get_u32(attr); - } - } - - return 0; -} - -/* ============== NETDEV_CMD_DEV_GET ============== */ -/* NETDEV_CMD_DEV_GET - do */ -void netdev_dev_get_req_free(struct netdev_dev_get_req *req) -{ - free(req); -} - -void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct netdev_dev_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_DEV_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_DEV_XDP_FEATURES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xdp_features = 1; - dst->xdp_features = mnl_attr_get_u64(attr); - } else if (type == NETDEV_A_DEV_XDP_ZC_MAX_SEGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xdp_zc_max_segs = 1; - dst->xdp_zc_max_segs = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_DEV_XDP_RX_METADATA_FEATURES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xdp_rx_metadata_features = 1; - dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr); - } else if (type == NETDEV_A_DEV_XSK_FEATURES) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xsk_features = 1; - dst->xsk_features = mnl_attr_get_u64(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_dev_get_rsp * -netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_dev_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1); - ys->req_policy = &netdev_dev_nest; - yrs.yarg.rsp_policy = &netdev_dev_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_DEV_IFINDEX, req->ifindex); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_dev_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_DEV_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_dev_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_DEV_GET - dump */ -void netdev_dev_get_list_free(struct 
netdev_dev_get_list *rsp) -{ - struct netdev_dev_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_dev_get_list); - yds.cb = netdev_dev_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_DEV_GET; - yds.rsp_policy = &netdev_dev_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_dev_get_list_free(yds.first); - return NULL; -} - -/* NETDEV_CMD_DEV_GET - notify */ -void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp) -{ - free(rsp); -} - -/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_GET - do */ -void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req) -{ - free(req); -} - -void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_page_pool_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct netdev_page_pool_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_PAGE_POOL_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_PAGE_POOL_NAPI_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.napi_id = 1; - dst->napi_id = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.inflight = 1; - dst->inflight = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_INFLIGHT_MEM) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.inflight_mem = 1; - dst->inflight_mem = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_DETACH_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.detach_time = 1; - dst->detach_time = mnl_attr_get_uint(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_page_pool_get_rsp * -netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_page_pool_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); - ys->req_policy = &netdev_page_pool_nest; - yrs.yarg.rsp_policy = &netdev_page_pool_nest; - - if (req->_present.id) - mnl_attr_put_uint(nlh, NETDEV_A_PAGE_POOL_ID, req->id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_page_pool_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_page_pool_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_PAGE_POOL_GET - dump */ -void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp) -{ - struct netdev_page_pool_get_list 
*next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_page_pool_get_list * -netdev_page_pool_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_page_pool_get_list); - yds.cb = netdev_page_pool_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_GET; - yds.rsp_policy = &netdev_page_pool_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_page_pool_get_list_free(yds.first); - return NULL; -} - -/* NETDEV_CMD_PAGE_POOL_GET - notify */ -void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp) -{ - free(rsp); -} - -/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ -void -netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req) -{ - netdev_page_pool_info_free(&req->info); - free(req); -} - -void -netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp) -{ - netdev_page_pool_info_free(&rsp->info); - free(rsp); -} - -int netdev_page_pool_stats_get_rsp_parse(const struct nlmsghdr *nlh, - void *data) -{ - struct netdev_page_pool_stats_get_rsp *dst; - struct ynl_parse_arg *yarg = data; - const struct nlattr *attr; - struct ynl_parse_arg parg; - - dst = yarg->data; - parg.ys = yarg->ys; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_PAGE_POOL_STATS_INFO) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.info = 1; - - parg.rsp_policy = &netdev_page_pool_info_nest; - parg.data = &dst->info; - if (netdev_page_pool_info_parse(&parg, attr)) - return MNL_CB_ERROR; - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_fast = 1; - dst->alloc_fast = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_slow = 1; - dst->alloc_slow = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_slow_high_order = 1; - dst->alloc_slow_high_order = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_empty = 1; - dst->alloc_empty = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_refill = 1; - dst->alloc_refill = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.alloc_waive = 1; - dst->alloc_waive = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_cached = 1; - dst->recycle_cached = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_cache_full = 1; - 
dst->recycle_cache_full = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_ring = 1; - dst->recycle_ring = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_ring_full = 1; - dst->recycle_ring_full = mnl_attr_get_uint(attr); - } else if (type == NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.recycle_released_refcnt = 1; - dst->recycle_released_refcnt = mnl_attr_get_uint(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_page_pool_stats_get_rsp * -netdev_page_pool_stats_get(struct ynl_sock *ys, - struct netdev_page_pool_stats_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_page_pool_stats_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); - ys->req_policy = &netdev_page_pool_stats_nest; - yrs.yarg.rsp_policy = &netdev_page_pool_stats_nest; - - if (req->_present.info) - netdev_page_pool_info_put(nlh, NETDEV_A_PAGE_POOL_STATS_INFO, &req->info); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_page_pool_stats_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_page_pool_stats_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ -void -netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp) -{ - struct netdev_page_pool_stats_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - netdev_page_pool_info_free(&rsp->obj.info); - free(rsp); - } -} - -struct netdev_page_pool_stats_get_list * -netdev_page_pool_stats_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_page_pool_stats_get_list); - yds.cb = netdev_page_pool_stats_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_PAGE_POOL_STATS_GET; - yds.rsp_policy = &netdev_page_pool_stats_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_PAGE_POOL_STATS_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_page_pool_stats_get_list_free(yds.first); - return NULL; -} - -/* ============== NETDEV_CMD_QUEUE_GET ============== */ -/* NETDEV_CMD_QUEUE_GET - do */ -void netdev_queue_get_req_free(struct netdev_queue_get_req *req) -{ - free(req); -} - -void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_queue_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct netdev_queue_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_QUEUE_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_QUEUE_TYPE) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.type = 1; - dst->type = mnl_attr_get_u32(attr); - } else if (type == 
NETDEV_A_QUEUE_NAPI_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.napi_id = 1; - dst->napi_id = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_QUEUE_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_queue_get_rsp * -netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_queue_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); - ys->req_policy = &netdev_queue_nest; - yrs.yarg.rsp_policy = &netdev_queue_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); - if (req->_present.type) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_TYPE, req->type); - if (req->_present.id) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_ID, req->id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_queue_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_QUEUE_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_queue_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_QUEUE_GET - dump */ -void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp) -{ - struct netdev_queue_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_queue_get_list * -netdev_queue_get_dump(struct ynl_sock *ys, - struct netdev_queue_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_queue_get_list); - yds.cb = netdev_queue_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_QUEUE_GET; - yds.rsp_policy = &netdev_queue_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); - ys->req_policy = &netdev_queue_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_queue_get_list_free(yds.first); - return NULL; -} - -/* ============== NETDEV_CMD_NAPI_GET ============== */ -/* NETDEV_CMD_NAPI_GET - do */ -void netdev_napi_get_req_free(struct netdev_napi_get_req *req) -{ - free(req); -} - -void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp) -{ - free(rsp); -} - -int netdev_napi_get_rsp_parse(const struct nlmsghdr *nlh, void *data) -{ - struct ynl_parse_arg *yarg = data; - struct netdev_napi_get_rsp *dst; - const struct nlattr *attr; - - dst = yarg->data; - - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NETDEV_A_NAPI_ID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.id = 1; - dst->id = mnl_attr_get_u32(attr); - } else if (type == NETDEV_A_NAPI_IFINDEX) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.ifindex = 1; - dst->ifindex = mnl_attr_get_u32(attr); - } - } - - return MNL_CB_OK; -} - -struct netdev_napi_get_rsp * -netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req) -{ - struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; - struct netdev_napi_get_rsp *rsp; - struct nlmsghdr *nlh; - int err; - - nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); - 
ys->req_policy = &netdev_napi_nest; - yrs.yarg.rsp_policy = &netdev_napi_nest; - - if (req->_present.id) - mnl_attr_put_u32(nlh, NETDEV_A_NAPI_ID, req->id); - - rsp = calloc(1, sizeof(*rsp)); - yrs.yarg.data = rsp; - yrs.cb = netdev_napi_get_rsp_parse; - yrs.rsp_cmd = NETDEV_CMD_NAPI_GET; - - err = ynl_exec(ys, nlh, &yrs); - if (err < 0) - goto err_free; - - return rsp; - -err_free: - netdev_napi_get_rsp_free(rsp); - return NULL; -} - -/* NETDEV_CMD_NAPI_GET - dump */ -void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp) -{ - struct netdev_napi_get_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp); - } -} - -struct netdev_napi_get_list * -netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct netdev_napi_get_list); - yds.cb = netdev_napi_get_rsp_parse; - yds.rsp_cmd = NETDEV_CMD_NAPI_GET; - yds.rsp_policy = &netdev_napi_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); - ys->req_policy = &netdev_napi_nest; - - if (req->_present.ifindex) - mnl_attr_put_u32(nlh, NETDEV_A_NAPI_IFINDEX, req->ifindex); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - netdev_napi_get_list_free(yds.first); - return NULL; -} - -static const struct ynl_ntf_info netdev_ntf_info[] = { - [NETDEV_CMD_DEV_ADD_NTF] = { - .alloc_sz = sizeof(struct netdev_dev_get_ntf), - .cb = netdev_dev_get_rsp_parse, - .policy = &netdev_dev_nest, - .free = (void *)netdev_dev_get_ntf_free, - }, - [NETDEV_CMD_DEV_DEL_NTF] = { - .alloc_sz = sizeof(struct netdev_dev_get_ntf), - .cb = netdev_dev_get_rsp_parse, - .policy = &netdev_dev_nest, - .free = (void *)netdev_dev_get_ntf_free, - }, - [NETDEV_CMD_DEV_CHANGE_NTF] = { - .alloc_sz = sizeof(struct netdev_dev_get_ntf), - .cb = netdev_dev_get_rsp_parse, - .policy = &netdev_dev_nest, - .free = (void *)netdev_dev_get_ntf_free, - }, - [NETDEV_CMD_PAGE_POOL_ADD_NTF] = { - .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), - .cb = netdev_page_pool_get_rsp_parse, - .policy = &netdev_page_pool_nest, - .free = (void *)netdev_page_pool_get_ntf_free, - }, - [NETDEV_CMD_PAGE_POOL_DEL_NTF] = { - .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), - .cb = netdev_page_pool_get_rsp_parse, - .policy = &netdev_page_pool_nest, - .free = (void *)netdev_page_pool_get_ntf_free, - }, - [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = { - .alloc_sz = sizeof(struct netdev_page_pool_get_ntf), - .cb = netdev_page_pool_get_rsp_parse, - .policy = &netdev_page_pool_nest, - .free = (void *)netdev_page_pool_get_ntf_free, - }, -}; - -const struct ynl_family ynl_netdev_family = { - .name = "netdev", - .ntf_info = netdev_ntf_info, - .ntf_info_size = MNL_ARRAY_SIZE(netdev_ntf_info), -}; diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h deleted file mode 100644 index 3830cf2ab6b8..000000000000 --- a/tools/net/ynl/generated/netdev-user.h +++ /dev/null @@ -1,442 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/netdev.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_NETDEV_GEN_H -#define _LINUX_NETDEV_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_netdev_family; - -/* Enums */ -const char *netdev_op_str(int op); 
-const char *netdev_xdp_act_str(enum netdev_xdp_act value); -const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); -const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); -const char *netdev_queue_type_str(enum netdev_queue_type value); - -/* Common nested types */ -struct netdev_page_pool_info { - struct { - __u32 id:1; - __u32 ifindex:1; - } _present; - - __u64 id; - __u32 ifindex; -}; - -/* ============== NETDEV_CMD_DEV_GET ============== */ -/* NETDEV_CMD_DEV_GET - do */ -struct netdev_dev_get_req { - struct { - __u32 ifindex:1; - } _present; - - __u32 ifindex; -}; - -static inline struct netdev_dev_get_req *netdev_dev_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_dev_get_req)); -} -void netdev_dev_get_req_free(struct netdev_dev_get_req *req); - -static inline void -netdev_dev_get_req_set_ifindex(struct netdev_dev_get_req *req, __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -struct netdev_dev_get_rsp { - struct { - __u32 ifindex:1; - __u32 xdp_features:1; - __u32 xdp_zc_max_segs:1; - __u32 xdp_rx_metadata_features:1; - __u32 xsk_features:1; - } _present; - - __u32 ifindex; - __u64 xdp_features; - __u32 xdp_zc_max_segs; - __u64 xdp_rx_metadata_features; - __u64 xsk_features; -}; - -void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp); - -/* - * Get / dump information about a netdev. - */ -struct netdev_dev_get_rsp * -netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req); - -/* NETDEV_CMD_DEV_GET - dump */ -struct netdev_dev_get_list { - struct netdev_dev_get_list *next; - struct netdev_dev_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp); - -struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys); - -/* NETDEV_CMD_DEV_GET - notify */ -struct netdev_dev_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct netdev_dev_get_ntf *ntf); - struct netdev_dev_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp); - -/* ============== NETDEV_CMD_PAGE_POOL_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_GET - do */ -struct netdev_page_pool_get_req { - struct { - __u32 id:1; - } _present; - - __u64 id; -}; - -static inline struct netdev_page_pool_get_req * -netdev_page_pool_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_page_pool_get_req)); -} -void netdev_page_pool_get_req_free(struct netdev_page_pool_get_req *req); - -static inline void -netdev_page_pool_get_req_set_id(struct netdev_page_pool_get_req *req, __u64 id) -{ - req->_present.id = 1; - req->id = id; -} - -struct netdev_page_pool_get_rsp { - struct { - __u32 id:1; - __u32 ifindex:1; - __u32 napi_id:1; - __u32 inflight:1; - __u32 inflight_mem:1; - __u32 detach_time:1; - } _present; - - __u64 id; - __u32 ifindex; - __u64 napi_id; - __u64 inflight; - __u64 inflight_mem; - __u64 detach_time; -}; - -void netdev_page_pool_get_rsp_free(struct netdev_page_pool_get_rsp *rsp); - -/* - * Get / dump information about Page Pools. -(Only Page Pools associated with a net_device can be listed.) 
- - */ -struct netdev_page_pool_get_rsp * -netdev_page_pool_get(struct ynl_sock *ys, struct netdev_page_pool_get_req *req); - -/* NETDEV_CMD_PAGE_POOL_GET - dump */ -struct netdev_page_pool_get_list { - struct netdev_page_pool_get_list *next; - struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_page_pool_get_list_free(struct netdev_page_pool_get_list *rsp); - -struct netdev_page_pool_get_list * -netdev_page_pool_get_dump(struct ynl_sock *ys); - -/* NETDEV_CMD_PAGE_POOL_GET - notify */ -struct netdev_page_pool_get_ntf { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct netdev_page_pool_get_ntf *ntf); - struct netdev_page_pool_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_page_pool_get_ntf_free(struct netdev_page_pool_get_ntf *rsp); - -/* ============== NETDEV_CMD_PAGE_POOL_STATS_GET ============== */ -/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ -struct netdev_page_pool_stats_get_req { - struct { - __u32 info:1; - } _present; - - struct netdev_page_pool_info info; -}; - -static inline struct netdev_page_pool_stats_get_req * -netdev_page_pool_stats_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_page_pool_stats_get_req)); -} -void -netdev_page_pool_stats_get_req_free(struct netdev_page_pool_stats_get_req *req); - -static inline void -netdev_page_pool_stats_get_req_set_info_id(struct netdev_page_pool_stats_get_req *req, - __u64 id) -{ - req->_present.info = 1; - req->info._present.id = 1; - req->info.id = id; -} -static inline void -netdev_page_pool_stats_get_req_set_info_ifindex(struct netdev_page_pool_stats_get_req *req, - __u32 ifindex) -{ - req->_present.info = 1; - req->info._present.ifindex = 1; - req->info.ifindex = ifindex; -} - -struct netdev_page_pool_stats_get_rsp { - struct { - __u32 info:1; - __u32 alloc_fast:1; - __u32 alloc_slow:1; - __u32 alloc_slow_high_order:1; - __u32 alloc_empty:1; - __u32 alloc_refill:1; - __u32 alloc_waive:1; - __u32 recycle_cached:1; - __u32 recycle_cache_full:1; - __u32 recycle_ring:1; - __u32 recycle_ring_full:1; - __u32 recycle_released_refcnt:1; - } _present; - - struct netdev_page_pool_info info; - __u64 alloc_fast; - __u64 alloc_slow; - __u64 alloc_slow_high_order; - __u64 alloc_empty; - __u64 alloc_refill; - __u64 alloc_waive; - __u64 recycle_cached; - __u64 recycle_cache_full; - __u64 recycle_ring; - __u64 recycle_ring_full; - __u64 recycle_released_refcnt; -}; - -void -netdev_page_pool_stats_get_rsp_free(struct netdev_page_pool_stats_get_rsp *rsp); - -/* - * Get page pool statistics. 
- */ -struct netdev_page_pool_stats_get_rsp * -netdev_page_pool_stats_get(struct ynl_sock *ys, - struct netdev_page_pool_stats_get_req *req); - -/* NETDEV_CMD_PAGE_POOL_STATS_GET - dump */ -struct netdev_page_pool_stats_get_list { - struct netdev_page_pool_stats_get_list *next; - struct netdev_page_pool_stats_get_rsp obj __attribute__((aligned(8))); -}; - -void -netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp); - -struct netdev_page_pool_stats_get_list * -netdev_page_pool_stats_get_dump(struct ynl_sock *ys); - -/* ============== NETDEV_CMD_QUEUE_GET ============== */ -/* NETDEV_CMD_QUEUE_GET - do */ -struct netdev_queue_get_req { - struct { - __u32 ifindex:1; - __u32 type:1; - __u32 id:1; - } _present; - - __u32 ifindex; - enum netdev_queue_type type; - __u32 id; -}; - -static inline struct netdev_queue_get_req *netdev_queue_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_queue_get_req)); -} -void netdev_queue_get_req_free(struct netdev_queue_get_req *req); - -static inline void -netdev_queue_get_req_set_ifindex(struct netdev_queue_get_req *req, - __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} -static inline void -netdev_queue_get_req_set_type(struct netdev_queue_get_req *req, - enum netdev_queue_type type) -{ - req->_present.type = 1; - req->type = type; -} -static inline void -netdev_queue_get_req_set_id(struct netdev_queue_get_req *req, __u32 id) -{ - req->_present.id = 1; - req->id = id; -} - -struct netdev_queue_get_rsp { - struct { - __u32 id:1; - __u32 type:1; - __u32 napi_id:1; - __u32 ifindex:1; - } _present; - - __u32 id; - enum netdev_queue_type type; - __u32 napi_id; - __u32 ifindex; -}; - -void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp); - -/* - * Get queue information from the kernel. Only configured queues will be reported (as opposed to all available hardware queues). 
- */ -struct netdev_queue_get_rsp * -netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req); - -/* NETDEV_CMD_QUEUE_GET - dump */ -struct netdev_queue_get_req_dump { - struct { - __u32 ifindex:1; - } _present; - - __u32 ifindex; -}; - -static inline struct netdev_queue_get_req_dump * -netdev_queue_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct netdev_queue_get_req_dump)); -} -void netdev_queue_get_req_dump_free(struct netdev_queue_get_req_dump *req); - -static inline void -netdev_queue_get_req_dump_set_ifindex(struct netdev_queue_get_req_dump *req, - __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -struct netdev_queue_get_list { - struct netdev_queue_get_list *next; - struct netdev_queue_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp); - -struct netdev_queue_get_list * -netdev_queue_get_dump(struct ynl_sock *ys, - struct netdev_queue_get_req_dump *req); - -/* ============== NETDEV_CMD_NAPI_GET ============== */ -/* NETDEV_CMD_NAPI_GET - do */ -struct netdev_napi_get_req { - struct { - __u32 id:1; - } _present; - - __u32 id; -}; - -static inline struct netdev_napi_get_req *netdev_napi_get_req_alloc(void) -{ - return calloc(1, sizeof(struct netdev_napi_get_req)); -} -void netdev_napi_get_req_free(struct netdev_napi_get_req *req); - -static inline void -netdev_napi_get_req_set_id(struct netdev_napi_get_req *req, __u32 id) -{ - req->_present.id = 1; - req->id = id; -} - -struct netdev_napi_get_rsp { - struct { - __u32 id:1; - __u32 ifindex:1; - __u32 irq:1; - __u32 pid:1; - } _present; - - __u32 id; - __u32 ifindex; - __u32 irq; - __u32 pid; -}; - -void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); - -/* - * Get information about NAPI instances configured on the system. 
- */ -struct netdev_napi_get_rsp * -netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req); - -/* NETDEV_CMD_NAPI_GET - dump */ -struct netdev_napi_get_req_dump { - struct { - __u32 ifindex:1; - } _present; - - __u32 ifindex; -}; - -static inline struct netdev_napi_get_req_dump * -netdev_napi_get_req_dump_alloc(void) -{ - return calloc(1, sizeof(struct netdev_napi_get_req_dump)); -} -void netdev_napi_get_req_dump_free(struct netdev_napi_get_req_dump *req); - -static inline void -netdev_napi_get_req_dump_set_ifindex(struct netdev_napi_get_req_dump *req, - __u32 ifindex) -{ - req->_present.ifindex = 1; - req->ifindex = ifindex; -} - -struct netdev_napi_get_list { - struct netdev_napi_get_list *next; - struct netdev_napi_get_rsp obj __attribute__((aligned(8))); -}; - -void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp); - -struct netdev_napi_get_list * -netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req); - -#endif /* _LINUX_NETDEV_GEN_H */ diff --git a/tools/net/ynl/generated/nfsd-user.c b/tools/net/ynl/generated/nfsd-user.c deleted file mode 100644 index 360b6448c6e9..000000000000 --- a/tools/net/ynl/generated/nfsd-user.c +++ /dev/null @@ -1,203 +0,0 @@ -// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/nfsd.yaml */ -/* YNL-GEN user source */ - -#include -#include -#include "nfsd-user.h" -#include "ynl.h" -#include - -#include -#include - -/* Enums */ -static const char * const nfsd_op_strmap[] = { - [NFSD_CMD_RPC_STATUS_GET] = "rpc-status-get", -}; - -const char *nfsd_op_str(int op) -{ - if (op < 0 || op >= (int)MNL_ARRAY_SIZE(nfsd_op_strmap)) - return NULL; - return nfsd_op_strmap[op]; -} - -/* Policies */ -struct ynl_policy_attr nfsd_rpc_status_policy[NFSD_A_RPC_STATUS_MAX + 1] = { - [NFSD_A_RPC_STATUS_XID] = { .name = "xid", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_FLAGS] = { .name = "flags", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_PROG] = { .name = "prog", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_VERSION] = { .name = "version", .type = YNL_PT_U8, }, - [NFSD_A_RPC_STATUS_PROC] = { .name = "proc", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_SERVICE_TIME] = { .name = "service_time", .type = YNL_PT_U64, }, - [NFSD_A_RPC_STATUS_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, - [NFSD_A_RPC_STATUS_SADDR4] = { .name = "saddr4", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_DADDR4] = { .name = "daddr4", .type = YNL_PT_U32, }, - [NFSD_A_RPC_STATUS_SADDR6] = { .name = "saddr6", .type = YNL_PT_BINARY,}, - [NFSD_A_RPC_STATUS_DADDR6] = { .name = "daddr6", .type = YNL_PT_BINARY,}, - [NFSD_A_RPC_STATUS_SPORT] = { .name = "sport", .type = YNL_PT_U16, }, - [NFSD_A_RPC_STATUS_DPORT] = { .name = "dport", .type = YNL_PT_U16, }, - [NFSD_A_RPC_STATUS_COMPOUND_OPS] = { .name = "compound-ops", .type = YNL_PT_U32, }, -}; - -struct ynl_policy_nest nfsd_rpc_status_nest = { - .max_attr = NFSD_A_RPC_STATUS_MAX, - .table = nfsd_rpc_status_policy, -}; - -/* Common nested types */ -/* ============== NFSD_CMD_RPC_STATUS_GET ============== */ -/* NFSD_CMD_RPC_STATUS_GET - dump */ -int nfsd_rpc_status_get_rsp_dump_parse(const struct nlmsghdr *nlh, void *data) -{ - struct nfsd_rpc_status_get_rsp_dump *dst; - struct ynl_parse_arg *yarg = data; - unsigned int n_compound_ops = 0; - const struct nlattr *attr; - int i; - - dst = yarg->data; - - if (dst->compound_ops) - return ynl_error_parse(yarg, "attribute already present (rpc-status.compound-ops)"); 
- - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - unsigned int type = mnl_attr_get_type(attr); - - if (type == NFSD_A_RPC_STATUS_XID) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.xid = 1; - dst->xid = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_FLAGS) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.flags = 1; - dst->flags = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_PROG) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.prog = 1; - dst->prog = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_VERSION) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.version = 1; - dst->version = mnl_attr_get_u8(attr); - } else if (type == NFSD_A_RPC_STATUS_PROC) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.proc = 1; - dst->proc = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_SERVICE_TIME) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.service_time = 1; - dst->service_time = mnl_attr_get_u64(attr); - } else if (type == NFSD_A_RPC_STATUS_SADDR4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.saddr4 = 1; - dst->saddr4 = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_DADDR4) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.daddr4 = 1; - dst->daddr4 = mnl_attr_get_u32(attr); - } else if (type == NFSD_A_RPC_STATUS_SADDR6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.saddr6_len = len; - dst->saddr6 = malloc(len); - memcpy(dst->saddr6, mnl_attr_get_payload(attr), len); - } else if (type == NFSD_A_RPC_STATUS_DADDR6) { - unsigned int len; - - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - - len = mnl_attr_get_payload_len(attr); - dst->_present.daddr6_len = len; - dst->daddr6 = malloc(len); - memcpy(dst->daddr6, mnl_attr_get_payload(attr), len); - } else if (type == NFSD_A_RPC_STATUS_SPORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.sport = 1; - dst->sport = mnl_attr_get_u16(attr); - } else if (type == NFSD_A_RPC_STATUS_DPORT) { - if (ynl_attr_validate(yarg, attr)) - return MNL_CB_ERROR; - dst->_present.dport = 1; - dst->dport = mnl_attr_get_u16(attr); - } else if (type == NFSD_A_RPC_STATUS_COMPOUND_OPS) { - n_compound_ops++; - } - } - - if (n_compound_ops) { - dst->compound_ops = calloc(n_compound_ops, sizeof(*dst->compound_ops)); - dst->n_compound_ops = n_compound_ops; - i = 0; - mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { - if (mnl_attr_get_type(attr) == NFSD_A_RPC_STATUS_COMPOUND_OPS) { - dst->compound_ops[i] = mnl_attr_get_u32(attr); - i++; - } - } - } - - return MNL_CB_OK; -} - -void -nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp) -{ - struct nfsd_rpc_status_get_rsp_list *next = rsp; - - while ((void *)next != YNL_LIST_END) { - rsp = next; - next = rsp->next; - - free(rsp->obj.saddr6); - free(rsp->obj.daddr6); - free(rsp->obj.compound_ops); - free(rsp); - } -} - -struct nfsd_rpc_status_get_rsp_list * -nfsd_rpc_status_get_dump(struct ynl_sock *ys) -{ - struct ynl_dump_state yds = {}; - struct nlmsghdr *nlh; - int err; - - yds.ys = ys; - yds.alloc_sz = sizeof(struct nfsd_rpc_status_get_rsp_list); - yds.cb = nfsd_rpc_status_get_rsp_dump_parse; - yds.rsp_cmd = NFSD_CMD_RPC_STATUS_GET; - 
yds.rsp_policy = &nfsd_rpc_status_nest; - - nlh = ynl_gemsg_start_dump(ys, ys->family_id, NFSD_CMD_RPC_STATUS_GET, 1); - - err = ynl_exec_dump(ys, nlh, &yds); - if (err < 0) - goto free_list; - - return yds.first; - -free_list: - nfsd_rpc_status_get_rsp_list_free(yds.first); - return NULL; -} - -const struct ynl_family ynl_nfsd_family = { - .name = "nfsd", -}; diff --git a/tools/net/ynl/generated/nfsd-user.h b/tools/net/ynl/generated/nfsd-user.h deleted file mode 100644 index 989c6e209ced..000000000000 --- a/tools/net/ynl/generated/nfsd-user.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ -/* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/nfsd.yaml */ -/* YNL-GEN user header */ - -#ifndef _LINUX_NFSD_GEN_H -#define _LINUX_NFSD_GEN_H - -#include -#include -#include -#include - -struct ynl_sock; - -extern const struct ynl_family ynl_nfsd_family; - -/* Enums */ -const char *nfsd_op_str(int op); - -/* Common nested types */ -/* ============== NFSD_CMD_RPC_STATUS_GET ============== */ -/* NFSD_CMD_RPC_STATUS_GET - dump */ -struct nfsd_rpc_status_get_rsp_dump { - struct { - __u32 xid:1; - __u32 flags:1; - __u32 prog:1; - __u32 version:1; - __u32 proc:1; - __u32 service_time:1; - __u32 saddr4:1; - __u32 daddr4:1; - __u32 saddr6_len; - __u32 daddr6_len; - __u32 sport:1; - __u32 dport:1; - } _present; - - __u32 xid /* big-endian */; - __u32 flags; - __u32 prog; - __u8 version; - __u32 proc; - __s64 service_time; - __u32 saddr4 /* big-endian */; - __u32 daddr4 /* big-endian */; - void *saddr6; - void *daddr6; - __u16 sport /* big-endian */; - __u16 dport /* big-endian */; - unsigned int n_compound_ops; - __u32 *compound_ops; -}; - -struct nfsd_rpc_status_get_rsp_list { - struct nfsd_rpc_status_get_rsp_list *next; - struct nfsd_rpc_status_get_rsp_dump obj __attribute__((aligned(8))); -}; - -void -nfsd_rpc_status_get_rsp_list_free(struct nfsd_rpc_status_get_rsp_list *rsp); - -struct nfsd_rpc_status_get_rsp_list * -nfsd_rpc_status_get_dump(struct ynl_sock *ys); - -#endif /* _LINUX_NFSD_GEN_H */ -- cgit v1.2.3-70-g09d2 From 41f6f64e6999a837048b1bd13a2f8742964eca6b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:39 -0800 Subject: bpf: support non-r10 register spill/fill to/from stack in precision tracking Use instruction (jump) history to record instructions that performed register spill/fill to/from stack, regardless if this was done through read-only r10 register, or any other register after copying r10 into it *and* potentially adjusting offset. To make this work reliably, we push extra per-instruction flags into instruction history, encoding stack slot index (spi) and stack frame number in extra 10 bit flags we take away from prev_idx in instruction history. We don't touch idx field for maximum performance, as it's checked most frequently during backtracking. This change removes basically the last remaining practical limitation of precision backtracking logic in BPF verifier. It fixes known deficiencies, but also opens up new opportunities to reduce number of verified states, explored in the subsequent patches. There are only three differences in selftests' BPF object files according to veristat, all in the positive direction (less states). 
File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF)
-------------------------------------- ------------- --------- --------- ------------- ---------- ---------- -------------
test_cls_redirect_dynptr.bpf.linked3.o cls_redirect 2987 2864 -123 (-4.12%) 240 231 -9 (-3.75%)
xdp_synproxy_kern.bpf.linked3.o syncookie_tc 82848 82661 -187 (-0.23%) 5107 5073 -34 (-0.67%)
xdp_synproxy_kern.bpf.linked3.o syncookie_xdp 85116 84964 -152 (-0.18%) 5162 5130 -32 (-0.62%)

Note, I avoided renaming jmp_history to more generic insn_hist to minimize number of lines changed and potential merge conflicts between bpf and bpf-next trees. Notice also cur_hist_entry pointer reset to NULL at the beginning of instruction verification loop. This pointer avoids the problem of relying on last jump history entry's insn_idx to determine whether we already have entry for current instruction or not. It can happen that we added jump history entry because current instruction is_jmp_point(), but also we need to add instruction flags for stack access. In this case, we don't want two entries, so we need to reuse the last added entry, if it is present. Relying on insn_idx comparison has the same ambiguity problem as the one that was fixed recently in [0], so we avoid that. [0] https://patchwork.kernel.org/project/netdevbpf/patch/20231110002638.4168352-3-andrii@kernel.org/ Acked-by: Eduard Zingerman Reported-by: Tao Lyu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 31 +++- kernel/bpf/verifier.c | 175 ++++++++++++--------- .../bpf/progs/verifier_subprog_precision.c | 23 ++- tools/testing/selftests/bpf/verifier/precise.c | 38 +++-- 4 files changed, 169 insertions(+), 98 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3378cc753061..bada59812e00 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -325,12 +325,34 @@ struct bpf_func_state { int allocated_stack; }; -struct bpf_idx_pair { - u32 prev_idx; +#define MAX_CALL_FRAMES 8 + +/* instruction history flags, used in bpf_jmp_history_entry.flags field */ +enum { + /* instruction references stack slot through PTR_TO_STACK register; + * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) + * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512, + * 8 bytes per slot, so slot index (spi) is [0, 63]) + */ + INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */ + + INSN_F_SPI_MASK = 0x3f, /* 6 bits */ + INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ + + INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ +}; + +static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); +static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); + +struct bpf_jmp_history_entry { u32 idx; + /* insn idx can't be bigger than 1 million */ + u32 prev_idx : 22; + /* special flags, e.g., whether insn is doing register stack spill/load */ + u32 flags : 10; }; -#define MAX_CALL_FRAMES 8 /* Maximum number of register states that can exist at once */ #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { @@ -413,7 +435,7 @@ struct bpf_verifier_state { * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40.
*/ - struct bpf_idx_pair *jmp_history; + struct bpf_jmp_history_entry *jmp_history; u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; @@ -656,6 +678,7 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; + struct bpf_jmp_history_entry *cur_hist_ent; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1ed39665f802..9bc16dc66465 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1355,8 +1355,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, int i, err; dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, - src->jmp_history_cnt, sizeof(struct bpf_idx_pair), - GFP_USER); + src->jmp_history_cnt, sizeof(*dst_state->jmp_history), + GFP_USER); if (!dst_state->jmp_history) return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt; @@ -3221,6 +3221,21 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, return __check_reg_arg(env, state->regs, regno, t); } +static int insn_stack_access_flags(int frameno, int spi) +{ + return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno; +} + +static int insn_stack_access_spi(int insn_flags) +{ + return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK; +} + +static int insn_stack_access_frameno(int insn_flags) +{ + return insn_flags & INSN_F_FRAMENO_MASK; +} + static void mark_jmp_point(struct bpf_verifier_env *env, int idx) { env->insn_aux_data[idx].jmp_point = true; @@ -3232,28 +3247,51 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx) } /* for any branch, call, exit record the history of jmps in the given state */ -static int push_jmp_history(struct bpf_verifier_env *env, - struct bpf_verifier_state *cur) +static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, + int insn_flags) { u32 cnt = cur->jmp_history_cnt; - struct bpf_idx_pair *p; + struct bpf_jmp_history_entry *p; size_t alloc_size; - if (!is_jmp_point(env, env->insn_idx)) + /* combine instruction flags if we already recorded this instruction */ + if (env->cur_hist_ent) { + /* atomic instructions push insn_flags twice, for READ and + * WRITE sides, but they should agree on stack slot + */ + WARN_ONCE((env->cur_hist_ent->flags & insn_flags) && + (env->cur_hist_ent->flags & insn_flags) != insn_flags, + "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n", + env->insn_idx, env->cur_hist_ent->flags, insn_flags); + env->cur_hist_ent->flags |= insn_flags; return 0; + } cnt++; alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); p = krealloc(cur->jmp_history, alloc_size, GFP_USER); if (!p) return -ENOMEM; - p[cnt - 1].idx = env->insn_idx; - p[cnt - 1].prev_idx = env->prev_insn_idx; cur->jmp_history = p; + + p = &cur->jmp_history[cnt - 1]; + p->idx = env->insn_idx; + p->prev_idx = env->prev_insn_idx; + p->flags = insn_flags; cur->jmp_history_cnt = cnt; + env->cur_hist_ent = p; + return 0; } +static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st, + u32 hist_end, int insn_idx) +{ + if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx) + return &st->jmp_history[hist_end - 1]; + return NULL; +} + /* Backtrack one insn at a time. If idx is not at the top of recorded * history then previous instruction came from straight line execution. * Return -ENOENT if we exhausted all instructions within given state. 
@@ -3415,9 +3453,14 @@ static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) return bt->reg_masks[bt->frame] & (1 << reg); } +static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot) +{ + return bt->stack_masks[frame] & (1ull << slot); +} + static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot) { - return bt->stack_masks[bt->frame] & (1ull << slot); + return bt_is_frame_slot_set(bt, bt->frame, slot); } /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */ @@ -3471,7 +3514,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); * - *was* processed previously during backtracking. */ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, - struct backtrack_state *bt) + struct bpf_jmp_history_entry *hist, struct backtrack_state *bt) { const struct bpf_insn_cbs cbs = { .cb_call = disasm_kfunc_name, @@ -3484,7 +3527,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, u8 mode = BPF_MODE(insn->code); u32 dreg = insn->dst_reg; u32 sreg = insn->src_reg; - u32 spi, i; + u32 spi, i, fr; if (insn->code == 0) return 0; @@ -3545,20 +3588,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * by 'precise' mark in corresponding register of this state. * No further tracking necessary. */ - if (insn->src_reg != BPF_REG_FP) + if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) return 0; - /* dreg = *(u64 *)[fp - off] was a fill from the stack. * that [fp - off] slot contains scalar that needs to be * tracked with precision */ - spi = (-insn->off - 1) / BPF_REG_SIZE; - if (spi >= 64) { - verbose(env, "BUG spi %d\n", spi); - WARN_ONCE(1, "verifier backtracking bug"); - return -EFAULT; - } - bt_set_slot(bt, spi); + spi = insn_stack_access_spi(hist->flags); + fr = insn_stack_access_frameno(hist->flags); + bt_set_frame_slot(bt, fr, spi); } else if (class == BPF_STX || class == BPF_ST) { if (bt_is_reg_set(bt, dreg)) /* stx & st shouldn't be using _scalar_ dst_reg @@ -3567,17 +3605,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, */ return -ENOTSUPP; /* scalars can only be spilled into stack */ - if (insn->dst_reg != BPF_REG_FP) + if (!hist || !(hist->flags & INSN_F_STACK_ACCESS)) return 0; - spi = (-insn->off - 1) / BPF_REG_SIZE; - if (spi >= 64) { - verbose(env, "BUG spi %d\n", spi); - WARN_ONCE(1, "verifier backtracking bug"); - return -EFAULT; - } - if (!bt_is_slot_set(bt, spi)) + spi = insn_stack_access_spi(hist->flags); + fr = insn_stack_access_frameno(hist->flags); + if (!bt_is_frame_slot_set(bt, fr, spi)) return 0; - bt_clear_slot(bt, spi); + bt_clear_frame_slot(bt, fr, spi); if (class == BPF_STX) bt_set_reg(bt, sreg); } else if (class == BPF_JMP || class == BPF_JMP32) { @@ -3621,10 +3655,14 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - /* we don't track register spills perfectly, - * so fallback to force-precise instead of failing */ - if (bt_stack_mask(bt) != 0) - return -ENOTSUPP; + /* we are now tracking register spills correctly, + * so any instance of leftover slots is a bug + */ + if (bt_stack_mask(bt) != 0) { + verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)"); + return -EFAULT; + } /* propagate r1-r5 to the caller */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) { if (bt_is_reg_set(bt, i)) { @@ 
-3649,8 +3687,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - if (bt_stack_mask(bt) != 0) - return -ENOTSUPP; + if (bt_stack_mask(bt) != 0) { + verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)"); + return -EFAULT; + } /* clear r1-r5 in callback subprog's mask */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) bt_clear_reg(bt, i); @@ -4087,6 +4128,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) for (;;) { DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; + struct bpf_jmp_history_entry *hist; if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", @@ -4150,7 +4192,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) err = 0; skip_first = false; } else { - err = backtrack_insn(env, i, subseq_idx, bt); + hist = get_jmp_hist_entry(st, history, i); + err = backtrack_insn(env, i, subseq_idx, hist, bt); } if (err == -ENOTSUPP) { mark_all_scalars_precise(env, env->cur_state); @@ -4203,22 +4246,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); for_each_set_bit(i, mask, 64) { if (i >= func->allocated_stack / BPF_REG_SIZE) { - /* the sequence of instructions: - * 2: (bf) r3 = r10 - * 3: (7b) *(u64 *)(r3 -8) = r0 - * 4: (79) r4 = *(u64 *)(r10 -8) - * doesn't contain jmps. It's backtracked - * as a single block. - * During backtracking insn 3 is not recognized as - * stack access, so at the end of backtracking - * stack slot fp-8 is still marked in stack_mask. - * However the parent state may not have accessed - * fp-8 and it's "unallocated" stack space. - * In such case fallback to conservative. - */ - mark_all_scalars_precise(env, env->cur_state); - bt_reset(bt); - return 0; + verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n", + i, func->allocated_stack / BPF_REG_SIZE); + WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)"); + return -EFAULT; } if (!is_spilled_scalar_reg(&func->stack[i])) { @@ -4391,7 +4422,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; struct bpf_insn *insn = &env->prog->insnsi[insn_idx]; struct bpf_reg_state *reg = NULL; - u32 dst_reg = insn->dst_reg; + int insn_flags = insn_stack_access_flags(state->frameno, spi); err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); if (err) @@ -4432,17 +4463,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, mark_stack_slot_scratched(env, spi); if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { - if (dst_reg != BPF_REG_FP) { - /* The backtracking logic can only recognize explicit - * stack slot address like [fp - 8]. Other spill of - * scalar via different register has to be conservative. - * Backtrack from here and mark all registers as precise - * that contributed into 'reg' being a constant. - */ - err = mark_chain_precision(env, value_regno); - if (err) - return err; - } save_register_state(state, spi, reg, size); /* Break the relation on a narrowing spill. 
*/ if (fls64(reg->umax_value) > BITS_PER_BYTE * size) @@ -4454,6 +4474,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, __mark_reg_known(&fake_reg, insn->imm); fake_reg.type = SCALAR_VALUE; save_register_state(state, spi, &fake_reg, size); + insn_flags = 0; /* not a register spill */ } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -4499,9 +4520,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, /* Mark slots affected by this stack write. */ for (i = 0; i < size; i++) - state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = - type; + state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type; + insn_flags = 0; /* not a register spill */ } + + if (insn_flags) + return push_jmp_history(env, env->cur_state, insn_flags); return 0; } @@ -4694,6 +4718,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; struct bpf_reg_state *reg; u8 *stype, type; + int insn_flags = insn_stack_access_flags(reg_state->frameno, spi); stype = reg_state->stack[spi].slot_type; reg = ®_state->stack[spi].spilled_ptr; @@ -4739,12 +4764,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, return -EACCES; } mark_reg_unknown(env, state->regs, dst_regno); + insn_flags = 0; /* not restoring original register state */ } state->regs[dst_regno].live |= REG_LIVE_WRITTEN; - return 0; - } - - if (dst_regno >= 0) { + } else if (dst_regno >= 0) { /* restore register state from stack */ copy_register_state(&state->regs[dst_regno], reg); /* mark reg as written since spilled pointer state likely @@ -4780,7 +4803,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); if (dst_regno >= 0) mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); + insn_flags = 0; /* we are not restoring spilled register */ } + if (insn_flags) + return push_jmp_history(env, env->cur_state, insn_flags); return 0; } @@ -6940,7 +6966,6 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); if (err) return err; - return 0; } @@ -16910,7 +16935,8 @@ hit: * the precision needs to be propagated back in * the current state. */ - err = err ? : push_jmp_history(env, cur); + if (is_jmp_point(env, env->insn_idx)) + err = err ? : push_jmp_history(env, cur, 0); err = err ? 
: propagate_precision(env, &sl->state); if (err) return err; @@ -17135,6 +17161,9 @@ static int do_check(struct bpf_verifier_env *env) u8 class; int err; + /* reset current history entry on each new instruction */ + env->cur_hist_ent = NULL; + env->prev_insn_idx = prev_insn_idx; if (env->insn_idx >= insn_cnt) { verbose(env, "invalid insn idx %d insn_cnt %d\n", @@ -17174,7 +17203,7 @@ static int do_check(struct bpf_verifier_env *env) } if (is_jmp_point(env, env->insn_idx)) { - err = push_jmp_history(env, state); + err = push_jmp_history(env, state, 0); if (err) return err; } diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 0dfe3f8b69ac..eba98fab2f54 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -589,11 +589,24 @@ static __u64 subprog_spill_reg_precise(void) SEC("?raw_tp") __success __log_level(2) -/* precision backtracking can't currently handle stack access not through r10, - * so we won't be able to mark stack slot fp-8 as precise, and so will - * fallback to forcing all as precise - */ -__msg("mark_precise: frame0: falling back to forcing all scalars precise") +__msg("10: (0f) r1 += r7") +__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8") +__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4") +__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1") +__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2") +__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)") +__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2") +__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2") +__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6") +__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") __naked int subprog_spill_into_parent_stack_slot_precise(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0d84dd1f38b6..8a2ff81d8350 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -140,10 +140,11 @@ .result = REJECT, }, { - "precise: ST insn causing spi > allocated_stack", + "precise: ST zero to stack insn is supported", .insns = { BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + /* not a register spill, so we stop precision propagation for R4 here */ BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), BPF_MOV64_IMM(BPF_REG_0, -1), @@ -157,11 +158,11 @@ mark_precise: frame0: last_idx 4 first_idx 2\ mark_precise: frame0: regs=r4 stack= before 4\ mark_precise: frame0: regs=r4 stack= before 3\ 
- mark_precise: frame0: regs= stack=-8 before 2\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ mark_precise: frame0: last_idx 5 first_idx 5\ - mark_precise: frame0: parent state regs= stack=:", + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 4 first_idx 2\ + mark_precise: frame0: regs=r0 stack= before 4\ + 5: R0=-1 R4=0", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -169,6 +170,8 @@ "precise: STX insn causing spi > allocated_stack", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + /* make later reg spill more interesting by having somewhat known scalar */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), @@ -179,18 +182,21 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ + .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs=r4 stack=:\ - mark_precise: frame0: last_idx 5 first_idx 3\ - mark_precise: frame0: regs=r4 stack= before 5\ - mark_precise: frame0: regs=r4 stack= before 4\ - mark_precise: frame0: regs= stack=-8 before 3\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: last_idx 6 first_idx 4\ + mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ + mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ + mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 3 first_idx 3\ + mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\ + mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\ + mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 0 first_idx 0\ + mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\ + mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, -- cgit v1.2.3-70-g09d2 From 876301881c436bf38e83a2c0d276a24b642e4aab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:40 -0800 Subject: selftests/bpf: add stack access precision test Add a new selftest that validates precision tracking for stack access instructions, using both r10-based and non-r10-based accesses. For the non-r10 ones we also make sure to have a non-zero var_off, to validate that the final stack offset is tracked properly in the instruction history information inside the verifier.
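To make the var_off point concrete, here is a small worked illustration (not part of the patch), reusing the spi = (-off - 1) / BPF_REG_SIZE arithmetic visible in the verifier hunks earlier in this series: with r7 = r10 - 8, the store *(u64 *)(r7 - 8) = r0 really lands in fp-16, so deriving the slot from insn->off alone would pick the wrong slot.

#include <stdio.h>

#define BPF_REG_SIZE 8

/* slot index for a store at a given fp-relative offset (offset is negative) */
static int spi_for_fp_off(int fp_off)
{
	return (-fp_off - 1) / BPF_REG_SIZE;
}

int main(void)
{
	int insn_off = -8; /* offset encoded in the store instruction, i.e. (r7 - 8) */
	int base_off = -8; /* r7's constant offset from r10, i.e. r7 = r10 - 8 */

	/* using insn->off only: spi 0, i.e. fp-8 -- the wrong slot */
	printf("insn->off only : spi %d\n", spi_for_fp_off(insn_off));
	/* using the final fp-relative offset: spi 1, i.e. fp-16 -- the correct slot */
	printf("final fp offset: spi %d\n", spi_for_fp_off(base_off + insn_off));
	return 0;
}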
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/progs/verifier_subprog_precision.c | 64 ++++++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index eba98fab2f54..6f5d19665cf6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -641,14 +641,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void) ); } -__naked __noinline __used -static __u64 subprog_with_checkpoint(void) +SEC("?raw_tp") +__success __log_level(2) +__msg("17: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32") +__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15") +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") +__naked int stack_slot_aliases_precision(void) { asm volatile ( - "r0 = 0;" - /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ - "goto +0;" + "r6 = 1;" + /* pass r6 through r1 into subprog to get it back as r0; + * this whole chain will have to be marked as precise later + */ + "r1 = r6;" + "call identity_subprog;" + /* let's setup two registers that are aliased to r10 */ + "r7 = r10;" + "r7 += -8;" /* r7 = r10 - 8 */ + "r8 = r10;" + "r8 += -32;" /* r8 = r10 - 32 */ + /* now spill subprog's return value (a r6 -> r1 -> r0 chain) + * a few times through different stack pointer regs, making + * sure to use r10, r7, and r8 both in LDX and STX insns, and + * *importantly* also using a combination of const var_off and + * insn->off to validate that we record final stack slot + * correctly, instead of relying on just insn->off derivation, + * which is only valid for r10-based stack offset + */ + "*(u64 *)(r10 - 16) = r0;" + "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */ + "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */ + "r0 = *(u64 *)(r8 + 16);" + "*(u64 *)(r7 - 8) = r0;" + "r0 = *(u64 *)(r10 - 16);" + /* get ready to use r0 as an index into array to force precision */ + "r0 *= 4;" + "r1 = %[vals];" + /* here r0->r1->r6 chain is forced to be precise and has to be + * propagated back to the 
beginning, including through the + * subprog call and all the stack spills and loads + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" ); } -- cgit v1.2.3-70-g09d2 From b33ceb6a3d2ee07fdd836373383a6d4783581324 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:43 -0800 Subject: selftests/bpf: validate STACK_ZERO is preserved on subreg spill Add tests validating that STACK_ZERO slots are preserved when slot is partially overwritten with subregister spill. Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 6115520154e3..d9dabae81176 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -4,6 +4,7 @@ #include #include #include "bpf_misc.h" +#include <../../../tools/include/linux/filter.h> struct { __uint(type, BPF_MAP_TYPE_RINGBUF); @@ -450,4 +451,43 @@ l0_%=: r1 >>= 16; \ : __clobber_all); } +SEC("raw_tp") +__log_level(2) +__success +__msg("fp-8=0m??mmmm") +__msg("fp-16=00mm??mm") +__msg("fp-24=00mm???m") +__naked void spill_subregs_preserve_stack_zero(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + + /* 32-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */ + "*(u8 *)(r10 -2) = r0;" /* MISC */ + /* fp-3 and fp-4 stay INVALID */ + "*(u32 *)(r10 -8) = r0;" + + /* 16-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */ + "*(u16 *)(r10 -12) = r0;" /* MISC */ + /* fp-13 and fp-14 stay INVALID */ + "*(u16 *)(r10 -16) = r0;" + + /* 8-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r18 -10) = 0; */ + "*(u16 *)(r10 -20) = r0;" /* MISC */ + /* fp-21, fp-22, and fp-23 stay INVALID */ + "*(u8 *)(r10 -24) = r0;" + + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)), + __imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)), + __imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From add1cd7f22e61756987865ada9fe95cd86569025 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:45 -0800 Subject: selftests/bpf: validate zero preservation for sub-slot loads Validate that 1-, 2-, and 4-byte loads from stack slots not aligned on 8-byte boundary still preserve zero, when loading from all-STACK_ZERO sub-slots, or when stack sub-slots are covered by spilled register with known constant zero value. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 71 ++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index d9dabae81176..41fd61299eab 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -490,4 +490,75 @@ __naked void spill_subregs_preserve_stack_zero(void) : __clobber_all); } +char single_byte_buf[1] SEC(".data.single_byte_buf"); + +SEC("raw_tp") +__log_level(2) +__success +__naked void partial_stack_load_preserves_zeros(void) +{ + asm volatile ( + /* fp-8 is all STACK_ZERO */ + ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */ + + /* fp-16 is const zero register */ + "r0 = 0;" + "*(u64 *)(r10 -16) = r0;" + + /* load single U8 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -1);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U8 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -9);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -2);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -10);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -4);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -12);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(single_byte_buf), + __imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0)) + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 064e0bea19b356c5d5f48a4549d80a3c03ce898b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Dec 2023 10:42:47 -0800 Subject: selftests/bpf: validate precision logic in partial_stack_load_preserves_zeros Enhance partial_stack_load_preserves_zeros subtest with detailed precision propagation log checks. We now expect fp-16 to be a spilled, initially imprecise, zero const register, which is later marked as precise even when a partial stack slot load is performed, even if it's not a register fill (!).
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231205184248.1502704-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_spill_fill.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 41fd61299eab..df4920da3472 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -495,6 +495,22 @@ char single_byte_buf[1] SEC(".data.single_byte_buf"); SEC("raw_tp") __log_level(2) __success +/* make sure fp-8 is all STACK_ZERO */ +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") +/* but fp-16 is spilled IMPRECISE zero const reg */ +__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") +/* and now check that precision propagation works even for such tricky case */ +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=P0 R10=fp0 fp-16_w=0") +__msg("11: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0") __naked void partial_stack_load_preserves_zeros(void) { asm volatile ( -- cgit v1.2.3-70-g09d2 From e136735f0c263b8bb9572ff8d35d8d5bc9fd9b66 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:10:05 -0800 Subject: tools: pynl: make flags argument optional for do() Commit 1768d8a767f8 ("tools/net/ynl: Add support for create flags") added support for setting legacy netlink CRUD flags on netlink messages (NLM_F_REPLACE, _EXCL, _CREATE etc.). Most of genetlink won't need these, don't force callers to pass in an empty argument to each do() call. 
Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231202211005.341613-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 92995bca14e1..c56dad9593c6 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -705,7 +705,7 @@ class YnlFamily(SpecFamily): return op['do']['request']['attributes'].copy() - def _op(self, method, vals, flags, dump=False): + def _op(self, method, vals, flags=None, dump=False): op = self.ops[method] nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK @@ -769,7 +769,7 @@ class YnlFamily(SpecFamily): return rsp[0] return rsp - def do(self, method, vals, flags): + def do(self, method, vals, flags=None): return self._op(method, vals, flags) def dump(self, method, vals): -- cgit v1.2.3-70-g09d2 From f2d4d9ad809a1c7173ce66c47cd45e11184a554c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:13:10 -0800 Subject: tools: ynl: use strerror() if no extack of note provided If the kernel didn't give us any meaningful error, print the strerror() text into the ynl error message. Reviewed-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20231202211310.342716-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 830d25097009..587286de10b5 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -145,8 +145,10 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, const struct nlattr *attr; const char *str = NULL; - if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) { + yerr_msg(ys, "%s", strerror(ys->err.code)); return MNL_CB_OK; + } mnl_attr_for_each(attr, nlh, hlen) { unsigned int len, type; @@ -249,6 +251,8 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, yerr_msg(ys, "Kernel %s: %s%s", ys->err.code ? "error" : "warning", bad_attr, miss_attr); + else + yerr_msg(ys, "%s", strerror(ys->err.code)); return MNL_CB_OK; } -- cgit v1.2.3-70-g09d2 From f3c928008ab218055e26917ffdbdaf429e8e616a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 2 Dec 2023 13:12:25 -0800 Subject: tools: ynl: move private definitions to a separate header ynl.h has a growing amount of "internal" stuff, which may confuse users who try to take a look at the external API. Currently the internals are at the bottom of the file with a banner in between, but this arrangement makes it hard to add external APIs / inline helpers which need internal definitions. Move the internals to a separate header.
Link: https://lore.kernel.org/r/20231202211225.342466-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 144 +++++++++++++++++++++++++++++++++++++++++ tools/net/ynl/lib/ynl.h | 148 +------------------------------------------ 2 files changed, 145 insertions(+), 147 deletions(-) create mode 100644 tools/net/ynl/lib/ynl-priv.h (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h new file mode 100644 index 000000000000..7491da8e7555 --- /dev/null +++ b/tools/net/ynl/lib/ynl-priv.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +#ifndef __YNL_C_PRIV_H +#define __YNL_C_PRIV_H 1 + +#include +#include +#include + +/* + * YNL internals / low level stuff + */ + +/* Generic mnl helper code */ + +enum ynl_policy_type { + YNL_PT_REJECT = 1, + YNL_PT_IGNORE, + YNL_PT_NEST, + YNL_PT_FLAG, + YNL_PT_BINARY, + YNL_PT_U8, + YNL_PT_U16, + YNL_PT_U32, + YNL_PT_U64, + YNL_PT_UINT, + YNL_PT_NUL_STR, + YNL_PT_BITFIELD32, +}; + +struct ynl_policy_attr { + enum ynl_policy_type type; + unsigned int len; + const char *name; + struct ynl_policy_nest *nest; +}; + +struct ynl_policy_nest { + unsigned int max_attr; + struct ynl_policy_attr *table; +}; + +struct ynl_parse_arg { + struct ynl_sock *ys; + struct ynl_policy_nest *rsp_policy; + void *data; +}; + +struct ynl_dump_list_type { + struct ynl_dump_list_type *next; + unsigned char data[] __attribute__((aligned(8))); +}; +extern struct ynl_dump_list_type *YNL_LIST_END; + +static inline bool ynl_dump_obj_is_last(void *obj) +{ + unsigned long uptr = (unsigned long)obj; + + uptr -= offsetof(struct ynl_dump_list_type, data); + return uptr == (unsigned long)YNL_LIST_END; +} + +static inline void *ynl_dump_obj_next(void *obj) +{ + unsigned long uptr = (unsigned long)obj; + struct ynl_dump_list_type *list; + + uptr -= offsetof(struct ynl_dump_list_type, data); + list = (void *)uptr; + uptr = (unsigned long)list->next; + uptr += offsetof(struct ynl_dump_list_type, data); + + return (void *)uptr; +} + +struct ynl_ntf_base_type { + __u16 family; + __u8 cmd; + struct ynl_ntf_base_type *next; + void (*free)(struct ynl_ntf_base_type *ntf); + unsigned char data[] __attribute__((aligned(8))); +}; + +extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE]; + +struct nlmsghdr * +ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); +struct nlmsghdr * +ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); + +int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr); + +int ynl_recv_ack(struct ynl_sock *ys, int ret); +int ynl_cb_null(const struct nlmsghdr *nlh, void *data); + +/* YNL specific helpers used by the auto-generated code */ + +struct ynl_req_state { + struct ynl_parse_arg yarg; + mnl_cb_t cb; + __u32 rsp_cmd; +}; + +struct ynl_dump_state { + struct ynl_sock *ys; + struct ynl_policy_nest *rsp_policy; + void *first; + struct ynl_dump_list_type *last; + size_t alloc_sz; + mnl_cb_t cb; + __u32 rsp_cmd; +}; + +struct ynl_ntf_info { + struct ynl_policy_nest *policy; + mnl_cb_t cb; + size_t alloc_sz; + void (*free)(struct ynl_ntf_base_type *ntf); +}; + +int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_req_state *yrs); +int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_dump_state *yds); + +void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd); +int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); + +#ifndef MNL_HAS_AUTO_SCALARS +static inline uint64_t 
mnl_attr_get_uint(const struct nlattr *attr) +{ + if (mnl_attr_get_payload_len(attr) == 4) + return mnl_attr_get_u32(attr); + return mnl_attr_get_u64(attr); +} + +static inline void +mnl_attr_put_uint(struct nlmsghdr *nlh, uint16_t type, uint64_t data) +{ + if ((uint32_t)data == (uint64_t)data) + return mnl_attr_put_u32(nlh, type, data); + return mnl_attr_put_u64(nlh, type, data); +} +#endif +#endif diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index 075d868f3b57..5de580b992b8 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -3,20 +3,10 @@ #define __YNL_C_H 1 #include -#include #include #include -struct mnl_socket; -struct nlmsghdr; - -/* - * User facing code - */ - -struct ynl_ntf_base_type; -struct ynl_ntf_info; -struct ynl_sock; +#include "ynl-priv.h" enum ynl_error_code { YNL_ERROR_NONE = 0, @@ -116,140 +106,4 @@ static inline bool ynl_has_ntf(struct ynl_sock *ys) struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys); void ynl_ntf_free(struct ynl_ntf_base_type *ntf); - -/* - * YNL internals / low level stuff - */ - -/* Generic mnl helper code */ - -enum ynl_policy_type { - YNL_PT_REJECT = 1, - YNL_PT_IGNORE, - YNL_PT_NEST, - YNL_PT_FLAG, - YNL_PT_BINARY, - YNL_PT_U8, - YNL_PT_U16, - YNL_PT_U32, - YNL_PT_U64, - YNL_PT_UINT, - YNL_PT_NUL_STR, - YNL_PT_BITFIELD32, -}; - -struct ynl_policy_attr { - enum ynl_policy_type type; - unsigned int len; - const char *name; - struct ynl_policy_nest *nest; -}; - -struct ynl_policy_nest { - unsigned int max_attr; - struct ynl_policy_attr *table; -}; - -struct ynl_parse_arg { - struct ynl_sock *ys; - struct ynl_policy_nest *rsp_policy; - void *data; -}; - -struct ynl_dump_list_type { - struct ynl_dump_list_type *next; - unsigned char data[] __attribute__((aligned(8))); -}; -extern struct ynl_dump_list_type *YNL_LIST_END; - -static inline bool ynl_dump_obj_is_last(void *obj) -{ - unsigned long uptr = (unsigned long)obj; - - uptr -= offsetof(struct ynl_dump_list_type, data); - return uptr == (unsigned long)YNL_LIST_END; -} - -static inline void *ynl_dump_obj_next(void *obj) -{ - unsigned long uptr = (unsigned long)obj; - struct ynl_dump_list_type *list; - - uptr -= offsetof(struct ynl_dump_list_type, data); - list = (void *)uptr; - uptr = (unsigned long)list->next; - uptr += offsetof(struct ynl_dump_list_type, data); - - return (void *)uptr; -} - -struct ynl_ntf_base_type { - __u16 family; - __u8 cmd; - struct ynl_ntf_base_type *next; - void (*free)(struct ynl_ntf_base_type *ntf); - unsigned char data[] __attribute__((aligned(8))); -}; - -extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE]; - -struct nlmsghdr * -ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); -struct nlmsghdr * -ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); - -int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr); - -int ynl_recv_ack(struct ynl_sock *ys, int ret); -int ynl_cb_null(const struct nlmsghdr *nlh, void *data); - -/* YNL specific helpers used by the auto-generated code */ - -struct ynl_req_state { - struct ynl_parse_arg yarg; - mnl_cb_t cb; - __u32 rsp_cmd; -}; - -struct ynl_dump_state { - struct ynl_sock *ys; - struct ynl_policy_nest *rsp_policy; - void *first; - struct ynl_dump_list_type *last; - size_t alloc_sz; - mnl_cb_t cb; - __u32 rsp_cmd; -}; - -struct ynl_ntf_info { - struct ynl_policy_nest *policy; - mnl_cb_t cb; - size_t alloc_sz; - void (*free)(struct ynl_ntf_base_type *ntf); -}; - -int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, - 
struct ynl_req_state *yrs); -int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, - struct ynl_dump_state *yds); - -void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd); -int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); - -#ifndef MNL_HAS_AUTO_SCALARS -static inline uint64_t mnl_attr_get_uint(const struct nlattr *attr) -{ - if (mnl_attr_get_payload_len(attr) == 4) - return mnl_attr_get_u32(attr); - return mnl_attr_get_u64(attr); -} - -static inline void -mnl_attr_put_uint(struct nlmsghdr *nlh, uint16_t type, uint64_t data) -{ - if ((uint32_t)data == (uint64_t)data) - return mnl_attr_put_u32(nlh, type, data); - return mnl_attr_put_u64(nlh, type, data); -} -#endif #endif -- cgit v1.2.3-70-g09d2 From 4527358b76861dfd64ee34aba45d81648fbc8a61 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:15 -0800 Subject: bpf: introduce BPF token object Add a new kind of BPF kernel object, the BPF token. A BPF token is meant to allow delegating privileged BPF functionality, like loading a BPF program or creating a BPF map, from a privileged process to a *trusted* unprivileged process, all while having a good amount of control over which privileged operations could be performed using the provided BPF token. This is achieved by mounting a BPF FS instance with extra delegation mount options, which determine what operations are delegatable and also constrain the token to the owning user namespace (as mentioned in the previous patch). The BPF token itself is just a derivative of BPF FS and can be created through a new bpf() syscall command, BPF_TOKEN_CREATE, which accepts a BPF FS FD, obtainable through the open() API by opening the BPF FS mount point. Currently, a BPF token "inherits" the delegated command, map type, prog type, and attach type bit sets from BPF FS as is. In the future, with the BPF token being a separate object with its own FD, we can further restrict the token's allowable set of operations either at creation time or after the fact, allowing the process to guard itself further from unintentionally trying to load undesired kinds of BPF programs. But for now we keep things simple and just copy the bit sets as is. When a BPF token is created from a BPF FS mount, we take a reference to the BPF super block's owning user namespace, and then use that namespace for checking all the {CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN} capabilities that are normally only checked against the init userns (using capable()), but now we check them using ns_capable() instead (if a BPF token is provided). See bpf_token_capable() for details. Such a setup means that a BPF token in itself is not sufficient to grant BPF functionality. A user-namespaced process has to *also* have the necessary combination of capabilities inside that user namespace. So while previously CAP_BPF was useless when granted within a user namespace, it now gains meaning and allows container managers and sysadmins flexible control over which processes can and need to use BPF functionality within the user namespace (i.e., the container in practice). And BPF FS delegation mount options and derived BPF tokens serve as a per-container "flag" to grant the overall ability to use bpf() (plus further restrictions on which parts of the bpf() syscall are treated as namespaced). Note also that the BPF_TOKEN_CREATE command itself requires ns_capable(CAP_BPF) within the BPF FS owning user namespace, rounding out the ns_capable() story of the BPF token.
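As a rough usage sketch (not part of the patch, and assuming uapi headers that already carry BPF_TOKEN_CREATE and the token_create fields added below): a process opens the delegating BPF FS mount point and derives a token from it with the raw bpf(2) syscall. The mount path here is made up for illustration; libbpf plumbing comes in later patches of the series.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int bpf_token_create_fd(int bpffs_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.token_create.bpffs_fd = bpffs_fd; /* token_create.flags must stay 0 for now */

	return syscall(__NR_bpf, BPF_TOKEN_CREATE, &attr, sizeof(attr));
}

int main(void)
{
	/* hypothetical bpffs instance mounted with delegate_cmds=... inside this userns */
	int bpffs_fd = open("/mnt/bpffs", O_RDONLY);
	int token_fd;

	if (bpffs_fd < 0) {
		perror("open bpffs");
		return 1;
	}
	token_fd = bpf_token_create_fd(bpffs_fd);
	if (token_fd < 0)
		perror("BPF_TOKEN_CREATE");
	else
		printf("BPF token fd: %d\n", token_fd);
	close(bpffs_fd);
	return 0;
}

Per the commit message above, this only succeeds when the caller is in the bpffs-owning user namespace and holds CAP_BPF there.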
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 41 ++++++++ include/uapi/linux/bpf.h | 37 +++++++ kernel/bpf/Makefile | 2 +- kernel/bpf/inode.c | 12 ++- kernel/bpf/syscall.c | 17 ++++ kernel/bpf/token.c | 214 +++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 37 +++++++ 7 files changed, 354 insertions(+), 6 deletions(-) create mode 100644 kernel/bpf/token.c (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d3c9acc593ea..aa9cf8e5fab1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -51,6 +51,10 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1591,6 +1595,13 @@ struct bpf_mount_opts { u64 delegate_attachs; }; +struct bpf_token { + struct work_struct work; + atomic64_t refcnt; + struct user_namespace *userns; + u64 allowed_cmds; +}; + struct bpf_struct_ops_value; struct btf_member; @@ -2048,6 +2059,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2182,6 +2194,8 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; +bool bpf_token_capable(const struct bpf_token *token, int cap); + static inline bool bpf_allow_ptr_leaks(void) { return perfmon_capable(); @@ -2216,8 +2230,17 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2580,6 +2603,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e88746ba7d21..d4a567e5bc3c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. 
* + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1712,6 +1744,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index f526b7573e97..4ce95acfcaa7 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 220fe0f99095..6ce3f9696e72 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -99,9 +99,9 @@ static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; -static struct inode *bpf_get_inode(struct super_block *sb, - const struct inode *dir, - umode_t mode) +struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) { struct inode *inode; @@ -602,11 +602,13 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) { struct bpf_mount_opts *opts = root->d_sb->s_fs_info; umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; + u64 mask; if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); - if (opts->delegate_cmds 
== ~0ULL) + mask = (1ULL << __MAX_BPF_CMD) - 1; + if ((opts->delegate_cmds & mask) == mask) seq_printf(m, ",delegate_cmds=any"); else if (opts->delegate_cmds) seq_printf(m, ",delegate_cmds=0x%llx", opts->delegate_cmds); @@ -639,7 +641,7 @@ static void bpf_free_inode(struct inode *inode) free_inode_nonrcu(inode); } -static const struct super_operations bpf_super_ops = { +const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ee33a52abf18..a156d549b356 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5377,6 +5377,20 @@ out_prog_put: return ret; } +#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd + +static int token_create(union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_TOKEN_CREATE)) + return -EINVAL; + + /* no flags are supported yet */ + if (attr->token_create.flags) + return -EINVAL; + + return bpf_token_create(attr); +} + static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; @@ -5510,6 +5524,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) case BPF_PROG_BIND_MAP: err = bpf_prog_bind_map(&attr); break; + case BPF_TOKEN_CREATE: + err = token_create(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c new file mode 100644 index 000000000000..e18aaecc67e9 --- /dev/null +++ b/kernel/bpf/token.c @@ -0,0 +1,214 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + /* BPF token allows ns_capable() level of capabilities, but only if + * token's userns is *exactly* the same as current user's userns + */ + if (token && current_user_ns() == token->userns) { + if (ns_capable(token->userns, cap)) + return true; + if (cap != CAP_SYS_ADMIN && ns_capable(token->userns, CAP_SYS_ADMIN)) + return true; + } + /* otherwise fallback to capable() checks */ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +void bpf_token_inc(struct bpf_token *token) +{ + atomic64_inc(&token->refcnt); +} + +static void bpf_token_free(struct bpf_token *token) +{ + put_user_ns(token->userns); + kvfree(token); +} + +static void bpf_token_put_deferred(struct work_struct *work) +{ + struct bpf_token *token = container_of(work, struct bpf_token, work); + + bpf_token_free(token); +} + +void bpf_token_put(struct bpf_token *token) +{ + if (!token) + return; + + if (!atomic64_dec_and_test(&token->refcnt)) + return; + + INIT_WORK(&token->work, bpf_token_put_deferred); + schedule_work(&token->work); +} + +static int bpf_token_release(struct inode *inode, struct file *filp) +{ + struct bpf_token *token = filp->private_data; + + bpf_token_put(token); + return 0; +} + +static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) +{ + struct bpf_token *token = filp->private_data; + u64 mask; + + BUILD_BUG_ON(__MAX_BPF_CMD >= 64); + mask = (1ULL << __MAX_BPF_CMD) - 1; + if ((token->allowed_cmds & mask) == mask) + seq_printf(m, "allowed_cmds:\tany\n"); + else + seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); +} + +#define BPF_TOKEN_INODE_NAME "bpf-token" + +static const struct inode_operations bpf_token_iops = { }; + +static const struct file_operations bpf_token_fops = { + .release = bpf_token_release, + .show_fdinfo = bpf_token_show_fdinfo, +}; + +int bpf_token_create(union bpf_attr *attr) +{ + struct 
bpf_mount_opts *mnt_opts; + struct bpf_token *token = NULL; + struct user_namespace *userns; + struct inode *inode; + struct file *file; + struct path path; + struct fd f; + umode_t mode; + int err, fd; + + f = fdget(attr->token_create.bpffs_fd); + if (!f.file) + return -EBADF; + + path = f.file->f_path; + path_get(&path); + fdput(f); + + if (path.dentry != path.mnt->mnt_sb->s_root) { + err = -EINVAL; + goto out_path; + } + if (path.mnt->mnt_sb->s_op != &bpf_super_ops) { + err = -EINVAL; + goto out_path; + } + err = path_permission(&path, MAY_ACCESS); + if (err) + goto out_path; + + userns = path.dentry->d_sb->s_user_ns; + /* + * Enforce that creators of BPF tokens are in the same user + * namespace as the BPF FS instance. This makes reasoning about + * permissions a lot easier and we can always relax this later. + */ + if (current_user_ns() != userns) { + err = -EPERM; + goto out_path; + } + if (!ns_capable(userns, CAP_BPF)) { + err = -EPERM; + goto out_path; + } + + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); + inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_path; + } + + inode->i_op = &bpf_token_iops; + inode->i_fop = &bpf_token_fops; + clear_nlink(inode); /* make sure it is unlinked */ + + file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops); + if (IS_ERR(file)) { + iput(inode); + err = PTR_ERR(file); + goto out_path; + } + + token = kvzalloc(sizeof(*token), GFP_USER); + if (!token) { + err = -ENOMEM; + goto out_file; + } + + atomic64_set(&token->refcnt, 1); + + /* remember bpffs owning userns for future ns_capable() checks */ + token->userns = get_user_ns(userns); + + mnt_opts = path.dentry->d_sb->s_fs_info; + token->allowed_cmds = mnt_opts->delegate_cmds; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + err = fd; + goto out_token; + } + + file->private_data = token; + fd_install(fd, file); + + path_put(&path); + return fd; + +out_token: + bpf_token_free(token); +out_file: + fput(file); +out_path: + path_put(&path); + return err; +} + +struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_token *token; + + if (!f.file) + return ERR_PTR(-EBADF); + if (f.file->f_op != &bpf_token_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + token = f.file->private_data; + bpf_token_inc(token); + fdput(f); + + return token; +} + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + /* BPF token can be used only within exactly the same userns in which + * it was created + */ + if (!token || current_user_ns() != token->userns) + return false; + + return token->allowed_cmds & (1ULL << cmd); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e88746ba7d21..d4a567e5bc3c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. 
+ * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1712,6 +1744,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3-70-g09d2 From 688b7270b3cb75e8ac78123d719967db40336e5b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:16 -0800 Subject: bpf: add BPF token support to BPF_MAP_CREATE command Allow providing token_fd for BPF_MAP_CREATE command to allow controlled BPF map creation from unprivileged process through delegated BPF token. Wire through a set of allowed BPF map types to BPF token, derived from BPF FS at BPF token creation time. This, in combination with allowed_cmds allows to create a narrowly-focused BPF token (controlled by privileged agent) with a restrictive set of BPF maps that application can attempt to create. 
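A matching sketch for the map side (again hypothetical, raw bpf(2), headers assumed to contain the new map_token_fd field): pass the token fd from the previous example via map_token_fd. A zero fd means "no token", so the kernel only consults the token when the field is non-zero, as the map_create() hunk below shows.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* create a small hash map, authorized via a BPF token instead of init-ns capabilities */
int bpf_map_create_with_token(int token_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;
	attr.key_size = 4;
	attr.value_size = 8;
	attr.max_entries = 16;
	attr.map_token_fd = token_fd; /* 0 would mean "no token" */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

The token is only honored if its delegate_cmds cover BPF_MAP_CREATE and its delegate_maps cover the requested map type; otherwise the kernel ignores it and falls back to the usual capability checks, as described in the message above.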
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 2 + kernel/bpf/inode.c | 3 +- kernel/bpf/syscall.c | 52 ++++++++++++++++------ kernel/bpf/token.c | 16 +++++++ tools/include/uapi/linux/bpf.h | 2 + .../selftests/bpf/prog_tests/libbpf_probes.c | 2 + .../testing/selftests/bpf/prog_tests/libbpf_str.c | 3 ++ 8 files changed, 67 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index aa9cf8e5fab1..e08e8436df38 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1600,6 +1600,7 @@ struct bpf_token { atomic64_t refcnt; struct user_namespace *userns; u64 allowed_cmds; + u64 allowed_maps; }; struct bpf_struct_ops_value; @@ -2236,6 +2237,7 @@ int bpf_token_create(union bpf_attr *attr); struct bpf_token *bpf_token_get_from_fd(u32 ufd); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d4a567e5bc3c..0bba3392b17a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1433,6 +1434,7 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 6ce3f9696e72..9c7865d1c53d 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -613,7 +613,8 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) else if (opts->delegate_cmds) seq_printf(m, ",delegate_cmds=0x%llx", opts->delegate_cmds); - if (opts->delegate_maps == ~0ULL) + mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + if ((opts->delegate_maps & mask) == mask) seq_printf(m, ",delegate_maps=any"); else if (opts->delegate_maps) seq_printf(m, ",delegate_maps=0x%llx", opts->delegate_maps); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a156d549b356..22e14124cd61 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1009,8 +1009,8 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(struct bpf_map *map, const struct btf *btf, - u32 btf_key_id, u32 btf_value_id) +static int map_check_btf(struct bpf_map *map, struct bpf_token *token, + const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; u32 key_size, value_size; @@ -1038,7 +1038,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, if (!IS_ERR_OR_NULL(map->record)) { int i; - if (!bpf_capable()) { + if (!bpf_token_capable(token, CAP_BPF)) { ret = -EPERM; goto free_map_tab; } @@ -1126,11 +1126,12 @@ static bool bpf_net_capable(void) return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); } -#define BPF_MAP_CREATE_LAST_FIELD map_extra +#define BPF_MAP_CREATE_LAST_FIELD map_token_fd /* called via syscall */ static int map_create(union bpf_attr *attr) { const struct bpf_map_ops *ops; + struct bpf_token *token = NULL; int numa_node = bpf_map_attr_numa_node(attr); u32 
map_type = attr->map_type; struct bpf_map *map; @@ -1181,14 +1182,32 @@ static int map_create(union bpf_attr *attr) if (!ops->map_mem_usage) return -EINVAL; + if (attr->map_token_fd) { + token = bpf_token_get_from_fd(attr->map_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + + /* if current token doesn't grant map creation permissions, + * then we can't use this token, so ignore it and rely on + * system-wide capabilities checks + */ + if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || + !bpf_token_allow_map_type(token, attr->map_type)) { + bpf_token_put(token); + token = NULL; + } + } + + err = -EPERM; + /* Intent here is for unprivileged_bpf_disabled to block BPF map * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on * object creation success. Even with unprivileged BPF disabled, * capability checks are still carried out. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) - return -EPERM; + if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) + goto put_token; /* check privileged map type permissions */ switch (map_type) { @@ -1221,25 +1240,27 @@ static int map_create(union bpf_attr *attr) case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: - if (!bpf_capable()) - return -EPERM; + if (!bpf_token_capable(token, CAP_BPF)) + goto put_token; break; case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_XSKMAP: - if (!bpf_net_capable()) - return -EPERM; + if (!bpf_token_capable(token, CAP_NET_ADMIN)) + goto put_token; break; default: WARN(1, "unsupported map type %d", map_type); - return -EPERM; + goto put_token; } map = ops->map_alloc(attr); - if (IS_ERR(map)) - return PTR_ERR(map); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto put_token; + } map->ops = ops; map->map_type = map_type; @@ -1276,7 +1297,7 @@ static int map_create(union bpf_attr *attr) map->btf = btf; if (attr->btf_value_type_id) { - err = map_check_btf(map, btf, attr->btf_key_type_id, + err = map_check_btf(map, token, btf, attr->btf_key_type_id, attr->btf_value_type_id); if (err) goto free_map; @@ -1297,6 +1318,7 @@ static int map_create(union bpf_attr *attr) goto free_map_sec; bpf_map_save_memcg(map); + bpf_token_put(token); err = bpf_map_new_fd(map, f_flags); if (err < 0) { @@ -1317,6 +1339,8 @@ free_map_sec: free_map: btf_put(map->btf); map->ops->map_free(map); +put_token: + bpf_token_put(token); return err; } diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index e18aaecc67e9..06c34dae658e 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -72,6 +72,13 @@ static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) seq_printf(m, "allowed_cmds:\tany\n"); else seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); + + BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); + mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; + if ((token->allowed_maps & mask) == mask) + seq_printf(m, "allowed_maps:\tany\n"); + else + seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); } #define BPF_TOKEN_INODE_NAME "bpf-token" @@ -161,6 +168,7 @@ int bpf_token_create(union bpf_attr *attr) mnt_opts = path.dentry->d_sb->s_fs_info; token->allowed_cmds = mnt_opts->delegate_cmds; + token->allowed_maps = mnt_opts->delegate_maps; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { @@ -212,3 +220,11 @@ bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) return token->allowed_cmds & (1ULL 
<< cmd); } + +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type) +{ + if (!token || type >= __MAX_BPF_MAP_TYPE) + return false; + + return token->allowed_maps & (1ULL << type); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d4a567e5bc3c..0bba3392b17a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1433,6 +1434,7 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 9f766ddd946a..573249a2814d 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -68,6 +68,8 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; + if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) + continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index c440ea3311ed..2a0633f43c73 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; + if (map_type == __MAX_BPF_MAP_TYPE) + continue; + map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); -- cgit v1.2.3-70-g09d2 From ee54b1a910e4d49c9a104f31ae3f5b979131adf8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:17 -0800 Subject: bpf: add BPF token support to BPF_BTF_LOAD command Accept BPF token FD in BPF_BTF_LOAD command to allow BTF data loading through delegated BPF token. BTF loading is a pretty straightforward operation, so as long as BPF token is created with allow_cmds granting BPF_BTF_LOAD command, kernel proceeds to parsing BTF data and creating BTF object. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 20 ++++++++++++++++++-- tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0bba3392b17a..9f9989e0d062 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1616,6 +1616,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). 
*/ __u32 btf_log_true_size; + __u32 btf_token_fd; }; struct { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 22e14124cd61..d87c5c27cde3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4777,15 +4777,31 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } -#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size +#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { + struct bpf_token *token = NULL; + if (CHECK_ATTR(BPF_BTF_LOAD)) return -EINVAL; - if (!bpf_capable()) + if (attr->btf_token_fd) { + token = bpf_token_get_from_fd(attr->btf_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { + bpf_token_put(token); + token = NULL; + } + } + + if (!bpf_token_capable(token, CAP_BPF)) { + bpf_token_put(token); return -EPERM; + } + + bpf_token_put(token); return btf_new_fd(attr, uattr, uattr_size); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0bba3392b17a..9f9989e0d062 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1616,6 +1616,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_token_fd; }; struct { -- cgit v1.2.3-70-g09d2 From e1cef620f598853a90f17701fcb1057a6768f7b8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:18 -0800 Subject: bpf: add BPF token support to BPF_PROG_LOAD command Add basic support of BPF token to BPF_PROG_LOAD. Wire through a set of allowed BPF program types and attach types, derived from BPF FS at BPF token creation time. Then make sure we perform bpf_token_capable() checks everywhere where it's relevant. 
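For illustration only (not part of this patch), here is a minimal userspace sketch of exercising the new prog_token_fd field through a raw bpf(2) syscall. It assumes a kernel with this series applied and matching UAPI headers, and that token_fd was obtained via BPF_TOKEN_CREATE from a BPF FS mounted with delegate_cmds/delegate_progs/delegate_attachs covering BPF_PROG_LOAD, BPF_PROG_TYPE_XDP and BPF_XDP; the helper name and the trivial program are made up for this sketch.

  /* sketch only: pass a BPF token FD when loading a trivial XDP program */
  #include <linux/bpf.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>

  static int xdp_prog_load_with_token(int token_fd)
  {
          struct bpf_insn insns[] = {
                  /* r0 = XDP_PASS; exit; */
                  { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = XDP_PASS },
                  { .code = BPF_JMP | BPF_EXIT },
          };
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.prog_type = BPF_PROG_TYPE_XDP;
          attr.expected_attach_type = BPF_XDP;
          attr.insns = (__u64)(unsigned long)insns;
          attr.insn_cnt = 2;
          attr.license = (__u64)(unsigned long)"GPL";
          attr.prog_token_fd = token_fd;  /* new field added by this patch */

          /* returns prog FD on success, -1 with errno set on failure */
          return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  }

Without a usable token (or without CAP_BPF/CAP_NET_ADMIN in the token's owning user namespace), the same load is expected to fail with -EPERM, which is what the token selftests later in this series check.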
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 ++ include/uapi/linux/bpf.h | 2 + kernel/bpf/core.c | 1 + kernel/bpf/inode.c | 6 +- kernel/bpf/syscall.c | 87 ++++++++++++++++------ kernel/bpf/token.c | 27 +++++++ tools/include/uapi/linux/bpf.h | 2 + .../selftests/bpf/prog_tests/libbpf_probes.c | 2 + .../testing/selftests/bpf/prog_tests/libbpf_str.c | 3 + 9 files changed, 110 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e08e8436df38..20af87b59d70 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1461,6 +1461,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1601,6 +1602,8 @@ struct bpf_token { struct user_namespace *userns; u64 allowed_cmds; u64 allowed_maps; + u64 allowed_progs; + u64 allowed_attachs; }; struct bpf_struct_ops_value; @@ -2238,6 +2241,9 @@ struct bpf_token *bpf_token_get_from_fd(u32 ufd); bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9f9989e0d062..4df2d025c784 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1504,6 +1505,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). 
*/ __u32 log_true_size; + __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4b813da8d6c0..47085839af8d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2751,6 +2751,7 @@ void bpf_prog_free(struct bpf_prog *fp) if (aux->dst_prog) bpf_prog_put(aux->dst_prog); + bpf_token_put(aux->token); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 9c7865d1c53d..5359a0929c35 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -619,12 +619,14 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) else if (opts->delegate_maps) seq_printf(m, ",delegate_maps=0x%llx", opts->delegate_maps); - if (opts->delegate_progs == ~0ULL) + mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + if ((opts->delegate_progs & mask) == mask) seq_printf(m, ",delegate_progs=any"); else if (opts->delegate_progs) seq_printf(m, ",delegate_progs=0x%llx", opts->delegate_progs); - if (opts->delegate_attachs == ~0ULL) + mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + if ((opts->delegate_attachs & mask) == mask) seq_printf(m, ",delegate_attachs=any"); else if (opts->delegate_attachs) seq_printf(m, ",delegate_attachs=0x%llx", opts->delegate_attachs); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d87c5c27cde3..2c8393c21b8c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2608,13 +2608,15 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD log_true_size +#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; struct btf *attach_btf = NULL; + struct bpf_token *token = NULL; + bool bpf_cap; int err; char license[128]; @@ -2631,10 +2633,31 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; + bpf_prog_load_fixup_attach_type(attr); + + if (attr->prog_token_fd) { + token = bpf_token_get_from_fd(attr->prog_token_fd); + if (IS_ERR(token)) + return PTR_ERR(token); + /* if current token doesn't grant prog loading permissions, + * then we can't use this token, so ignore it and rely on + * system-wide capabilities checks + */ + if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || + !bpf_token_allow_prog_type(token, attr->prog_type, + attr->expected_attach_type)) { + bpf_token_put(token); + token = NULL; + } + } + + bpf_cap = bpf_token_capable(token, CAP_BPF); + err = -EPERM; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && - !bpf_capable()) - return -EPERM; + !bpf_cap) + goto put_token; /* Intent here is for unprivileged_bpf_disabled to block BPF program * creation for unprivileged users; other actions depend @@ -2643,21 +2666,23 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) * capability checks are still carried out for these * and other operations. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) - return -EPERM; + if (sysctl_unprivileged_bpf_disabled && !bpf_cap) + goto put_token; if (attr->insn_cnt == 0 || - attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) - return -E2BIG; + attr->insn_cnt > (bpf_cap ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { + err = -E2BIG; + goto put_token; + } if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && - !bpf_capable()) - return -EPERM; + !bpf_cap) + goto put_token; - if (is_net_admin_prog_type(type) && !bpf_net_capable()) - return -EPERM; - if (is_perfmon_prog_type(type) && !perfmon_capable()) - return -EPERM; + if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) + goto put_token; + if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) + goto put_token; /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog * or btf, we need to check which one it is @@ -2667,27 +2692,33 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (IS_ERR(dst_prog)) { dst_prog = NULL; attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); - if (IS_ERR(attach_btf)) - return -EINVAL; + if (IS_ERR(attach_btf)) { + err = -EINVAL; + goto put_token; + } if (!btf_is_kernel(attach_btf)) { /* attaching through specifying bpf_prog's BTF * objects directly might be supported eventually */ btf_put(attach_btf); - return -ENOTSUPP; + err = -ENOTSUPP; + goto put_token; } } } else if (attr->attach_btf_id) { /* fall back to vmlinux BTF, if BTF type ID is specified */ attach_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(attach_btf)) - return PTR_ERR(attach_btf); - if (!attach_btf) - return -EINVAL; + if (IS_ERR(attach_btf)) { + err = PTR_ERR(attach_btf); + goto put_token; + } + if (!attach_btf) { + err = -EINVAL; + goto put_token; + } btf_get(attach_btf); } - bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attach_btf, attr->attach_btf_id, dst_prog)) { @@ -2695,7 +2726,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - return -EINVAL; + err = -EINVAL; + goto put_token; } /* plain bpf_prog allocation */ @@ -2705,7 +2737,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - return -ENOMEM; + err = -EINVAL; + goto put_token; } prog->expected_attach_type = attr->expected_attach_type; @@ -2716,6 +2749,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; + /* move token into prog->aux, reuse taken refcnt */ + prog->aux->token = token; + token = NULL; + err = security_bpf_prog_alloc(prog->aux); if (err) goto free_prog; @@ -2817,6 +2854,8 @@ free_prog: if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); bpf_prog_free(prog); +put_token: + bpf_token_put(token); return err; } @@ -3806,7 +3845,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_SK_LOOKUP: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; case BPF_PROG_TYPE_CGROUP_SKB: - if (!bpf_net_capable()) + if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) /* cg-skb progs can be loaded by unpriv user. * check permissions at attach time. 
*/ diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c index 06c34dae658e..5a51e6b8f6bf 100644 --- a/kernel/bpf/token.c +++ b/kernel/bpf/token.c @@ -79,6 +79,20 @@ static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) seq_printf(m, "allowed_maps:\tany\n"); else seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); + + BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); + mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; + if ((token->allowed_progs & mask) == mask) + seq_printf(m, "allowed_progs:\tany\n"); + else + seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); + + BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); + mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; + if ((token->allowed_attachs & mask) == mask) + seq_printf(m, "allowed_attachs:\tany\n"); + else + seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs); } #define BPF_TOKEN_INODE_NAME "bpf-token" @@ -169,6 +183,8 @@ int bpf_token_create(union bpf_attr *attr) mnt_opts = path.dentry->d_sb->s_fs_info; token->allowed_cmds = mnt_opts->delegate_cmds; token->allowed_maps = mnt_opts->delegate_maps; + token->allowed_progs = mnt_opts->delegate_progs; + token->allowed_attachs = mnt_opts->delegate_attachs; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { @@ -228,3 +244,14 @@ bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type t return token->allowed_maps & (1ULL << type); } + +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type) +{ + if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) + return false; + + return (token->allowed_progs & (1ULL << prog_type)) && + (token->allowed_attachs & (1ULL << attach_type)); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9f9989e0d062..4df2d025c784 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1504,6 +1505,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). 
*/ __u32 log_true_size; + __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 573249a2814d..4ed46ed58a7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; + if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) + continue; if (!test__start_subtest(prog_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 2a0633f43c73..384bc1f7a65e 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -189,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; + if (prog_type == __MAX_BPF_PROG_TYPE) + continue; + prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); -- cgit v1.2.3-70-g09d2 From ecd435143eb03611e25694141bf59d1c04ad5b9e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:24 -0800 Subject: libbpf: add bpf_token_create() API Add low-level wrapper API for BPF_TOKEN_CREATE command in bpf() syscall. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-13-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 17 +++++++++++++++++ tools/lib/bpf/bpf.h | 24 ++++++++++++++++++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 42 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9dc9625651dc..d4019928a864 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1287,3 +1287,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } + +int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, token_create); + union bpf_attr attr; + int fd; + + if (!OPTS_VALID(opts, bpf_token_create_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.token_create.bpffs_fd = bpffs_fd; + attr.token_create.flags = OPTS_GET(opts, flags, 0); + + fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); + return libbpf_err_errno(fd); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index d0f53772bdc0..e49254c9f68f 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -640,6 +640,30 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); +struct bpf_token_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + size_t :0; +}; +#define bpf_token_create_opts__last_field flags + +/** + * @brief **bpf_token_create()** creates a new instance of BPF token derived + * from specified BPF FS mount point. + * + * BPF token created with this API can be passed to bpf() syscall for + * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. + * + * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token + * instance. 
+ * @param opts optional BPF token creation options, can be NULL + * + * @return BPF token FD > 0, on success; negative error code, otherwise (errno + * is also set to the error code) + */ +LIBBPF_API int bpf_token_create(int bpffs_fd, + struct bpf_token_create_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 91c5aef7dae7..df7657b65c47 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -401,6 +401,7 @@ LIBBPF_1.3.0 { bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + bpf_token_create; ring__avail_data_size; ring__consume; ring__consumer_pos; -- cgit v1.2.3-70-g09d2 From 37891cea6699200fb83eae464ebe1c0f73040474 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:25 -0800 Subject: libbpf: add BPF token support to bpf_map_create() API Add ability to provide token_fd for BPF_MAP_CREATE command through bpf_map_create() API. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-14-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 4 +++- tools/lib/bpf/bpf.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d4019928a864..1653b64b7015 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -169,7 +169,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); union bpf_attr attr; int fd; @@ -198,6 +198,8 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + attr.map_token_fd = OPTS_GET(opts, token_fd, 0); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index e49254c9f68f..ae2136f596b4 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,8 +51,11 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + + __u32 token_fd; + size_t :0; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field token_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, -- cgit v1.2.3-70-g09d2 From 1a8df7fa00aac35aff9ef1941c5334b3a01d09e4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:26 -0800 Subject: libbpf: add BPF token support to bpf_btf_load() API Allow user to specify token_fd for bpf_btf_load() API that wraps kernel's BPF_BTF_LOAD command. This allows loading BTF from unprivileged process as long as it has BPF token allowing BPF_BTF_LOAD command, which can be created and delegated by privileged process. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-15-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 4 +++- tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1653b64b7015..544ae2376b6b 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1184,7 +1184,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1209,6 +1209,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); + /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index ae2136f596b4..4b0f25e97b0d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -133,9 +133,10 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field log_true_size +#define bpf_btf_load_opts__last_field token_fd LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); -- cgit v1.2.3-70-g09d2 From 1571740a9ba036f26cc5211a86021199987219e8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:27 -0800 Subject: libbpf: add BPF token support to bpf_prog_load() API Wire through token_fd into bpf_prog_load(). Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-16-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 3 ++- tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 544ae2376b6b..f4e1da3c6d5f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -234,7 +234,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -263,6 +263,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); + attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 4b0f25e97b0d..991b86bfe7e4 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -105,9 +105,10 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. 
*/ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field log_true_size +#define bpf_prog_load_opts__last_field token_fd LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, -- cgit v1.2.3-70-g09d2 From dc5196fac40c2cb96330bcb98eef868a7fd225b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 30 Nov 2023 10:52:28 -0800 Subject: selftests/bpf: add BPF token-enabled tests Add a selftest that attempts to conceptually replicate intended BPF token use cases inside user namespaced container. Child process is forked. It is then put into its own userns and mountns. Child creates BPF FS context object. This ensures child userns is captured as the owning userns for this instance of BPF FS. Given setting delegation mount options is privileged operation, we ensure that child cannot set them. This context is passed back to privileged parent process through Unix socket, where parent sets up delegation options, creates, and mounts it as a detached mount. This mount FD is passed back to the child to be used for BPF token creation, which allows otherwise privileged BPF operations to succeed inside userns. We validate that all of token-enabled privileged commands (BPF_BTF_LOAD, BPF_MAP_CREATE, and BPF_PROG_LOAD) work as intended. They should only succeed inside the userns if a) BPF token is provided with proper allowed sets of commands and types; and b) namespaces CAP_BPF and other privileges are set. Lacking a) or b) should lead to -EPERM failures. Based on suggested workflow by Christian Brauner ([0]). [0] https://lore.kernel.org/bpf/20230704-hochverdient-lehne-eeb9eeef785e@brauner/ Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231130185229.2688956-17-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 672 +++++++++++++++++++++++++ 1 file changed, 672 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/token.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c new file mode 100644 index 000000000000..dc03790c6272 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ +#define _GNU_SOURCE +#include +#include +#include "cap_helpers.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline int sys_mount(const char *dev_name, const char *dir_name, + const char *type, unsigned long flags, + const void *data) +{ + return syscall(__NR_mount, dev_name, dir_name, type, flags, data); +} + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fspick(int dfd, const char *path, unsigned flags) +{ + return syscall(__NR_fspick, dfd, path, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +static int drop_priv_caps(__u64 *old_caps) +{ + return cap_disable_effective((1ULL << CAP_BPF) | + (1ULL << CAP_PERFMON) | + (1ULL << CAP_NET_ADMIN) | + (1ULL << CAP_SYS_ADMIN), old_caps); +} + +static int restore_priv_caps(__u64 old_caps) +{ + return cap_enable_effective(old_caps, NULL); +} + +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask) +{ + char buf[32]; + int err; + + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, + mask == ~0ULL ? "any" : buf, 0); + if (err < 0) + err = -errno; + return err; +} + +#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) + +struct bpffs_opts { + __u64 cmds; + __u64 maps; + __u64 progs; + __u64 attachs; +}; + +static int create_bpffs_fd(void) +{ + int fs_fd; + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + ASSERT_GE(fs_fd, 0, "fs_fd"); + + return fs_fd; +} + +static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) +{ + int mnt_fd, err; + + /* set up token delegation mount options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds); + if (!ASSERT_OK(err, "fs_cfg_cmds")) + return err; + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps); + if (!ASSERT_OK(err, "fs_cfg_maps")) + return err; + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs); + if (!ASSERT_OK(err, "fs_cfg_progs")) + return err; + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs); + if (!ASSERT_OK(err, "fs_cfg_attachs")) + return err; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (err < 0) + return -errno; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (err < 0) + return -errno; + + return mnt_fd; +} + +/* send FD over Unix domain (AF_UNIX) socket */ +static int sendfd(int sockfd, int fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1] = { fd }, err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + + err = sendmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "sendmsg")) + return -EINVAL; + + return 0; +} + +/* receive FD over Unix domain 
(AF_UNIX) socket */ +static int recvfd(int sockfd, int *fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1], err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + err = recvmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "recvmsg")) + return -EINVAL; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || + !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || + !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || + !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) + return -EINVAL; + + memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); + *fd = fds[0]; + + return 0; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +typedef int (*child_callback_fn)(int); + +static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; + + /* setup userns with root mappings */ + err = create_and_enter_userns(); + if (!ASSERT_OK(err, "create_and_enter_userns")) + goto cleanup; + + /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "create_mountns")) + goto cleanup; + + err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (!ASSERT_OK(err, "remount_root")) + goto cleanup; + + fs_fd = create_bpffs_fd(); + if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1); + ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, fs_fd); + if (!ASSERT_OK(err, "send_fs_fd")) + goto cleanup; + zclose(fs_fd); + + /* avoid mucking around with mount namespaces and mounting at + * well-known path, just get detach-mounted BPF FS fd back from parent 
+ */ + err = recvfd(sock_fd, &mnt_fd); + if (!ASSERT_OK(err, "recv_mnt_fd")) + goto cleanup; + + /* try to fspick() BPF FS and try to add some delegation options */ + fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); + if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot reconfigure to set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_maps", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_progs", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_attachs", ~0ULL); + if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); + if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { + err = -EINVAL; + goto cleanup; + } + + /* do custom test logic with customly set up BPF FS instance */ + err = callback(bpffs_fd); + if (!ASSERT_OK(err, "test_callback")) + goto cleanup; + + err = 0; +cleanup: + zclose(sock_fd); + zclose(mnt_fd); + zclose(fs_fd); + zclose(bpffs_fd); + + exit(-err); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) +{ + int fs_fd = -1, mnt_fd = -1, err; + + err = recvfd(sock_fd, &fs_fd); + if (!ASSERT_OK(err, "recv_bpffs_fd")) + goto cleanup; + + mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, mnt_fd); + if (!ASSERT_OK(err, "send_mnt_fd")) + goto cleanup; + zclose(mnt_fd); + + err = wait_for_pid(child_pid); + ASSERT_OK(err, "waitpid_child"); + +cleanup: + zclose(sock_fd); + zclose(fs_fd); + zclose(mnt_fd); + + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) +{ + int sock_fds[2] = { -1, -1 }; + int child_pid = 0, err; + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); + if (!ASSERT_OK(err, "socketpair")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GE(child_pid, 0, "fork")) + goto cleanup; + + if (child_pid == 0) { + zclose(sock_fds[0]); + return child(sock_fds[1], bpffs_opts, cb); + + } else { + zclose(sock_fds[1]); + return parent(child_pid, bpffs_opts, sock_fds[0]); + } + +cleanup: + zclose(sock_fds[0]); + zclose(sock_fds[1]); + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static int userns_map_create(int mnt_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int err, token_fd = -1, map_fd = -1; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token 
alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no token, no CAP_BPF -> fail */ + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* token without CAP_BPF -> fail */ + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* CAP_BPF without token -> fail */ + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* finally, namespaced CAP_BPF + token -> success */ + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { + err = -EINVAL; + goto cleanup; + } + +cleanup: + zclose(token_fd); + zclose(map_fd); + return err; +} + +static int userns_btf_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); + int err, token_fd = -1, btf_fd = -1; + const void *raw_btf_data; + struct btf *btf = NULL; + __u32 raw_btf_size; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* setup a trivial BTF data to load to the kernel */ + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + goto cleanup; + + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) + goto cleanup; + + /* no token + no CAP_BPF -> failure */ + btf_opts.token_fd = 0; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) + goto cleanup; + + /* token + no CAP_BPF -> failure */ + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) + goto cleanup; + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* token + CAP_BPF -> success */ + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) + goto cleanup; + + err = 0; +cleanup: + btf__free(btf); + zclose(btf_fd); + zclose(token_fd); + return err; +} + +static int userns_prog_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); + int err, token_fd = -1, prog_fd = -1; + struct bpf_insn insns[] = { + /* bpf_jiffies64() requires CAP_BPF */ + BPF_RAW_INSN(BPF_JMP | 
BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + /* bpf_get_current_task() requires CAP_PERFMON */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), + /* r0 = 0; exit; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = ARRAY_SIZE(insns); + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* validate we can successfully load BPF program with token; this + * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) + * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have + * BPF token wired properly in a bunch of places in the kernel + */ + prog_opts.token_fd = token_fd; + prog_opts.expected_attach_type = BPF_XDP; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { + err = -EPERM; + goto cleanup; + } + + /* no token + caps -> failure */ + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no caps + token -> failure */ + prog_opts.token_fd = token_fd; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + /* no caps + no token -> definitely a failure */ + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = 0; +cleanup: + zclose(prog_fd); + zclose(token_fd); + return err; +} + +void test_token(void) +{ + if (test__start_subtest("map_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_MAP_CREATE, + .maps = 1ULL << BPF_MAP_TYPE_STACK, + }; + + subtest_userns(&opts, userns_map_create); + } + if (test__start_subtest("btf_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_BTF_LOAD, + }; + + subtest_userns(&opts, userns_btf_load); + } + if (test__start_subtest("prog_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_PROG_LOAD, + .progs = 1ULL << BPF_PROG_TYPE_XDP, + .attachs = 1ULL << BPF_XDP, + }; + + subtest_userns(&opts, userns_prog_load); + } +} -- cgit v1.2.3-70-g09d2 From 7065eefb38f16c91e9ace36fb7c873e4c9857c27 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 6 Dec 2023 11:09:20 -0800 Subject: bpf: rename MAX_BPF_LINK_TYPE into __MAX_BPF_LINK_TYPE for consistency To stay consistent with the naming pattern used for similar cases in BPF UAPI (__MAX_BPF_ATTACH_TYPE, etc), rename MAX_BPF_LINK_TYPE into __MAX_BPF_LINK_TYPE. Also similar to MAX_BPF_ATTACH_TYPE and MAX_BPF_REG, add: #define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE Not all __MAX_xxx enums have such #define, so I'm not sure if we should add it or not, but I figured I'll start with a completely backwards compatible way, and we can drop that, if necessary. Also adjust a selftest that used MAX_BPF_LINK_TYPE enum. 
Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231206190920.1651226-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 4 +++- tools/include/uapi/linux/bpf.h | 4 +++- tools/testing/selftests/bpf/prog_tests/libbpf_str.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4df2d025c784..e0545201b55f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1108,9 +1108,11 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, - MAX_BPF_LINK_TYPE, + __MAX_BPF_LINK_TYPE, }; +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + enum bpf_perf_event_type { BPF_PERF_EVENT_UNSPEC = 0, BPF_PERF_EVENT_UPROBE = 1, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4df2d025c784..e0545201b55f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1108,9 +1108,11 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, - MAX_BPF_LINK_TYPE, + __MAX_BPF_LINK_TYPE, }; +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + enum bpf_perf_event_type { BPF_PERF_EVENT_UNSPEC = 0, BPF_PERF_EVENT_UPROBE = 1, diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 384bc1f7a65e..62ea855ec4d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -87,7 +87,7 @@ static void test_libbpf_bpf_link_type_str(void) const char *link_type_str; char buf[256]; - if (link_type == MAX_BPF_LINK_TYPE) + if (link_type == __MAX_BPF_LINK_TYPE) continue; link_type_name = btf__str_by_offset(btf, e->name_off); -- cgit v1.2.3-70-g09d2 From e28bd359bcc8eb849aaa475f3c3f9705fba26d6e Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Wed, 6 Dec 2023 23:11:49 -0500 Subject: bpf: Add verifier regression test for previous patch Add a regression test for var-off zero-sized reads. Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20231207041150.229139-3-andreimatei1@gmail.com --- .../testing/selftests/bpf/progs/verifier_var_off.c | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index 83a90afba785..b7bdd7db3a35 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -224,6 +224,35 @@ __naked void access_max_out_of_bound(void) : __clobber_all); } +/* Similar to the test above, but this time check the special case of a + * zero-sized stack access. We used to have a bug causing crashes for zero-sized + * out-of-bounds accesses. 
+ */ +SEC("socket") +__description("indirect variable-offset stack access, zero-sized, max out of bound") +__failure __msg("invalid variable-offset indirect access to stack R1") +__naked void zero_sized_access_max_out_of_bound(void) +{ + asm volatile (" \ + r0 = 0; \ + /* Fill some stack */ \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + /* Get an unknown value */ \ + r1 = *(u32*)(r1 + 0); \ + r1 &= 63; \ + r1 += -16; \ + /* r1 is now anywhere in [-16,48) */ \ + r1 += r10; \ + r2 = 0; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + SEC("lwt_in") __description("indirect variable-offset stack access, min out of bound") __failure __msg("invalid variable-offset indirect access to stack R2") -- cgit v1.2.3-70-g09d2 From 4624a78c18c62da815f3253966b7a87995f77e1b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:53 +0800 Subject: selftests/net: convert test_bridge_backup_port.sh to run it in unique namespace There is no h1 h2 actually. Remove it. Here is the test result after conversion. ]# ./test_bridge_backup_port.sh Backup port ----------- TEST: Forwarding out of swp1 [ OK ] TEST: No forwarding out of vx0 [ OK ] TEST: swp1 carrier off [ OK ] TEST: No forwarding out of swp1 [ OK ] ... Backup nexthop ID - ping ------------------------ TEST: Ping with backup nexthop ID [ OK ] TEST: Ping after disabling backup nexthop ID [ OK ] Backup nexthop ID - torture test -------------------------------- TEST: Torture test [ OK ] Tests passed: 83 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/test_bridge_backup_port.sh | 371 ++++++++++----------- 1 file changed, 182 insertions(+), 189 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 112cfd8a10ad..70a7d87ba2d2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -35,9 +35,8 @@ # | sw1 | | sw2 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # All tests in this script. Can be overridden with -t option. 
TESTS=" @@ -132,9 +131,6 @@ setup_topo_ns() { local ns=$1; shift - ip netns add $ns - ip -n $ns link set dev lo up - ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 @@ -145,13 +141,14 @@ setup_topo() { local ns - for ns in sw1 sw2; do + setup_ns sw1 sw2 + for ns in $sw1 $sw2; do setup_topo_ns $ns done ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns sw1 name veth0 - ip link set dev veth1 netns sw2 name veth0 + ip link set dev veth0 netns $sw1 name veth0 + ip link set dev veth1 netns $sw2 name veth0 } setup_sw_common() @@ -190,7 +187,7 @@ setup_sw_common() setup_sw1() { - local ns=sw1 + local ns=$sw1 local local_addr=192.0.2.33 local remote_addr=192.0.2.34 local veth_addr=192.0.2.49 @@ -203,7 +200,7 @@ setup_sw1() setup_sw2() { - local ns=sw2 + local ns=$sw2 local local_addr=192.0.2.34 local remote_addr=192.0.2.33 local veth_addr=192.0.2.50 @@ -229,11 +226,7 @@ setup() cleanup() { - local ns - - for ns in h1 h2 sw1 sw2; do - ip netns del $ns &> /dev/null - done + cleanup_ns $sw1 $sw2 } ################################################################################ @@ -248,85 +241,85 @@ backup_port() echo "Backup port" echo "-----------" - run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" - run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" # Initial state - check that packets are forwarded out of swp1 when it # has a carrier and not forwarded out of any port when it does not have # a carrier. - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 
0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" # Configure vx0 as the backup port of swp1 and check that packets are # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 # does not have a carrier. - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 0 "vx0 configured as backup port of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "Forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 3 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. - run_cmd "bridge -n sw1 link set dev swp1 nobackup_port" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 1 "vx0 not configured as backup port of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 
0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" } @@ -339,125 +332,125 @@ backup_nhid() echo "Backup nexthop ID" echo "-----------------" - run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" - run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" - run_cmd "bridge -n sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010" - run_cmd "ip -n sw2 address replace 192.0.2.36/32 dev lo" + run_cmd "ip -n $sw2 address replace 192.0.2.36/32 dev lo" # The first filter matches on packets forwarded using the backup # nexthop ID and the second filter matches on packets forwarded using a # regular VXLAN FDB entry. - run_cmd "tc -n sw2 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" - run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass" + run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass" # Configure vx0 as the backup port of swp1 and check that packets are # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 # does not have a carrier. When packets are forwarded out of vx0, check # that they are forwarded by the VXLAN FDB entry. 
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 0 "vx0 configured as backup port of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 0 + tc_check_packets $sw1 "dev vx0 egress" 101 0 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 1 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 0 + tc_check_packets $sw2 "dev vx0 ingress" 101 0 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "Forwarding using VXLAN FDB entry" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check # that when packets are forwarded out of vx0, they are forwarded using # the backup nexthop ID. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 2 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 
0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "Forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "No forwarding using VXLAN FDB entry" - run_cmd "ip -n sw1 link set dev swp1 carrier on" + run_cmd "ip -n $sw1 link set dev swp1 carrier on" log_test $? 0 "swp1 carrier on" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 3 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "No forwarding using VXLAN FDB entry" # Reset the backup nexthop ID to 0 and check that packets are no longer # forwarded using the backup nexthop ID when swp1 does not have a # carrier and are instead forwarded by the VXLAN FDB. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid\"" log_test $? 1 "No backup nexthop ID configured for swp1" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "Forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 1 + tc_check_packets $sw2 "dev vx0 ingress" 102 1 log_test $? 0 "No forwarding using VXLAN FDB entry" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 4 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - tc_check_packets sw2 "dev vx0 ingress" 102 2 + tc_check_packets $sw2 "dev vx0 ingress" 102 2 log_test $? 
0 "Forwarding using VXLAN FDB entry" } @@ -475,109 +468,109 @@ backup_nhid_invalid() # is forwarded out of the VXLAN port, but dropped by the VXLAN driver # and does not crash the host. - run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" - run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" # Drop all other Tx traffic to avoid changes to Tx drop counter. - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop" - tx_drop=$(ip -n sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]') + tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]') - run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" - run_cmd "tc -n sw2 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" # First, check that redirection works. - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" log_test $? 0 "vx0 configured as backup port of swp1" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" log_test $? 0 "Valid nexthop as backup nexthop" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" log_test $? 0 "swp1 carrier off" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 
0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "Forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'" log_test $? 0 "No Tx drop increase" # Use a non-existent nexthop ID. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 20" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 20\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 20" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 20\"" log_test $? 0 "Non-existent nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'" log_test $? 0 "Tx drop increased" # Use a blckhole nexthop. - run_cmd "ip -n sw1 nexthop replace id 30 blackhole" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 30" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 30\"" + run_cmd "ip -n $sw1 nexthop replace id 30 blackhole" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 30" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 30\"" log_test $? 0 "Blackhole nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'" log_test $? 0 "Tx drop increased" # Non-group FDB nexthop. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 1" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 1\"" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 1" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 1\"" log_test $? 
0 "Non-group FDB nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'" log_test $? 0 "Tx drop increased" # IPv6 address family nexthop. - run_cmd "ip -n sw1 nexthop replace id 100 via 2001:db8:100::1 fdb" - run_cmd "ip -n sw1 nexthop replace id 200 via 2001:db8:100::1 fdb" - run_cmd "ip -n sw1 nexthop replace id 300 group 100/200 fdb" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 300" - run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 300\"" + run_cmd "ip -n $sw1 nexthop replace id 100 via 2001:db8:100::1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 200 via 2001:db8:100::1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 300 group 100/200 fdb" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 300" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 300\"" log_test $? 0 "IPv6 address family nexthop as backup nexthop" - run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets sw1 "dev swp1 egress" 101 0 + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 log_test $? 0 "No forwarding out of swp1" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 0 "Forwarding out of vx0" - tc_check_packets sw2 "dev vx0 ingress" 101 1 + tc_check_packets $sw2 "dev vx0 ingress" 101 1 log_test $? 0 "No forwarding using backup nexthop ID" - run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'" log_test $? 0 "Tx drop increased" } @@ -591,44 +584,44 @@ backup_nhid_ping() echo "------------------------" # Test bidirectional traffic when traffic is redirected in both VTEPs. 
- sw1_mac=$(ip -n sw1 -j -p link show br0.10 | jq -r '.[]["address"]') - sw2_mac=$(ip -n sw2 -j -p link show br0.10 | jq -r '.[]["address"]') + sw1_mac=$(ip -n $sw1 -j -p link show br0.10 | jq -r '.[]["address"]') + sw2_mac=$(ip -n $sw2 -j -p link show br0.10 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $sw2_mac dev swp1 master static vlan 10" - run_cmd "bridge -n sw2 fdb replace $sw1_mac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $sw2_mac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw2 fdb replace $sw1_mac dev swp1 master static vlan 10" - run_cmd "ip -n sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10" - run_cmd "ip -n sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10" + run_cmd "ip -n $sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10" + run_cmd "ip -n $sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10" - run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw2 nexthop replace id 1 via 192.0.2.33 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1 fdb" - run_cmd "ip -n sw2 nexthop replace id 10 group 1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw2 nexthop replace id 1 via 192.0.2.33 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1 fdb" + run_cmd "ip -n $sw2 nexthop replace id 10 group 1 fdb" - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw2 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw2 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10" - run_cmd "ip -n sw1 link set dev swp1 carrier off" - run_cmd "ip -n sw2 link set dev swp1 carrier off" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + run_cmd "ip -n $sw2 link set dev swp1 carrier off" - run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" log_test $? 0 "Ping with backup nexthop ID" # Reset the backup nexthop ID to 0 and check that ping fails. - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0" - run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 0" - run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" log_test $? 1 "Ping after disabling backup nexthop ID" } backup_nhid_add_del_loop() { while true; do - ip -n sw1 nexthop del id 10 - ip -n sw1 nexthop replace id 10 group 1/2 fdb + ip -n $sw1 nexthop del id 10 + ip -n $sw1 nexthop replace id 10 group 1/2 fdb done >/dev/null 2>&1 } @@ -648,19 +641,19 @@ backup_nhid_torture() # deleting the group. The test is considered successful if nothing # crashed. 
- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" - run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" - run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" - run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" - run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" - run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" backup_nhid_add_del_loop & pid1=$! - ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 & + ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 & pid2=$! sleep 30 -- cgit v1.2.3-70-g09d2 From 312abe3d93a35f9c486a4703d39cab52457266f0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:54 +0800 Subject: selftests/net: convert test_bridge_neigh_suppress.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_bridge_neigh_suppress.sh Per-port ARP suppression - VLAN 10 ---------------------------------- TEST: arping [ OK ] TEST: ARP suppression [ OK ] ... TEST: NS suppression (VLAN 20) [ OK ] Tests passed: 148 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/test_bridge_neigh_suppress.sh | 331 ++++++++++----------- 1 file changed, 162 insertions(+), 169 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index d80f2cd87614..8533393a4f18 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -45,9 +45,8 @@ # | sw1 | | sw2 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # All tests in this script. Can be overridden with -t option. 
TESTS=" @@ -140,9 +139,6 @@ setup_topo_ns() { local ns=$1; shift - ip netns add $ns - ip -n $ns link set dev lo up - ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 @@ -153,21 +149,22 @@ setup_topo() { local ns - for ns in h1 h2 sw1 sw2; do + setup_ns h1 h2 sw1 sw2 + for ns in $h1 $h2 $sw1 $sw2; do setup_topo_ns $ns done ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns h1 name eth0 - ip link set dev veth1 netns sw1 name swp1 + ip link set dev veth0 netns $h1 name eth0 + ip link set dev veth1 netns $sw1 name swp1 ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns sw1 name veth0 - ip link set dev veth1 netns sw2 name veth0 + ip link set dev veth0 netns $sw1 name veth0 + ip link set dev veth1 netns $sw2 name veth0 ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns h2 name eth0 - ip link set dev veth1 netns sw2 name swp1 + ip link set dev veth0 netns $h2 name eth0 + ip link set dev veth1 netns $sw2 name swp1 } setup_host_common() @@ -190,7 +187,7 @@ setup_host_common() setup_h1() { - local ns=h1 + local ns=$h1 local v4addr1=192.0.2.1/28 local v4addr2=192.0.2.17/28 local v6addr1=2001:db8:1::1/64 @@ -201,7 +198,7 @@ setup_h1() setup_h2() { - local ns=h2 + local ns=$h2 local v4addr1=192.0.2.2/28 local v4addr2=192.0.2.18/28 local v6addr1=2001:db8:1::2/64 @@ -254,7 +251,7 @@ setup_sw_common() setup_sw1() { - local ns=sw1 + local ns=$sw1 local local_addr=192.0.2.33 local remote_addr=192.0.2.34 local veth_addr=192.0.2.49 @@ -265,7 +262,7 @@ setup_sw1() setup_sw2() { - local ns=sw2 + local ns=$sw2 local local_addr=192.0.2.34 local remote_addr=192.0.2.33 local veth_addr=192.0.2.50 @@ -291,11 +288,7 @@ setup() cleanup() { - local ns - - for ns in h1 h2 sw1 sw2; do - ip netns del $ns &> /dev/null - done + cleanup_ns $h1 $h2 $sw1 $sw2 } ################################################################################ @@ -312,80 +305,80 @@ neigh_suppress_arp_common() echo "Per-port ARP suppression - VLAN $vid" echo "----------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" # Initial state - check that ARP requests are not suppressed and that # ARP replies are received. - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression" # Enable neighbor suppression and check that nothing changes compared # to the initial state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 
0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression" # Install an FDB entry for the remote host and check that nothing # changes compared to the initial state. - h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" log_test $? 0 "FDB entry installation" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" # Install a neighbor on the matching SVI interface and check that ARP # requests are suppressed. - run_cmd "ip -n sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" log_test $? 0 "Neighbor entry installation" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" # Take the second host down and check that ARP requests are suppressed # and that ARP replies are received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" - run_cmd "ip -n h2 link set dev eth0.$vid up" + run_cmd "ip -n $h2 link set dev eth0.$vid up" log_test $? 0 "H2 up" # Disable neighbor suppression and check that ARP requests are no # longer suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "ARP suppression" # Take the second host down and check that ARP requests are not # suppressed and that ARP replies are not received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 
1 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 0 "ARP suppression" } @@ -415,80 +408,80 @@ neigh_suppress_ns_common() echo "Per-port NS suppression - VLAN $vid" echo "---------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass" # Initial state - check that NS messages are not suppressed and that ND # messages are received. - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression" # Enable neighbor suppression and check that nothing changes compared # to the initial state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression" # Install an FDB entry for the remote host and check that nothing # changes compared to the initial state. - h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" log_test $? 0 "FDB entry installation" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" # Install a neighbor on the matching SVI interface and check that NS # messages are suppressed. - run_cmd "ip -n sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid" log_test $? 0 "Neighbor entry installation" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" # Take the second host down and check that NS messages are suppressed # and that ND messages are received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 
0 "H2 down" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" - run_cmd "ip -n h2 link set dev eth0.$vid up" + run_cmd "ip -n $h2 link set dev eth0.$vid up" log_test $? 0 "H2 up" # Disable neighbor suppression and check that NS messages are no longer # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "NS suppression" # Take the second host down and check that NS messages are not # suppressed and that ND messages are not received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 2 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 0 "NS suppression" } @@ -524,118 +517,118 @@ neigh_vlan_suppress_arp() echo "Per-{Port, VLAN} ARP suppression" echo "--------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass" - h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') - h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" - run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" - run_cmd "ip -n sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" - run_cmd "ip -n sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" # Enable per-{Port, VLAN} neighbor 
suppression and check that ARP # requests are not suppressed and that ARP replies are received. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" log_test $? 0 "\"neigh_vlan_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 1 + tc_check_packets $sw1 "dev vx0 egress" 102 1 log_test $? 0 "ARP suppression (VLAN $vid2)" # Enable neighbor suppression on VLAN 10 and check that only on this # VLAN ARP requests are suppressed. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 2 + tc_check_packets $sw1 "dev vx0 egress" 102 2 log_test $? 0 "ARP suppression (VLAN $vid2)" # Enable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 3 + tc_check_packets $sw1 "dev vx0 egress" 102 3 log_test $? 
0 "ARP suppression (VLAN $vid2)" # Disable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 4 + tc_check_packets $sw1 "dev vx0 egress" 102 4 log_test $? 0 "ARP suppression (VLAN $vid2)" # Disable neighbor suppression on VLAN 10 and check that ARP requests # are no longer suppressed on this VLAN. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "ARP suppression (VLAN $vid2)" # Disable per-{Port, VLAN} neighbor suppression, enable neighbor # suppression on the port and check that on both VLANs ARP requests are # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" log_test $? 0 "\"neigh_vlan_suppress\" is off" - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 
0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "ARP suppression (VLAN $vid2)" } @@ -655,118 +648,118 @@ neigh_vlan_suppress_ns() echo "Per-{Port, VLAN} NS suppression" echo "-------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass" - h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') - h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" - run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" - run_cmd "ip -n sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" - run_cmd "ip -n sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" # Enable per-{Port, VLAN} neighbor suppression and check that NS # messages are not suppressed and that ND messages are received. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" log_test $? 0 "\"neigh_vlan_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 1 + tc_check_packets $sw1 "dev vx0 egress" 102 1 log_test $? 0 "NS suppression (VLAN $vid2)" # Enable neighbor suppression on VLAN 10 and check that only on this # VLAN NS messages are suppressed. 
- run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 2 + tc_check_packets $sw1 "dev vx0 egress" 102 2 log_test $? 0 "NS suppression (VLAN $vid2)" # Enable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 3 + tc_check_packets $sw1 "dev vx0 egress" 102 3 log_test $? 0 "NS suppression (VLAN $vid2)" # Disable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 4 + tc_check_packets $sw1 "dev vx0 egress" 102 4 log_test $? 
0 "NS suppression (VLAN $vid2)" # Disable neighbor suppression on VLAN 10 and check that NS messages # are no longer suppressed on this VLAN. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "NS suppression (VLAN $vid2)" # Disable per-{Port, VLAN} neighbor suppression, enable neighbor # suppression on the port and check that on both VLANs NS messages are # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" log_test $? 0 "\"neigh_vlan_suppress\" is off" - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "NS suppression (VLAN $vid2)" } -- cgit v1.2.3-70-g09d2 From a8258e64ca74314478fda85a42b1c1eb2db29c67 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:55 +0800 Subject: selftests/net: convert test_vxlan_mdb.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_mdb.sh Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay -------------------------------------------------------------------- TEST: MDB entry addition [ OK ] ... Data path: MDB torture test - IPv6 overlay / IPv6 underlay ---------------------------------------------------------- TEST: Torture test [ OK ] Tests passed: 620 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/test_vxlan_mdb.sh | 202 +++++++++++++------------- 1 file changed, 99 insertions(+), 103 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 6e996f8063cd..6725fd9157b9 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -55,9 +55,8 @@ # | ns2_v4 | | ns2_v6 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 CONTROL_PATH_TESTS=" basic_star_g_ipv4_ipv4 @@ -260,9 +259,6 @@ setup_common() local local_addr1=$1; shift local local_addr2=$1; shift - ip netns add $ns1 - ip netns add $ns2 - ip link add name veth0 type veth peer name veth1 ip link set dev veth0 netns $ns1 name veth0 ip link set dev veth1 netns $ns2 name veth0 @@ -273,36 +269,36 @@ setup_common() setup_v4() { - setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2 + setup_ns ns1_v4 ns2_v4 + setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2 - ip -n ns1_v4 address add 192.0.2.17/28 dev veth0 - ip -n ns2_v4 address add 192.0.2.18/28 dev veth0 + ip -n $ns1_v4 address add 192.0.2.17/28 dev veth0 + ip -n $ns2_v4 address add 192.0.2.18/28 dev veth0 - ip -n ns1_v4 route add default via 192.0.2.18 - ip -n ns2_v4 route add default via 192.0.2.17 + ip -n $ns1_v4 route add default via 192.0.2.18 + ip -n $ns2_v4 route add default via 192.0.2.17 } cleanup_v4() { - ip netns del ns2_v4 - ip netns del ns1_v4 + cleanup_ns $ns2_v4 $ns1_v4 } setup_v6() { - setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2 + setup_ns ns1_v6 ns2_v6 + setup_common $ns1_v6 $ns2_v6 2001:db8:1::1 2001:db8:1::2 - ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad - ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad + ip -n $ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad + ip -n $ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad - ip -n ns1_v6 route add default via 2001:db8:2::2 - ip -n ns2_v6 route add default via 2001:db8:2::1 + ip -n $ns1_v6 route add default via 2001:db8:2::2 + ip -n $ns2_v6 route add default via 2001:db8:2::1 } cleanup_v6() { - ip netns del ns2_v6 - ip netns del ns1_v6 + cleanup_ns $ns2_v6 $ns1_v6 } setup() @@ -433,7 +429,7 @@ basic_common() basic_star_g_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp 239.1.1.1" local vtep_ip=198.51.100.100 @@ -446,7 +442,7 @@ basic_star_g_ipv4_ipv4() basic_star_g_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp ff0e::1" local vtep_ip=198.51.100.100 @@ -459,7 +455,7 @@ basic_star_g_ipv6_ipv4() basic_star_g_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp 239.1.1.1" local vtep_ip=2001:db8:1000::1 @@ -472,7 +468,7 @@ basic_star_g_ipv4_ipv6() basic_star_g_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp ff0e::1" local vtep_ip=2001:db8:1000::1 @@ -485,7 +481,7 @@ basic_star_g_ipv6_ipv6() basic_sg_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp 239.1.1.1 src 192.0.2.129" local vtep_ip=198.51.100.100 @@ -498,7 +494,7 @@ basic_sg_ipv4_ipv4() basic_sg_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp ff0e::1 src 2001:db8:100::1" local vtep_ip=198.51.100.100 @@ -511,7 +507,7 @@ basic_sg_ipv6_ipv4() basic_sg_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp 239.1.1.1 src 192.0.2.129" local vtep_ip=2001:db8:1000::1 @@ -524,7 +520,7 @@ 
basic_sg_ipv4_ipv6() basic_sg_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp ff0e::1 src 2001:db8:100::1" local vtep_ip=2001:db8:1000::1 @@ -694,7 +690,7 @@ star_g_common() star_g_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=239.1.1.1 local src1=192.0.2.129 local src2=192.0.2.130 @@ -711,7 +707,7 @@ star_g_ipv4_ipv4() star_g_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=ff0e::1 local src1=2001:db8:100::1 local src2=2001:db8:100::2 @@ -728,7 +724,7 @@ star_g_ipv6_ipv4() star_g_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=239.1.1.1 local src1=192.0.2.129 local src2=192.0.2.130 @@ -745,7 +741,7 @@ star_g_ipv4_ipv6() star_g_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=ff0e::1 local src1=2001:db8:100::1 local src2=2001:db8:100::2 @@ -793,7 +789,7 @@ sg_common() sg_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=239.1.1.1 local src=192.0.2.129 local vtep_ip=198.51.100.100 @@ -808,7 +804,7 @@ sg_ipv4_ipv4() sg_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=ff0e::1 local src=2001:db8:100::1 local vtep_ip=198.51.100.100 @@ -823,7 +819,7 @@ sg_ipv6_ipv4() sg_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=239.1.1.1 local src=192.0.2.129 local vtep_ip=2001:db8:1000::1 @@ -838,7 +834,7 @@ sg_ipv4_ipv6() sg_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=ff0e::1 local src=2001:db8:100::1 local vtep_ip=2001:db8:1000::1 @@ -918,7 +914,7 @@ dump_common() dump_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local local_addr=192.0.2.1 local remote_prefix=198.51.100. local fn=ipv4_grps_get @@ -932,7 +928,7 @@ dump_ipv4_ipv4() dump_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local local_addr=192.0.2.1 local remote_prefix=198.51.100. 
local fn=ipv6_grps_get @@ -946,7 +942,7 @@ dump_ipv6_ipv4() dump_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local local_addr=2001:db8:1::1 local remote_prefix=2001:db8:1000:: local fn=ipv4_grps_get @@ -960,7 +956,7 @@ dump_ipv4_ipv6() dump_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local local_addr=2001:db8:1::1 local remote_prefix=2001:db8:1000:: local fn=ipv6_grps_get @@ -1072,8 +1068,8 @@ encap_params_common() encap_params_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1091,8 +1087,8 @@ encap_params_ipv4_ipv4() encap_params_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1110,8 +1106,8 @@ encap_params_ipv6_ipv4() encap_params_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1129,8 +1125,8 @@ encap_params_ipv4_ipv6() encap_params_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1208,8 +1204,8 @@ starg_exclude_ir_common() starg_exclude_ir_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1227,8 +1223,8 @@ starg_exclude_ir_ipv4_ipv4() starg_exclude_ir_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1246,8 +1242,8 @@ starg_exclude_ir_ipv6_ipv4() starg_exclude_ir_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1265,8 +1261,8 @@ starg_exclude_ir_ipv4_ipv6() starg_exclude_ir_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1344,8 +1340,8 @@ starg_include_ir_common() starg_include_ir_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1363,8 +1359,8 @@ starg_include_ir_ipv4_ipv4() starg_include_ir_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -1382,8 +1378,8 @@ starg_include_ir_ipv6_ipv4() starg_include_ir_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1401,8 +1397,8 @@ starg_include_ir_ipv4_ipv6() starg_include_ir_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -1462,8 +1458,8 @@ starg_exclude_p2mp_common() starg_exclude_p2mp_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 @@ -1480,8 +1476,8 @@ starg_exclude_p2mp_ipv4_ipv4() starg_exclude_p2mp_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 
+ local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 @@ -1498,8 +1494,8 @@ starg_exclude_p2mp_ipv6_ipv4() starg_exclude_p2mp_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 @@ -1516,8 +1512,8 @@ starg_exclude_p2mp_ipv4_ipv6() starg_exclude_p2mp_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 @@ -1576,8 +1572,8 @@ starg_include_p2mp_common() starg_include_p2mp_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 @@ -1594,8 +1590,8 @@ starg_include_p2mp_ipv4_ipv4() starg_include_p2mp_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 @@ -1612,8 +1608,8 @@ starg_include_p2mp_ipv6_ipv4() starg_include_p2mp_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 @@ -1630,8 +1626,8 @@ starg_include_p2mp_ipv4_ipv6() starg_include_p2mp_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 @@ -1709,8 +1705,8 @@ egress_vni_translation_common() egress_vni_translation_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local proto="ipv4" @@ -1727,8 +1723,8 @@ egress_vni_translation_ipv4_ipv4() egress_vni_translation_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local proto="ipv6" @@ -1745,8 +1741,8 @@ egress_vni_translation_ipv6_ipv4() egress_vni_translation_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local proto="ipv4" @@ -1763,8 +1759,8 @@ egress_vni_translation_ipv4_ipv6() egress_vni_translation_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local proto="ipv6" @@ -1929,8 +1925,8 @@ all_zeros_mdb_common() all_zeros_mdb_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.101 local vtep2_ip=198.51.100.102 local vtep3_ip=198.51.100.103 @@ -1947,8 +1943,8 @@ all_zeros_mdb_ipv4() all_zeros_mdb_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local vtep3_ip=2001:db8:3000::1 @@ -2021,8 +2017,8 @@ mdb_fdb_common() mdb_fdb_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -2040,8 +2036,8 @@ mdb_fdb_ipv4_ipv4() mdb_fdb_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 @@ -2059,8 +2055,8 @@ mdb_fdb_ipv6_ipv4() mdb_fdb_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -2078,8 +2074,8 @@ mdb_fdb_ipv4_ipv6() mdb_fdb_ipv6_ipv6() { - local ns1=ns1_v6 - local 
ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 @@ -2166,7 +2162,7 @@ mdb_torture_common() mdb_torture_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=239.1.1.1 @@ -2183,7 +2179,7 @@ mdb_torture_ipv4_ipv4() mdb_torture_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=ff0e::1 @@ -2200,7 +2196,7 @@ mdb_torture_ipv6_ipv4() mdb_torture_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=239.1.1.1 @@ -2217,7 +2213,7 @@ mdb_torture_ipv4_ipv6() mdb_torture_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=ff0e::1 -- cgit v1.2.3-70-g09d2 From d79e907b425d1dcc831f9eddbdb09682292faed0 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:56 +0800 Subject: selftests/net: convert test_vxlan_nolocalbypass.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_nolocalbypass.sh TEST: localbypass enabled [ OK ] TEST: Packet received by local VXLAN device - localbypass [ OK ] TEST: localbypass disabled [ OK ] TEST: Packet not received by local VXLAN device - nolocalbypass [ OK ] TEST: localbypass enabled [ OK ] TEST: Packet received by local VXLAN device - localbypass [ OK ] Tests passed: 6 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/test_vxlan_nolocalbypass.sh | 48 +++++++++++----------- 1 file changed, 23 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh index f75212bf142c..b8805983b728 100755 --- a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh +++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh @@ -9,9 +9,8 @@ # option and verifies that packets are no longer received by the second VXLAN # device. +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 TESTS=" nolocalbypass @@ -98,20 +97,19 @@ tc_check_packets() setup() { - ip netns add ns1 + setup_ns ns1 - ip -n ns1 link set dev lo up - ip -n ns1 address add 192.0.2.1/32 dev lo - ip -n ns1 address add 198.51.100.1/32 dev lo + ip -n $ns1 address add 192.0.2.1/32 dev lo + ip -n $ns1 address add 198.51.100.1/32 dev lo - ip -n ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ + ip -n $ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ dstport 4789 nolearning - ip -n ns1 link add name vx1 up type vxlan id 100 dstport 4790 + ip -n $ns1 link add name vx1 up type vxlan id 100 dstport 4790 } cleanup() { - ip netns del ns1 &> /dev/null + cleanup_ns $ns1 } ################################################################################ @@ -122,40 +120,40 @@ nolocalbypass() local smac=00:01:02:03:04:05 local dmac=00:0a:0b:0c:0d:0e - run_cmd "bridge -n ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" + run_cmd "bridge -n $ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" - run_cmd "tc -n ns1 qdisc add dev vx1 clsact" - run_cmd "tc -n ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $ns1 qdisc add dev vx1 clsact" + run_cmd "tc -n $ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n ns1 qdisc add dev lo clsact" - run_cmd "tc -n ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" + run_cmd "tc -n $ns1 qdisc add dev lo clsact" + run_cmd "tc -n $ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" log_test $? 0 "localbypass enabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 1 + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 log_test $? 0 "Packet received by local VXLAN device - localbypass" - run_cmd "ip -n ns1 link set dev vx0 type vxlan nolocalbypass" + run_cmd "ip -n $ns1 link set dev vx0 type vxlan nolocalbypass" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" log_test $? 0 "localbypass disabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 1 + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass" - run_cmd "ip -n ns1 link set dev vx0 type vxlan localbypass" + run_cmd "ip -n $ns1 link set dev vx0 type vxlan localbypass" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" log_test $? 
0 "localbypass enabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 2 + tc_check_packets "$ns1" "dev vx1 ingress" 101 2 log_test $? 0 "Packet received by local VXLAN device - localbypass" } -- cgit v1.2.3-70-g09d2 From d6aab1f632978880660f41c48b760dd48461dcfb Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:57 +0800 Subject: selftests/net: convert test_vxlan_under_vrf.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_under_vrf.sh Checking HV connectivity [ OK ] Check VM connectivity through VXLAN (underlay in the default VRF) [ OK ] Check VM connectivity through VXLAN (underlay in a VRF) [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/net/test_vxlan_under_vrf.sh | 70 +++++++++++----------- 1 file changed, 36 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index 1fd1250ebc66..ae8fbe3f0779 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -43,15 +43,14 @@ # This tests both the connectivity between vm-1 and vm-2, and that the underlay # can be moved in and out of the vrf by unsetting and setting veth0's master. +source lib.sh set -e cleanup() { ip link del veth-hv-1 2>/dev/null || true ip link del veth-tap 2>/dev/null || true - for ns in hv-1 hv-2 vm-1 vm-2; do - ip netns del $ns 2>/dev/null || true - done + cleanup_ns $hv_1 $hv_2 $vm_1 $vm_2 } # Clean start @@ -60,72 +59,75 @@ cleanup &> /dev/null [[ $1 == "clean" ]] && exit 0 trap cleanup EXIT +setup_ns hv_1 hv_2 vm_1 vm_2 +hv[1]=$hv_1 +hv[2]=$hv_2 +vm[1]=$vm_1 +vm[2]=$vm_2 # Setup "Hypervisors" simulated with netns ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking() { - hv=$1 + id=$1 - ip netns add hv-$hv - ip link set veth-hv-$hv netns hv-$hv - ip -netns hv-$hv link set veth-hv-$hv name veth0 + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 - ip -netns hv-$hv link add vrf-underlay type vrf table 1 - ip -netns hv-$hv link set vrf-underlay up - ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 - ip -netns hv-$hv link set veth0 up + ip -netns ${hv[$id]} link add vrf-underlay type vrf table 1 + ip -netns ${hv[$id]} link set vrf-underlay up + ip -netns ${hv[$id]} addr add 172.16.0.$id/24 dev veth0 + ip -netns ${hv[$id]} link set veth0 up - ip -netns hv-$hv link add br0 type bridge - ip -netns hv-$hv link set br0 up + ip -netns ${hv[$id]} link add br0 type bridge + ip -netns ${hv[$id]} link set br0 up - ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 - ip -netns hv-$hv link set vxlan0 master br0 - ip -netns hv-$hv link set vxlan0 up + ip -netns ${hv[$id]} link add vxlan0 type vxlan id 10 local 172.16.0.$id dev veth0 dstport 4789 + ip -netns ${hv[$id]} link set vxlan0 master br0 + ip -netns ${hv[$id]} link set vxlan0 up } setup-hv-networking 1 setup-hv-networking 2 # Check connectivity between HVs by pinging hv-2 from hv-1 echo -n "Checking HV connectivity " -ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $hv_1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || 
(echo "[FAIL]"; false) echo "[ OK ]" # Setups a "VM" simulated by a netns an a veth pair setup-vm() { id=$1 - ip netns add vm-$id ip link add veth-tap type veth peer name veth-hv - ip link set veth-tap netns hv-$id - ip -netns hv-$id link set veth-tap master br0 - ip -netns hv-$id link set veth-tap up + ip link set veth-tap netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-tap master br0 + ip -netns ${hv[$id]} link set veth-tap up ip link set veth-hv address 02:1d:8d:dd:0c:6$id - ip link set veth-hv netns vm-$id - ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv - ip -netns vm-$id link set veth-hv up + ip link set veth-hv netns ${vm[$id]} + ip -netns ${vm[$id]} addr add 10.0.0.$id/24 dev veth-hv + ip -netns ${vm[$id]} link set veth-hv up } setup-vm 1 setup-vm 2 # Setup VTEP routes to make ARP work -bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent -bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent +bridge -netns $hv_1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns $hv_2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" # Move the underlay to a non-default VRF -ip -netns hv-1 link set veth0 vrf vrf-underlay -ip -netns hv-1 link set vxlan0 down -ip -netns hv-1 link set vxlan0 up -ip -netns hv-2 link set veth0 vrf vrf-underlay -ip -netns hv-2 link set vxlan0 down -ip -netns hv-2 link set vxlan0 up +ip -netns $hv_1 link set veth0 vrf vrf-underlay +ip -netns $hv_1 link set vxlan0 down +ip -netns $hv_1 link set vxlan0 up +ip -netns $hv_2 link set veth0 vrf vrf-underlay +ip -netns $hv_2 link set vxlan0 down +ip -netns $hv_2 link set vxlan0 up echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" -- cgit v1.2.3-70-g09d2 From 5ece8371747d57ae113aaf8a7b6468d6681d099f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:58 +0800 Subject: selftests/net: convert test_vxlan_vnifiltering.sh to run it in unique namespace Here is the test result after conversion. ]# ./test_vxlan_vnifiltering.sh TEST: Create traditional vxlan device [ OK ] TEST: Cannot create vnifilter device without external flag [ OK ] TEST: Creating external vxlan device with vnifilter flag [ OK ] ... TEST: VM connectivity over traditional vxlan (ipv6 default rdst) [ OK ] TEST: VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst) [ OK ] Tests passed: 27 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../selftests/net/test_vxlan_vnifiltering.sh | 154 +++++++++++++-------- 1 file changed, 95 insertions(+), 59 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 8c3ac0a72545..6127a78ee988 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -78,10 +78,8 @@ # # # This test tests the new vxlan vnifiltering api - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS=" @@ -148,18 +146,18 @@ run_cmd() } check_hv_connectivity() { - ip netns exec hv-1 ping -c 1 -W 1 $1 &>/dev/null + ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null sleep 1 - ip netns exec hv-1 ping -c 1 -W 1 $2 &>/dev/null + ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec vm-11 ping -c 1 -W 1 10.0.10.12" + run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec vm-21 ping -c 1 -W 1 10.0.10.22" + run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)" } @@ -167,26 +165,23 @@ cleanup() { ip link del veth-hv-1 2>/dev/null || true ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true - for ns in hv-1 hv-2 vm-11 vm-21 vm-12 vm-22 vm-31 vm-32; do - ip netns del $ns 2>/dev/null || true - done + cleanup_ns $hv_1 $hv_2 $vm_11 $vm_21 $vm_12 $vm_22 $vm_31 $vm_32 } trap cleanup EXIT setup-hv-networking() { - hv=$1 + id=$1 local1=$2 mask1=$3 local2=$4 mask2=$5 - ip netns add hv-$hv - ip link set veth-hv-$hv netns hv-$hv - ip -netns hv-$hv link set veth-hv-$hv name veth0 - ip -netns hv-$hv addr add $local1/$mask1 dev veth0 - ip -netns hv-$hv addr add $local2/$mask2 dev veth0 - ip -netns hv-$hv link set veth0 up + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 + ip -netns ${hv[$id]} addr add $local1/$mask1 dev veth0 + ip -netns ${hv[$id]} addr add $local2/$mask2 dev veth0 + ip -netns ${hv[$id]} link set veth0 up } # Setups a "VM" simulated by a netns an a veth pair @@ -208,21 +203,20 @@ setup-vm() { lastvxlandev="" # create bridge - ip -netns hv-$hvid link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ + ip -netns ${hv[$hvid]} link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ mcast_snooping 0 - ip -netns hv-$hvid link set br$brid up + ip -netns ${hv[$hvid]} link set br$brid up # create vm namespace and interfaces and connect to hypervisor # namespace - ip netns add vm-$vmid hvvethif="vethhv-$vmid" vmvethif="veth-$vmid" ip link add $hvvethif type veth peer name $vmvethif - ip link set $hvvethif netns hv-$hvid - ip link set $vmvethif netns vm-$vmid - ip -netns hv-$hvid link set $hvvethif up - ip -netns vm-$vmid link set $vmvethif up - ip -netns hv-$hvid link set $hvvethif master br$brid + ip link set $hvvethif netns ${hv[$hvid]} + ip link set $vmvethif netns ${vm[$vmid]} + ip -netns ${hv[$hvid]} link set $hvvethif up + ip -netns ${vm[$vmid]} link set $vmvethif up + ip -netns ${hv[$hvid]} link set $hvvethif master br$brid # configure VM vlan/vni filtering on hypervisor for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ') @@ -234,9 +228,9 @@ setup-vm() { local vtype=$(echo $vmap | awk -F'-' '{print ($5)}') local port=$(echo 
$vmap | awk -F'-' '{print ($6)}') - ip -netns vm-$vmid link add name $vmvethif.$vid link $vmvethif type vlan id $vid - ip -netns vm-$vmid addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid - ip -netns vm-$vmid link set $vmvethif.$vid up + ip -netns ${vm[$vmid]} link add name $vmvethif.$vid link $vmvethif type vlan id $vid + ip -netns ${vm[$vmid]} addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid + ip -netns ${vm[$vmid]} link set $vmvethif.$vid up tid=$vid vxlandev="vxlan$brid" @@ -268,35 +262,35 @@ setup-vm() { # create vxlan device if [ "$vxlandev" != "$lastvxlandev" ]; then - ip -netns hv-$hvid link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null - ip -netns hv-$hvid link set $vxlandev master br$brid - ip -netns hv-$hvid link set $vxlandev up + ip -netns ${hv[$hvid]} link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null + ip -netns ${hv[$hvid]} link set $vxlandev master br$brid + ip -netns ${hv[$hvid]} link set $vxlandev up lastvxlandev=$vxlandev fi # add vlan - bridge -netns hv-$hvid vlan add vid $vid dev $hvvethif - bridge -netns hv-$hvid vlan add vid $vid pvid dev $vxlandev + bridge -netns ${hv[$hvid]} vlan add vid $vid dev $hvvethif + bridge -netns ${hv[$hvid]} vlan add vid $vid pvid dev $vxlandev # Add bridge vni filter for tx if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then - bridge -netns hv-$hvid link set dev $vxlandev vlan_tunnel on - bridge -netns hv-$hvid vlan add dev $vxlandev vid $vid tunnel_info id $tid + bridge -netns ${hv[$hvid]} link set dev $vxlandev vlan_tunnel on + bridge -netns ${hv[$hvid]} vlan add dev $vxlandev vid $vid tunnel_info id $tid fi if [[ -n $vtype && $vtype == "metadata" ]]; then - bridge -netns hv-$hvid fdb add 00:00:00:00:00:00 dev $vxlandev \ + bridge -netns ${hv[$hvid]} fdb add 00:00:00:00:00:00 dev $vxlandev \ src_vni $tid vni $tid dst $group self elif [[ -n $vtype && $vtype == "vnifilter" ]]; then # Add per vni rx filter with 'bridge vni' api - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then # Add per vni group config with 'bridge vni' api if [ -n "$group" ]; then if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid group $group else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid remote $group fi fi fi @@ -306,14 +300,14 @@ setup-vm() { setup_vnifilter_api() { ip link add veth-host type veth peer name veth-testns - ip netns add testns - ip link set veth-testns netns testns + setup_ns testns + ip link set veth-testns netns $testns } cleanup_vnifilter_api() { ip link del veth-host 2>/dev/null || true - ip netns del testns 2>/dev/null || true + ip netns del $testns 2>/dev/null || true } # tests vxlan filtering api @@ -331,52 +325,52 @@ vxlan_vnifilter_api() # Duplicate vni test # create non-vnifiltering traditional vni device - run_cmd "ip -netns testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" log_test $? 
0 "Create traditional vxlan device" # create vni filtering device - run_cmd "ip -netns testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" log_test $? 1 "Cannot create vnifilter device without external flag" - run_cmd "ip -netns testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" log_test $? 0 "Creating external vxlan device with vnifilter flag" - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 100" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 100" log_test $? 0 "Cannot set in-use vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200" log_test $? 0 "Set new vni id on vnifiltering device" - run_cmd "ip -netns testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" log_test $? 0 "Create second external vxlan device with vnifilter flag" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 200" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 200" log_test $? 255 "Cannot set in-use vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" log_test $? 0 "Set new vni id on vnifiltering device" # check in bridge vni show - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" log_test $? 0 "Update vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 400" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 400" log_test $? 0 "Add new vni id on vnifiltering device" # add multicast group per vni - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200 group $group" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200 group $group" log_test $? 0 "Set multicast group on existing vni" # add multicast group per vni - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300 group $group" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300 group $group" log_test $? 0 "Set multicast group on existing vni" # set vnifilter on an existing external vxlan device - run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external vnifilter" + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external vnifilter" log_test $? 2 "Cannot set vnifilter flag on a device" # change vxlan vnifilter flag - run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external novnifilter" + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external novnifilter" log_test $? 
2 "Cannot unset vnifilter flag on a device" } @@ -390,12 +384,20 @@ vxlan_vnifilter_datapath() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 @@ -415,12 +417,20 @@ vxlan_vnifilter_datapath_pervni() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0 @@ -440,12 +450,20 @@ vxlan_vnifilter_datapath_mgroup() group="239.1.1.100" group6="ff07::1" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1 setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1 @@ -464,12 +482,20 @@ vxlan_vnifilter_datapath_mgroup_pervni() group="239.1.1.100" group6="ff07::1" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1 setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1 @@ -486,12 +512,22 @@ vxlan_vnifilter_metadata_and_traditional_mix() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_31 vm_12 vm_22 vm_32 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[31]=$vm_31 + vm[12]=$vm_12 + vm[22]=$vm_22 + vm[32]=$vm_32 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0 @@ -504,13 +540,13 @@ vxlan_vnifilter_metadata_and_traditional_mix() check_vm_connectivity "vnifiltering vxlan pervni remote mix" # check VM connectivity over traditional/non-vxlan filtering vxlan devices - 
run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.30.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.30.32" log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)" - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.40.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.40.32" log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)" - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.50.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.50.32" log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)" } -- cgit v1.2.3-70-g09d2 From bedc99abcaf88df25b044fc4e3c80d69d0dbfc9b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:07:59 +0800 Subject: selftests/net: convert vrf_route_leaking.sh to run it in unique namespace Here is the test result after conversion. ]# ./vrf_route_leaking.sh ########################################################################### IPv4 (sym route): VRF ICMP ttl error route lookup ping ########################################################################### TEST: Basic IPv4 connectivity [ OK ] TEST: Ping received ICMP ttl exceeded [ OK ] ... TEST: Basic IPv6 connectivity [ OK ] TEST: Traceroute6 reports a hop on r1 [ OK ] Tests passed: 18 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/vrf_route_leaking.sh | 201 +++++++++++------------ 1 file changed, 96 insertions(+), 105 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index dedc52562b4f..2da32f4c479b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -58,6 +58,7 @@ # to send an ICMP error back to the source when the ttl of a packet reaches 1 # while it is forwarded between different vrfs. 
+source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -171,11 +172,7 @@ run_cmd_grep() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_vrf() @@ -212,72 +209,69 @@ setup_sym() # # create nodes as namespaces - # - for ns in h1 h2 r1; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 + for ns in $h1 $h2 $r1; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up # # h1 # - ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 up + ip -netns $h1 addr add dev eth0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 up + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 up # h2 to h1 via r1 - ip -netns h2 route add default via ${R1_N2_IP} dev eth0 - ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0 + ip -netns $h2 route add default via ${R1_N2_IP} dev eth0 + ip -netns $h2 -6 route add default via ${R1_N2_IP6} dev eth0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + ip 
-netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # Route leak from red to blue - ip -netns r1 route add vrf red ${H1_N1} dev blue - ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue + ip -netns $r1 route add vrf red ${H1_N1} dev blue + ip -netns $r1 -6 route add vrf red ${H1_N1_6} dev blue # Wait for ip config to settle @@ -293,90 +287,87 @@ setup_asym() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h1 link add eth1 type veth peer name r2h1 - ip -netns h1 link set r2h1 netns r2 name eth0 up + ip -netns $h1 link add eth1 type veth peer name r2h1 + ip -netns $h1 link set r2h1 netns $r2 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up - ip -netns h2 link add eth1 type veth peer name r2h2 - ip -netns h2 link set r2h2 netns r2 name eth1 up + ip -netns $h2 link add eth1 type veth peer name r2h2 + ip -netns $h2 link set r2h2 netns $r2 name eth1 up # # h1 # - ip -netns h1 link add br0 type bridge - ip -netns h1 link set br0 up - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 master br0 up - ip -netns h1 link set eth1 master br0 up + ip -netns $h1 link add br0 type bridge + ip -netns $h1 link set br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 master br0 up + ip -netns $h1 link set eth1 master br0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 # # h2 # - ip -netns h2 link add br0 type bridge - ip -netns h2 link set br0 up - ip -netns h2 addr add dev br0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 master br0 up - ip -netns h2 link set eth1 master br0 up + ip -netns $h2 link add br0 type bridge + ip -netns $h2 link set br0 up + ip -netns $h2 addr add dev br0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 master br0 up + ip -netns $h2 link set eth1 master br0 up # h2 to h1 via r2 - ip -netns h2 route add default via ${R2_N2_IP} dev br0 - ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0 + ip 
-netns $h2 route add default via ${R2_N2_IP} dev br0 + ip -netns $h2 -6 route add default via ${R2_N2_IP6} dev br0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + ip -netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # No route leak from red to blue # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle sleep 2 @@ -384,14 +375,14 @@ setup_asym() check_connectivity() { - ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 + ip netns exec $h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 log_test $? 0 "Basic IPv4 connectivity" return $? } check_connectivity6() { - ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 + ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 log_test $? 0 "Basic IPv6 connectivity" return $? } @@ -426,7 +417,7 @@ ipv4_traceroute() check_connectivity || return - run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP} + run_cmd_grep "${R1_N1_IP}" ip netns exec $h1 traceroute ${H2_N2_IP} log_test $? 0 "Traceroute reports a hop on r1" } @@ -449,7 +440,7 @@ ipv6_traceroute() check_connectivity6 || return - run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6} + run_cmd_grep "${R1_N1_IP6}" ip netns exec $h1 traceroute6 ${H2_N2_IP6} log_test $? 0 "Traceroute6 reports a hop on r1" } @@ -470,7 +461,7 @@ ipv4_ping_ttl() check_connectivity || return - run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Time to live exceeded" ip netns exec $h1 ping -t1 -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP ttl exceeded" } @@ -491,7 +482,7 @@ ipv4_ping_frag() check_connectivity || return - run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Frag needed" ip netns exec $h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP Frag needed" } @@ -512,7 +503,7 @@ ipv6_ping_ttl() check_connectivity6 || return - run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Time exceeded: Hop limit" ip netns exec $h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} log_test $? 
0 "Ping received ICMP Hop limit" } @@ -533,7 +524,7 @@ ipv6_ping_frag() check_connectivity6 || return - run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Packet too big" ip netns exec $h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} log_test $? 0 "Ping received ICMP Packet too big" } -- cgit v1.2.3-70-g09d2 From 51f64acbe36e13e113d284fcdefc751216984c01 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:08:00 +0800 Subject: selftests/net: convert vrf_strict_mode_test.sh to run it in unique namespace Here is the test result after conversion. ]# ./vrf_strict_mode_test.sh ################################################################################ TEST SECTION: VRF strict_mode test on init network namespace ################################################################################ TEST: init: net.vrf.strict_mode is available [ OK ] TEST: init: strict_mode=0 by default, 0 vrfs [ OK ] ... TEST: init: check strict_mode=1 [ OK ] TEST: testns-HvoZkB: check strict_mode=0 [ OK ] Tests passed: 37 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- .../testing/selftests/net/vrf_strict_mode_test.sh | 47 ++++++++++------------ 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh index 417d214264f3..01552b542544 100755 --- a/tools/testing/selftests/net/vrf_strict_mode_test.sh +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -3,9 +3,7 @@ # This test is designed for testing the new VRF strict_mode functionality. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh ret=0 # identifies the "init" network namespace which is often called root network @@ -247,13 +245,12 @@ setup() { modprobe vrf - ip netns add testns - ip netns exec testns ip link set lo up + setup_ns testns } cleanup() { - ip netns del testns 2>/dev/null + ip netns del $testns 2>/dev/null ip link del vrf100 2>/dev/null ip link del vrf101 2>/dev/null @@ -298,28 +295,28 @@ vrf_strict_mode_tests_testns() { log_section "VRF strict_mode test on testns network namespace" - vrf_strict_mode_check_support testns + vrf_strict_mode_check_support $testns - strict_mode_check_default testns + strict_mode_check_default $testns - enable_strict_mode_and_check testns + enable_strict_mode_and_check $testns - add_vrf_and_check testns vrf100 100 - config_vrf_and_check testns 10.0.100.1/24 vrf100 + add_vrf_and_check $testns vrf100 100 + config_vrf_and_check $testns 10.0.100.1/24 vrf100 - add_vrf_and_check_fail testns vrf101 100 + add_vrf_and_check_fail $testns vrf101 100 - add_vrf_and_check_fail testns vrf102 100 + add_vrf_and_check_fail $testns vrf102 100 - add_vrf_and_check testns vrf200 200 + add_vrf_and_check $testns vrf200 200 - disable_strict_mode_and_check testns + disable_strict_mode_and_check $testns - add_vrf_and_check testns vrf101 100 + add_vrf_and_check $testns vrf101 100 - add_vrf_and_check testns vrf102 100 + add_vrf_and_check $testns vrf102 100 - #the strict_mode is disabled in the testns + #the strict_mode is disabled in the $testns } vrf_strict_mode_tests_mix() @@ -328,25 +325,25 @@ vrf_strict_mode_tests_mix() read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 - del_vrf_and_check testns vrf101 + del_vrf_and_check $testns vrf101 - del_vrf_and_check testns vrf102 
+ del_vrf_and_check $testns vrf102 disable_strict_mode_and_check init - enable_strict_mode_and_check testns + enable_strict_mode_and_check $testns enable_strict_mode_and_check init enable_strict_mode_and_check init - disable_strict_mode_and_check testns - disable_strict_mode_and_check testns + disable_strict_mode_and_check $testns + disable_strict_mode_and_check $testns read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 } ################################################################################ -- cgit v1.2.3-70-g09d2 From 61b12ebe439adfd9853c13499c2099e2a6d41771 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 6 Dec 2023 15:08:01 +0800 Subject: selftests/net: convert vrf-xfrm-tests.sh to run it in unique namespace Here is the test result after conversion. ]# ./vrf-xfrm-tests.sh No qdisc on VRF device TEST: IPv4 no xfrm policy [ OK ] TEST: IPv6 no xfrm policy [ OK ] TEST: IPv4 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy with VRF in selector [ OK ] TEST: IPv4 xfrm policy with xfrm device [ OK ] TEST: IPv6 xfrm policy with xfrm device [ OK ] netem qdisc on VRF device TEST: IPv4 no xfrm policy [ OK ] TEST: IPv6 no xfrm policy [ OK ] TEST: IPv4 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy based on address [ OK ] TEST: IPv6 xfrm policy with VRF in selector [ OK ] TEST: IPv4 xfrm policy with xfrm device [ OK ] TEST: IPv6 xfrm policy with xfrm device [ OK ] Tests passed: 14 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/vrf-xfrm-tests.sh | 77 +++++++++++++-------------- 1 file changed, 36 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh index 452638ae8aed..b64dd891699d 100755 --- a/tools/testing/selftests/net/vrf-xfrm-tests.sh +++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh @@ -3,9 +3,7 @@ # # Various combinations of VRF with xfrms and qdisc. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 ret=0 @@ -67,7 +65,7 @@ run_cmd_host1() printf " COMMAND: $cmd\n" fi - out=$(eval ip netns exec host1 $cmd 2>&1) + out=$(eval ip netns exec $host1 $cmd 2>&1) rc=$? 
if [ "$VERBOSE" = "1" ]; then if [ -n "$out" ]; then @@ -116,9 +114,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -177,25 +172,25 @@ connect_ns() cleanup() { - ip netns del host1 - ip netns del host2 + cleanup_ns $host1 $host2 } setup() { - create_ns "host1" - create_ns "host2" + setup_ns host1 host2 + create_ns "$host1" + create_ns "$host2" - connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ - "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 + connect_ns "$host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ + "$host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 - create_vrf "host1" ${VRF} ${TABLE} - ip -netns host1 link set dev eth0 master ${VRF} + create_vrf "$host1" ${VRF} ${TABLE} + ip -netns $host1 link set dev eth0 master ${VRF} } cleanup_xfrm() { - for ns in host1 host2 + for ns in $host1 $host2 do for x in state policy do @@ -218,57 +213,57 @@ setup_xfrm() # # host1 - IPv4 out - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h1_4} dst ${h2_4} ${devarg} dir out \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host2 - IPv4 in - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h1_4} dst ${h2_4} dir in \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host1 - IPv4 in - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h2_4} dst ${h1_4} ${devarg} dir in \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host2 - IPv4 out - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h2_4} dst ${h1_4} dir out \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host1 - IPv6 out - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h1_6} dst ${h2_6} ${devarg} dir out \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host2 - IPv6 in - ip -6 -netns host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h1_6} dst ${h2_6} dir in \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host1 - IPv6 in - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h2_6} dst ${h1_6} ${devarg} dir in \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # host2 - IPv6 out - ip -6 -netns host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h2_6} dst ${h1_6} dir out \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # # state # - ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_4} dst ${h2_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ @@ -276,14 +271,14 @@ setup_xfrm() sel src ${h1_4} dst ${h2_4} - ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + ip -netns $host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_4} dst ${h1_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + 
ip -netns $host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ @@ -291,14 +286,14 @@ setup_xfrm() sel src ${h2_4} dst ${h1_4} - ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_6} dst ${h2_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ @@ -306,14 +301,14 @@ setup_xfrm() sel src ${h1_6} dst ${h2_6} - ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_6} dst ${h1_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ @@ -323,22 +318,22 @@ setup_xfrm() cleanup_xfrm_dev() { - ip -netns host1 li del xfrm0 - ip -netns host2 addr del ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr del ${XFRM2_6}/64 dev eth0 + ip -netns $host1 li del xfrm0 + ip -netns $host2 addr del ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr del ${XFRM2_6}/64 dev eth0 } setup_xfrm_dev() { local vrfarg="vrf ${VRF}" - ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID} - ip -netns host1 li set xfrm0 ${vrfarg} up - ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0 - ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0 + ip -netns $host1 li add type xfrm dev eth0 if_id ${IF_ID} + ip -netns $host1 li set xfrm0 ${vrfarg} up + ip -netns $host1 addr add ${XFRM1_4}/24 dev xfrm0 + ip -netns $host1 addr add ${XFRM1_6}/64 dev xfrm0 - ip -netns host2 addr add ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr add ${XFRM2_6}/64 dev eth0 + ip -netns $host2 addr add ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr add ${XFRM2_6}/64 dev eth0 setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}" } -- cgit v1.2.3-70-g09d2 From 8b7b0e5fe47de90ba6c350f9abece589fb637f79 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Fri, 8 Dec 2023 00:17:03 -0600 Subject: bpf: Load vmlinux btf for any struct_ops map In libbpf, when determining whether we need to load vmlinux btf, we're currently (among other things) checking whether there is any struct_ops program present in the object. This works for most realistic struct_ops maps, as a struct_ops map is of course typically composed of one or more struct_ops programs. However, that technically need not be the case. A struct_ops interface could be defined which allows a map to be specified with only non-prog fields, and which provides default behavior if no struct_ops prog is actually provided. For sched_ext, for example, you technically only need to specify the name of the scheduler in the struct_ops map, with the core scheduler logic providing default behavior if no prog is actually specified.
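As a rough sketch of what such a prog-less map could look like in a BPF object (illustrative only: struct sched_ext_ops and its .name field are assumed here for the example and are not defined by this patch):

    #include <vmlinux.h>
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    /* no struct_ops programs at all, only a non-prog field */
    SEC(".struct_ops")
    struct sched_ext_ops minimal_ops = {
            .name = "minimal",
    };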
If we were to define and try to load such a struct_ops map, we would crash in libbpf when initializing it as obj->btf_vmlinux will be NULL: Reading symbols from minimal... (gdb) r Starting program: minimal_example [Thread debugging using libthread_db enabled] Using host libthread_db library "/usr/lib/libthread_db.so.1". Program received signal SIGSEGV, Segmentation fault. 0x000055555558308c in btf__type_cnt (btf=0x0) at btf.c:612 612 return btf->start_id + btf->nr_types; (gdb) bt type_name=0x5555555d99e3 "sched_ext_ops", kind=4) at btf.c:914 kind=4) at btf.c:942 type=0x7fffffffe558, type_id=0x7fffffffe548, ... data_member=0x7fffffffe568) at libbpf.c:948 kern_btf=0x0) at libbpf.c:1017 at libbpf.c:8059 So as to account for such bare-bones struct_ops maps, let's update obj_needs_vmlinux_btf() to also iterate over an obj's maps and check whether any of them are struct_ops maps. Signed-off-by: David Vernet Signed-off-by: Andrii Nakryiko Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20231208061704.400463-1-void@manifault.com --- tools/lib/bpf/libbpf.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ea9b8158c20d..ac54ebc0629f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3054,9 +3054,15 @@ static bool prog_needs_vmlinux_btf(struct bpf_program *prog) return false; } +static bool map_needs_vmlinux_btf(struct bpf_map *map) +{ + return bpf_map__is_struct_ops(map); +} + static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) { struct bpf_program *prog; + struct bpf_map *map; int i; /* CO-RE relocations need kernel BTF, only when btf_custom_path @@ -3081,6 +3087,11 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) return true; } + bpf_object__for_each_map(map, obj) { + if (map_needs_vmlinux_btf(map)) + return true; + } + return false; } -- cgit v1.2.3-70-g09d2 From 6b4a64bafd107e521c01eec3453ce94a3fb38529 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 7 Dec 2023 22:25:18 -0500 Subject: bpf: Fix accesses to uninit stack slots Privileged programs are supposed to be able to read uninitialized stack memory (ever since 6715df8d5) but, before this patch, these accesses were permitted inconsistently. In particular, accesses were permitted above state->allocated_stack, but not below it. In other words, if the stack was already "large enough", the access was permitted, but otherwise the access was rejected instead of being allowed to "grow the stack". This undesired rejection was happening in two places: - in check_stack_slot_within_bounds() - in check_stack_range_initialized() This patch arranges for these accesses to be permitted. A bunch of tests that were relying on the old rejection had to change; all of them were changed to also run unprivileged, in which case the old behavior persists. One test couldn't be updated - global_func16 - because it can't run unprivileged for other reasons. This patch also fixes the tracking of the stack size for variable-offset reads. This second fix is bundled in the same commit as the first one because they're inter-related. Before this patch, writes to the stack using registers containing a variable offset (as opposed to registers with fixed, known values) were not properly contributing to the function's needed stack size. As a result, it was possible for a program to verify, but then to attempt to read out-of-bounds data at runtime because too small a stack had been allocated for it.
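A minimal sketch of the problematic pattern (this paraphrases the verifier_var_off selftest touched below; nothing new is introduced):

    r2 = *(u32 *)(r1 + 0);    /* unknown value               */
    r2 &= 8;                  /* now either 0 or 8           */
    r2 -= 16;                 /* now either -16 or -8        */
    r2 += r10;                /* points at fp-16 or fp-8     */
    r0 = 0;
    *(u64 *)(r2 + 0) = r0;    /* variable-offset stack write */

Before this patch, the recorded stack depth for such a write did not reflect the deepest slot the register may point to (fp-16 here).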
Each function tracks the size of the stack it needs in bpf_subprog_info.stack_depth, which is maintained by update_stack_depth(). For regular memory accesses, check_mem_access() was calling update_stack_depth() but it was passing in only the fixed part of the offset register, ignoring the variable offset. This was incorrect; the minimum possible value of that register should be used instead. This tracking is now fixed by centralizing the tracking of stack size in grow_stack_state(), and by lifting the calls to grow_stack_state() to check_stack_access_within_bounds() as suggested by Andrii. The code is now simpler and more convincingly tracks the correct maximum stack size. check_stack_range_initialized() can now rely on enough stack having been allocated for the access; this helps with the fix for the first issue. A few tests were changed to also check the stack depth computation. The one that fails without this patch is verifier_var_off:stack_write_priv_vs_unpriv. Fixes: 01f810ace9ed3 ("bpf: Allow variable-offset stack access") Reported-by: Hao Sun Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231208032519.260451-3-andreimatei1@gmail.com Closes: https://lore.kernel.org/bpf/CABWLsev9g8UP_c3a=1qbuZUi20tGoUXoU07FPf-5FLvhOKOY+Q@mail.gmail.com/ --- kernel/bpf/verifier.c | 65 +++++++++------------- tools/testing/selftests/bpf/progs/iters.c | 2 +- .../selftests/bpf/progs/test_global_func16.c | 2 +- .../selftests/bpf/progs/verifier_basic_stack.c | 8 +-- .../testing/selftests/bpf/progs/verifier_int_ptr.c | 5 +- .../selftests/bpf/progs/verifier_raw_stack.c | 5 +- .../testing/selftests/bpf/progs/verifier_var_off.c | 62 +++++++++++++++++---- .../selftests/bpf/verifier/atomic_cmpxchg.c | 11 ---- tools/testing/selftests/bpf/verifier/calls.c | 4 +- 9 files changed, 92 insertions(+), 72 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0e77bb52542d..de1e29fa467e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1259,7 +1259,10 @@ static int resize_reference_state(struct bpf_func_state *state, size_t n) return 0; } -static int grow_stack_state(struct bpf_func_state *state, int size) +/* Possibly update state->allocated_stack to be at least size bytes. Also + * possibly update the function's high-water mark in its bpf_subprog_info.
+ */ +static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size) { size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE; @@ -1271,6 +1274,11 @@ static int grow_stack_state(struct bpf_func_state *state, int size) return -ENOMEM; state->allocated_stack = size; + + /* update known max for given subprogram */ + if (env->subprog_info[state->subprogno].stack_depth < size) + env->subprog_info[state->subprogno].stack_depth = size; + return 0; } @@ -4440,9 +4448,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg = NULL; int insn_flags = insn_stack_access_flags(state->frameno, spi); - err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); - if (err) - return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ @@ -4595,10 +4600,6 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0)) writing_zero = true; - err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); - if (err) - return err; - for (i = min_off; i < max_off; i++) { int spi; @@ -5774,20 +5775,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } -static int update_stack_depth(struct bpf_verifier_env *env, - const struct bpf_func_state *func, - int off) -{ - u16 stack = env->subprog_info[func->subprogno].stack_depth; - - if (stack >= -off) - return 0; - - /* update known max for given subprogram */ - env->subprog_info[func->subprogno].stack_depth = -off; - return 0; -} - /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. @@ -6577,13 +6564,14 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, * The minimum valid offset is -MAX_BPF_STACK for writes, and * -state->allocated_stack for reads. 
*/ -static int check_stack_slot_within_bounds(s64 off, - struct bpf_func_state *state, - enum bpf_access_type t) +static int check_stack_slot_within_bounds(struct bpf_verifier_env *env, + s64 off, + struct bpf_func_state *state, + enum bpf_access_type t) { int min_valid_off; - if (t == BPF_WRITE) + if (t == BPF_WRITE || env->allow_uninit_stack) min_valid_off = -MAX_BPF_STACK; else min_valid_off = -state->allocated_stack; @@ -6632,7 +6620,7 @@ static int check_stack_access_within_bounds( max_off = reg->smax_value + off + access_size; } - err = check_stack_slot_within_bounds(min_off, state, type); + err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ @@ -6647,8 +6635,10 @@ static int check_stack_access_within_bounds( verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n", err_extra, regno, tn_buf, off, access_size); } + return err; } - return err; + + return grow_stack_state(env, state, round_up(-min_off, BPF_REG_SIZE)); } /* check whether memory at (regno + off) is accessible for t = (read | write) @@ -6663,7 +6653,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; - struct bpf_func_state *state; int size, err = 0; size = bpf_size_to_bytes(bpf_size); @@ -6806,11 +6795,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err) return err; - state = func(env, reg); - err = update_stack_depth(env, state, off); - if (err) - return err; - if (t == BPF_READ) err = check_stack_read(env, regno, off, size, value_regno); @@ -7004,7 +6988,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i /* When register 'regno' is used to read the stack (either directly or through * a helper function) make sure that it's within stack boundary and, depending - * on the access type, that all elements of the stack are initialized. + * on the access type and privileges, that all elements of the stack are + * initialized. * * 'off' includes 'regno->off', but not its dynamic part (if any). * @@ -7112,8 +7097,11 @@ static int check_stack_range_initialized( slot = -i - 1; spi = slot / BPF_REG_SIZE; - if (state->allocated_stack <= slot) - goto err; + if (state->allocated_stack <= slot) { + verbose(env, "verifier bug: allocated_stack too small"); + return -EFAULT; + } + stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; if (*stype == STACK_MISC) goto mark; @@ -7137,7 +7125,6 @@ static int check_stack_range_initialized( goto mark; } -err: if (tnum_is_const(reg->var_off)) { verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", err_extra, regno, min_off, i - min_off, access_size); @@ -7162,7 +7149,7 @@ mark: * helper may write to the entire memory range. 
*/ } - return update_stack_depth(env, state, min_off); + return 0; } static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index b2181f850d3e..3aca3dc145b5 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -846,7 +846,7 @@ __naked int delayed_precision_mark(void) "call %[bpf_iter_num_next];" "if r0 == 0 goto 2f;" "if r6 != 42 goto 3f;" - "r7 = -32;" + "r7 = -33;" "call %[bpf_get_prandom_u32];" "r6 = r0;" "goto 1b;\n" diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c index e7206304632e..e3e64bc472cd 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func16.c +++ b/tools/testing/selftests/bpf/progs/test_global_func16.c @@ -13,7 +13,7 @@ __noinline int foo(int (*arr)[10]) } SEC("cgroup_skb/ingress") -__failure __msg("invalid indirect read from stack") +__success int global_func16(struct __sk_buff *skb) { int array[10]; diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c index 359df865a8f3..8d77cc5323d3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c @@ -27,8 +27,8 @@ __naked void stack_out_of_bounds(void) SEC("socket") __description("uninitialized stack1") -__failure __msg("invalid indirect read from stack") -__failure_unpriv +__success __log_level(4) __msg("stack depth 8") +__failure_unpriv __msg_unpriv("invalid indirect read from stack") __naked void uninitialized_stack1(void) { asm volatile (" \ @@ -45,8 +45,8 @@ __naked void uninitialized_stack1(void) SEC("socket") __description("uninitialized stack2") -__failure __msg("invalid read from stack") -__failure_unpriv +__success __log_level(4) __msg("stack depth 8") +__failure_unpriv __msg_unpriv("invalid read from stack") __naked void uninitialized_stack2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index 74d9cad469d9..9fc3fae5cd83 100644 --- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -5,9 +5,10 @@ #include #include "bpf_misc.h" -SEC("cgroup/sysctl") +SEC("socket") __description("ARG_PTR_TO_LONG uninitialized") -__failure __msg("invalid indirect read from stack R4 off -16+0 size 8") +__success +__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8") __naked void arg_ptr_to_long_uninitialized(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index efbfc3a4ad6a..f67390224a9c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -5,9 +5,10 @@ #include #include "bpf_misc.h" -SEC("tc") +SEC("socket") __description("raw_stack: no skb_load_bytes") -__failure __msg("invalid read from stack R6 off=-8 size=8") +__success +__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8") __naked void stack_no_skb_load_bytes(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index b7bdd7db3a35..c810f4f6f479 100644 --- 
a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -59,9 +59,10 @@ __naked void stack_read_priv_vs_unpriv(void) " ::: __clobber_all); } -SEC("lwt_in") +SEC("cgroup/skb") __description("variable-offset stack read, uninitialized") -__failure __msg("invalid variable-offset read from stack R2") +__success +__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root") __naked void variable_offset_stack_read_uninitialized(void) { asm volatile (" \ @@ -83,12 +84,55 @@ __naked void variable_offset_stack_read_uninitialized(void) SEC("socket") __description("variable-offset stack write, priv vs unpriv") -__success __failure_unpriv +__success +/* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset. + */ +__log_level(4) __msg("stack depth 16") +__failure_unpriv /* Variable stack access is rejected for unprivileged. */ __msg_unpriv("R2 variable stack access prohibited for !root") __retval(0) __naked void stack_write_priv_vs_unpriv(void) +{ + asm volatile (" \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 8-byte aligned */ \ + r2 &= 8; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-8 or \ + * fp-16, but we don't know which \ + */ \ + r2 += r10; \ + /* Dereference it for a stack write */ \ + r0 = 0; \ + *(u64*)(r2 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +/* Similar to the previous test, but this time also perform a read from the + * address written to with a variable offset. The read is allowed, showing that, + * after a variable-offset write, a priviledged program can read the slots that + * were in the range of that write (even if the verifier doesn't actually know if + * the slot being read was really written to or not. + * + * Despite this test being mostly a superset, the previous test is also kept for + * the sake of it checking the stack depth in the case where there is no read. + */ +SEC("socket") +__description("variable-offset stack write followed by read") +__success +/* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset. + */ +__log_level(4) __msg("stack depth 16") +__failure_unpriv +__msg_unpriv("R2 variable stack access prohibited for !root") +__retval(0) +__naked void stack_write_followed_by_read(void) { asm volatile (" \ /* Get an unknown value */ \ @@ -103,12 +147,7 @@ __naked void stack_write_priv_vs_unpriv(void) /* Dereference it for a stack write */ \ r0 = 0; \ *(u64*)(r2 + 0) = r0; \ - /* Now read from the address we just wrote. This shows\ - * that, after a variable-offset write, a priviledged\ - * program can read the slots that were in the range of\ - * that write (even if the verifier doesn't actually know\ - * if the slot being read was really written to or not.\ - */ \ + /* Now read from the address we just wrote. 
*/ \ r3 = *(u64*)(r2 + 0); \ r0 = 0; \ exit; \ @@ -282,9 +321,10 @@ __naked void access_min_out_of_bound(void) : __clobber_all); } -SEC("lwt_in") +SEC("cgroup/skb") __description("indirect variable-offset stack access, min_off < min_initialized") -__failure __msg("invalid indirect read from stack R2 var_off") +__success +__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root") __naked void access_min_off_min_initialized(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 319337bdcfc8..9a7b1106fda8 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -83,17 +83,6 @@ .result = REJECT, .errstr = "!read_ok", }, -{ - "Can't use cmpxchg on uninit memory", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 3), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid read from stack", -}, { "BPF_W cmpxchg should zero top 32 bits", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 3d5cd51071f0..ab25a81fd3a1 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1505,7 +1505,9 @@ .prog_type = BPF_PROG_TYPE_XDP, .fixup_map_hash_8b = { 23 }, .result = REJECT, - .errstr = "invalid read from stack R7 off=-16 size=8", + .errstr = "R0 invalid mem access 'scalar'", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid read from stack R7 off=-16 size=8", }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", -- cgit v1.2.3-70-g09d2 From 1720c42b90c8f14ffcb2f2f39a1abafc82a5b22e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Dec 2023 15:30:28 -0800 Subject: selftests/bpf: fix timer/test_bad_ret subtest on test_progs-cpuv4 flavor Because test_bad_ret main program is not written in assembly, we don't control instruction indices in timer_cb_ret_bad() subprog. This bites us in timer/test_bad_ret subtest, where we see difference between cpuv4 and other flavors. For now, make __msg() expectations not rely on instruction indices by anchoring them around bpf_get_prandom_u32 call. Once we have regex/glob support for __msg(), this can be expressed a bit more nicely, but for now just mitigating the problem with available means. 
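Roughly, the expectation style changes like this (illustrative; the exact strings are in the diff below):

    /* before: tied to an instruction index that differs on cpuv4 */
    __msg("mark_precise: frame0: regs=r0 stack= before 24: (85) call bpf_get_prandom_u32#7")

    /* after: index-agnostic, anchored around the bpf_get_prandom_u32 call */
    __msg("mark_precise: frame0: regs=r0 stack= before ")
    __msg(": (85) call bpf_get_prandom_u32#7")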
Fixes: e02dea158dda ("selftests/bpf: validate async callback return value check correctness") Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231208233028.3412690-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/timer_failure.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 9fbc69c77bbb..0996c2486f05 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -47,9 +47,10 @@ __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) __failure /* check that fallthrough code path marks r0 as precise */ -__msg("mark_precise: frame0: regs=r0 stack= before 22: (b7) r0 = 0") +__msg("mark_precise: frame0: regs=r0 stack= before") +__msg(": (85) call bpf_get_prandom_u32#7") /* anchor message */ /* check that branch code path marks r0 as precise */ -__msg("mark_precise: frame0: regs=r0 stack= before 24: (85) call bpf_get_prandom_u32#7") +__msg("mark_precise: frame0: regs=r0 stack= before ") __msg(": (85) call bpf_get_prandom_u32#7") __msg("should have been in [0, 0]") long BPF_PROG2(test_bad_ret, int, a) { -- cgit v1.2.3-70-g09d2 From f4199271dae12ae407fa739e7012914ea6b3f37b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 6 Dec 2023 11:53:25 +0000 Subject: selftests/bpf: Add a new cgroup helper open_classid() This new helper allows us to obtain the fd of a net_cls cgroup, which will be utilized in the subsequent patch. Signed-off-by: Yafang Shao Acked-by: Tejun Heo Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231206115326.4295-3-laoar.shao@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/cgroup_helpers.c | 16 ++++++++++++++++ tools/testing/selftests/bpf/cgroup_helpers.h | 1 + 2 files changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5aa133bf3688..19be9c63d5e8 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -689,3 +689,19 @@ int get_cgroup1_hierarchy_id(const char *subsys_name) fclose(file); return found ? id : -1; } + +/** + * open_classid() - Open a cgroupv1 net_cls classid + * + * This function expects the cgroup work dir to be already created, as we + * open it here. + * + * On success, it returns the file descriptor. On failure it returns -1. + */ +int open_classid(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return open(cgroup_workdir, O_RDONLY); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index ee053641c026..502845160d88 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -33,6 +33,7 @@ void cleanup_cgroup_environment(void); int set_classid(void); int join_classid(void); unsigned long long get_classid_cgroup_id(void); +int open_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -- cgit v1.2.3-70-g09d2 From a2c6380b17b6339bfedc98d253b6d85e7014953b Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 6 Dec 2023 11:53:26 +0000 Subject: selftests/bpf: Add selftests for cgroup1 local storage Expanding the test coverage from cgroup2 to include cgroup1. 
The result as follows, Already existing test cases for cgroup2: #48/1 cgrp_local_storage/tp_btf:OK #48/2 cgrp_local_storage/attach_cgroup:OK #48/3 cgrp_local_storage/recursion:OK #48/4 cgrp_local_storage/negative:OK #48/5 cgrp_local_storage/cgroup_iter_sleepable:OK #48/6 cgrp_local_storage/yes_rcu_lock:OK #48/7 cgrp_local_storage/no_rcu_lock:OK Expanded test cases for cgroup1: #48/8 cgrp_local_storage/cgrp1_tp_btf:OK #48/9 cgrp_local_storage/cgrp1_recursion:OK #48/10 cgrp_local_storage/cgrp1_negative:OK #48/11 cgrp_local_storage/cgrp1_iter_sleepable:OK #48/12 cgrp_local_storage/cgrp1_yes_rcu_lock:OK #48/13 cgrp_local_storage/cgrp1_no_rcu_lock:OK Summary: #48 cgrp_local_storage:OK Summary: 1/13 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yafang Shao Acked-by: Tejun Heo Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231206115326.4295-4-laoar.shao@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/cgrp_local_storage.c | 98 +++++++++++++++++++++- .../selftests/bpf/progs/cgrp_ls_recursion.c | 84 +++++++++++++++---- .../selftests/bpf/progs/cgrp_ls_sleepable.c | 61 ++++++++++++-- tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c | 82 +++++++++++++----- 4 files changed, 278 insertions(+), 47 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 63e776f4176e..747761572098 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -19,6 +19,21 @@ struct socket_cookie { __u64 cookie_value; }; +static bool is_cgroup1; +static int target_hid; + +#define CGROUP_MODE_SET(skel) \ +{ \ + skel->bss->is_cgroup1 = is_cgroup1; \ + skel->bss->target_hid = target_hid; \ +} + +static void cgroup_mode_value_init(bool cgroup, int hid) +{ + is_cgroup1 = cgroup; + target_hid = hid; +} + static void test_tp_btf(int cgroup_fd) { struct cgrp_ls_tp_btf *skel; @@ -29,6 +44,8 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + /* populate a value in map_b */ err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); if (!ASSERT_OK(err, "map_update_elem")) @@ -130,6 +147,8 @@ static void test_recursion(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + err = cgrp_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; @@ -165,6 +184,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + bpf_program__set_autoload(skel->progs.cgroup_iter, true); err = cgrp_ls_sleepable__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -202,6 +223,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); skel->bss->target_pid = syscall(SYS_gettid); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); @@ -229,6 +251,8 @@ static void test_no_rcu_lock(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); ASSERT_ERR(err, "skel_load"); @@ -236,7 +260,25 @@ static void test_no_rcu_lock(void) cgrp_ls_sleepable__destroy(skel); } -void test_cgrp_local_storage(void) +static void test_cgrp1_no_rcu_lock(void) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = 
cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + CGROUP_MODE_SET(skel); + + bpf_program__set_autoload(skel->progs.cgrp1_no_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + ASSERT_OK(err, "skel_load"); + + cgrp_ls_sleepable__destroy(skel); +} + +static void cgrp2_local_storage(void) { __u64 cgroup_id; int cgroup_fd; @@ -245,6 +287,8 @@ void test_cgrp_local_storage(void) if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) return; + cgroup_mode_value_init(0, -1); + cgroup_id = get_cgroup_id("/cgrp_local_storage"); if (test__start_subtest("tp_btf")) test_tp_btf(cgroup_fd); @@ -263,3 +307,55 @@ void test_cgrp_local_storage(void) close(cgroup_fd); } + +static void cgrp1_local_storage(void) +{ + int cgrp1_fd, cgrp1_hid, cgrp1_id, err; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + return; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + cgrp1_fd = open_classid(); + if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd")) + goto cleanup; + + cgrp1_id = get_classid_cgroup_id(); + if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id")) + goto close_fd; + + cgrp1_hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid")) + goto close_fd; + + cgroup_mode_value_init(1, cgrp1_hid); + + if (test__start_subtest("cgrp1_tp_btf")) + test_tp_btf(cgrp1_fd); + if (test__start_subtest("cgrp1_recursion")) + test_recursion(cgrp1_fd); + if (test__start_subtest("cgrp1_negative")) + test_negative(); + if (test__start_subtest("cgrp1_iter_sleepable")) + test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id); + if (test__start_subtest("cgrp1_yes_rcu_lock")) + test_yes_rcu_lock(cgrp1_id); + if (test__start_subtest("cgrp1_no_rcu_lock")) + test_cgrp1_no_rcu_lock(); + +close_fd: + close(cgrp1_fd); +cleanup: + cleanup_classid_environment(); +} + +void test_cgrp_local_storage(void) +{ + cgrp2_local_storage(); + cgrp1_local_storage(); +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c index a043d8fefdac..610c2427fd93 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -21,50 +21,100 @@ struct { __type(value, long); } map_b SEC(".maps"); +int target_hid = 0; +bool is_cgroup1 = 0; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_lookup(struct cgroup *cgrp) +{ + bpf_cgrp_storage_delete(&map_a, cgrp); + bpf_cgrp_storage_delete(&map_b, cgrp); +} + SEC("fentry/bpf_local_storage_lookup") int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; - bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); - bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + __on_lookup(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_lookup(task->cgroups->dfl_cgrp); return 0; } -SEC("fentry/bpf_local_storage_update") -int BPF_PROG(on_update) +static void __on_update(struct cgroup *cgrp) { - struct task_struct *task = bpf_get_current_task_btf(); long *ptr; - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; - ptr = 
bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; +} +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_update(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_update(task->cgroups->dfl_cgrp); return 0; } -SEC("tp_btf/sys_enter") -int BPF_PROG(on_enter, struct pt_regs *regs, long id) +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; - task = bpf_get_current_task_btf(); - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 200; - ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 100; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 4c7844e1dbfa..facedd8b8250 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -17,7 +17,11 @@ struct { __u32 target_pid; __u64 cgroup_id; +int target_hid; +bool is_cgroup1; +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -37,23 +41,50 @@ int cgroup_iter(struct bpf_iter__cgroup *ctx) return 0; } +static void __no_rcu_lock(struct cgroup *cgrp) +{ + long *ptr; + + /* Note that trace rcu is held in sleepable prog, so we can use + * bpf_cgrp_storage_get() in sleepable prog. 
+ */ + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; +} + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -int no_rcu_lock(void *ctx) +int cgrp1_no_rcu_lock(void *ctx) { struct task_struct *task; struct cgroup *cgrp; - long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* bpf_task_get_cgroup1 can work in sleepable prog */ + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __no_rcu_lock(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int no_rcu_lock(void *ctx) +{ + struct task_struct *task; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; /* task->cgroups is untrusted in sleepable prog outside of RCU CS */ - cgrp = task->cgroups->dfl_cgrp; - ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) - cgroup_id = cgrp->kn->id; + __no_rcu_lock(task->cgroups->dfl_cgrp); return 0; } @@ -68,6 +99,22 @@ int yes_rcu_lock(void *ctx) if (task->pid != target_pid) return 0; + if (is_cgroup1) { + bpf_rcu_read_lock(); + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + bpf_cgroup_release(cgrp); + bpf_rcu_read_unlock(); + return 0; + } + bpf_rcu_read_lock(); cgrp = task->cgroups->dfl_cgrp; /* cgrp is trusted under RCU CS */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c index 9ebb8e2fe541..1c348f000f38 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -27,62 +27,100 @@ pid_t target_pid = 0; int mismatch_cnt = 0; int enter_cnt = 0; int exit_cnt = 0; +int target_hid = 0; +bool is_cgroup1 = 0; -SEC("tp_btf/sys_enter") -int BPF_PROG(on_enter, struct pt_regs *regs, long id) +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; int err; - task = bpf_get_current_task_btf(); - if (task->pid != target_pid) - return 0; - /* populate value 0 */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; /* delete value 0 */ - err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + err = bpf_cgrp_storage_delete(&map_a, cgrp); if (err) - return 0; + return; /* value is not available */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 0); if (ptr) - return 0; + return; /* re-populate the value */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&enter_cnt, 1); *ptr = MAGIC_VALUE + enter_cnt; - - return 0; } -SEC("tp_btf/sys_exit") -int BPF_PROG(on_exit, struct pt_regs *regs, long id) +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) { struct task_struct *task; - long *ptr; + struct cgroup *cgrp; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; - ptr = 
bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); + return 0; +} + +static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp) +{ + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&exit_cnt, 1); if (*ptr != MAGIC_VALUE + exit_cnt) __sync_fetch_and_add(&mismatch_cnt, 1); +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + struct cgroup *cgrp; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_exit(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_exit(regs, id, task->cgroups->dfl_cgrp); return 0; } -- cgit v1.2.3-70-g09d2 From 32fa058398624166dd04ff4af49cfef69c94abbc Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Fri, 8 Dec 2023 21:51:00 +0000 Subject: libbpf: Add pr_warn() for EINVAL cases in linker_sanity_check_elf Before the change on `i686-linux` `systemd` build failed as: $ bpftool gen object src/core/bpf/socket_bind/socket-bind.bpf.o src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o Error: failed to link 'src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o': Invalid argument (22) After the change it fails as: $ bpftool gen object src/core/bpf/socket_bind/socket-bind.bpf.o src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o libbpf: ELF section #9 has inconsistent alignment addr=8 != d=4 in src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o Error: failed to link 'src/core/bpf/socket_bind/socket-bind.bpf.unstripped.o': Invalid argument (22) Now it's slightly easier to figure out what is wrong with an ELF file. 
Signed-off-by: Sergei Trofimovich Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20231208215100.435876-1-slyich@gmail.com --- tools/lib/bpf/linker.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 5ced96d99f8c..52a2901e8bd0 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -719,13 +719,25 @@ static int linker_sanity_check_elf(struct src_obj *obj) return -EINVAL; } - if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) + if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) { + pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + obj->filename); return -EINVAL; - if (sec->shdr->sh_addralign != sec->data->d_align) + } + if (sec->shdr->sh_addralign != sec->data->d_align) { + pr_warn("ELF section #%zu has inconsistent alignment addr=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + (long long unsigned)sec->data->d_align, obj->filename); return -EINVAL; + } - if (sec->shdr->sh_size != sec->data->d_size) + if (sec->shdr->sh_size != sec->data->d_size) { + pr_warn("ELF section #%zu has inconsistent section size sh=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + (long long unsigned)sec->data->d_size, obj->filename); return -EINVAL; + } switch (sec->shdr->sh_type) { case SHT_SYMTAB: @@ -737,8 +749,12 @@ static int linker_sanity_check_elf(struct src_obj *obj) break; case SHT_PROGBITS: if (sec->shdr->sh_flags & SHF_EXECINSTR) { - if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) + if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) { + pr_warn("ELF section #%zu has unexpected size alignment %llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + obj->filename); return -EINVAL; + } } break; case SHT_NOBITS: -- cgit v1.2.3-70-g09d2 From 7d8ed51bcb32716a40d71043fcd01c4118858c51 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Dec 2023 17:09:58 -0800 Subject: selftests/bpf: validate fake register spill/fill precision backtracking logic Add two tests validating that verifier's precision backtracking logic handles BPF_ST_MEM instructions that produce fake register spill into register slot. This is happening when non-zero constant is written directly to a slot, e.g., *(u64 *)(r10 -8) = 123. Add both full 64-bit register spill, as well as 32-bit "sub-spill". 
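The two flavors being exercised look roughly like this (paraphrasing the new tests below):

    /* fake register spill: constant stored straight to the stack (BPF_ST_MEM) */
    *(u64 *)(r10 -8) = 1;

    /* ordinary spill of a constant-valued register (BPF_STX_MEM) */
    r0 = 1;
    *(u64 *)(r10 -16) = r0;

Both slots are later loaded back and used as an offset, and the tests check that precision backtracking marks both stack slots precise in the parent state.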
Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20231209010958.66758-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_spill_fill.c | 154 +++++++++++++++++++++ 1 file changed, 154 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index df4920da3472..508f5d6c7347 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -577,4 +577,158 @@ __naked void partial_stack_load_preserves_zeros(void) : __clobber_common); } +char two_byte_buf[2] SEC(".data.two_byte_buf"); + +SEC("raw_tp") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is IMPRECISE fake register spill */ +__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1") +/* and fp-16 is spilled IMPRECISE const reg */ +__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1") +/* validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */ +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +/* now both fp-8 and fp-16 are precise, very good */ +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack slots; + * if we bubble up to this 
state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u64 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 register */ + "r0 = 1;" + "*(u64 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load single U64 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U64 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 1)) + : __clobber_common); +} + +SEC("raw_tp") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is 32-bit FAKE subregister spill */ +__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1") +/* but fp-16 is spilled IMPRECISE zero const reg */ +__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1") +/* validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision_subreg(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack 
slots; + * if we bubble up to this state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* SUB-register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u32 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 SUB-register */ + "r0 = 1;" + "*(u32 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load single U32 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_W, BPF_REG_FP, -8, 1)) /* 32-bit spill */ + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 88f6047191e69bdd02cf1b9b5b514f7e514e8b86 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 7 Dec 2023 15:08:43 -0600 Subject: selftests/bpf: Add test for bpf_cpumask_weight() kfunc The new bpf_cpumask_weight() kfunc can be used to count the number of bits that are set in a struct cpumask* kptr. Let's add a selftest to verify its behavior. Signed-off-by: David Vernet Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231207210843.168466-3-void@manifault.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/cpumask.c | 1 + tools/testing/selftests/bpf/progs/cpumask_common.h | 1 + .../testing/selftests/bpf/progs/cpumask_success.c | 43 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 756ea8b590b6..c2e886399e3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -18,6 +18,7 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_global_mask_rcu", + "test_cpumask_weight", }; static void verify_success(const char *prog_name) diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index b15c588ace15..0cd4aebb97cf 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -54,6 +54,7 @@ bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 674a63424dee..fc3666edf456 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -460,6 +460,49 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) return 0; } +SEC("tp_btf/task_newtask") +int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local; + + if (!is_test_task()) + return 0; + + local = 
create_cpumask(); + if (!local) + return 0; + + if (bpf_cpumask_weight(cast(local)) != 0) { + err = 3; + goto out; + } + + bpf_cpumask_set_cpu(0, local); + if (bpf_cpumask_weight(cast(local)) != 1) { + err = 4; + goto out; + } + + /* + * Make sure that adding additional CPUs changes the weight. Test to + * see whether the CPU was set to account for running on UP machines. + */ + bpf_cpumask_set_cpu(1, local); + if (bpf_cpumask_test_cpu(1, cast(local)) && bpf_cpumask_weight(cast(local)) != 2) { + err = 5; + goto out; + } + + bpf_cpumask_clear(local); + if (bpf_cpumask_weight(cast(local)) != 0) { + err = 6; + goto out; + } +out: + bpf_cpumask_release(local); + return 0; +} + SEC("tp_btf/task_newtask") __success int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) -- cgit v1.2.3-70-g09d2 From 15c79c6507c0eab5ec0d4cd402ac52d42735a43e Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 6 Dec 2023 21:59:18 +0100 Subject: selftests/bpf: Increase invalid metadata size Changed check expects passed data meta to be deemed invalid. After loosening the requirement, the size of 36 bytes becomes valid. Therefore, increase tested meta size to 256, so we do not get an unexpected success. Signed-off-by: Larysa Zaremba Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231206205919.404415-2-larysa.zaremba@intel.com --- tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index ab4952b9fb1d..e6a783c7f5db 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -77,8 +77,8 @@ void test_xdp_context_test_run(void) test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), 0, 0, 0); - /* Meta data must be 32 bytes or smaller */ - test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0); + /* Meta data must be 255 bytes or smaller */ + test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); /* Total size of data must match data_end - data_meta */ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), -- cgit v1.2.3-70-g09d2 From e72c1ccfd449598f7eda10d3bb7441d501ddcfc3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Dec 2023 09:41:31 -0800 Subject: selftests/bpf: validate eliminated global subprog is not freplaceable Add selftest that establishes dead code-eliminated valid global subprog (global_dead) and makes sure that it's not possible to freplace it, as it's effectively not there. This test will fail with unexpected success before 2afae08c9dcb ("bpf: Validate global subprogs lazily"). v2->v3: - add missing err assignment (Alan); - undo unnecessary signature changes in verifier_global_subprogs.c (Eduard); v1->v2: - don't rely on assembly output in verifier log, which changes between compiler versions (CI). 
Acked-by: Eduard Zingerman Reviewed-by: Alan Maguire Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/r/20231211174131.2324306-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/global_func_dead_code.c | 60 ++++++++++++++++++++++ .../bpf/progs/freplace_dead_global_func.c | 11 ++++ .../selftests/bpf/progs/verifier_global_subprogs.c | 15 ++++-- 3 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c create mode 100644 tools/testing/selftests/bpf/progs/freplace_dead_global_func.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c new file mode 100644 index 000000000000..65309894b27a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include +#include "verifier_global_subprogs.skel.h" +#include "freplace_dead_global_func.skel.h" + +void test_global_func_dead_code(void) +{ + struct verifier_global_subprogs *tgt_skel = NULL; + struct freplace_dead_global_func *skel = NULL; + char log_buf[4096]; + int err, tgt_fd; + + /* first, try to load target with good global subprog */ + tgt_skel = verifier_global_subprogs__open(); + if (!ASSERT_OK_PTR(tgt_skel, "tgt_skel_good_open")) + return; + + bpf_program__set_autoload(tgt_skel->progs.chained_global_func_calls_success, true); + + err = verifier_global_subprogs__load(tgt_skel); + if (!ASSERT_OK(err, "tgt_skel_good_load")) + goto out; + + tgt_fd = bpf_program__fd(tgt_skel->progs.chained_global_func_calls_success); + + /* Attach to good non-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_good_open")) + goto out; + + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_good"); + ASSERT_OK(err, "attach_target_good"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_OK(err, "skel_good_load")) + goto out; + + freplace_dead_global_func__destroy(skel); + + /* Try attaching to dead code-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_dead_open")) + goto out; + + bpf_program__set_log_buf(skel->progs.freplace_prog, log_buf, sizeof(log_buf)); + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_dead"); + ASSERT_OK(err, "attach_target_dead"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_ERR(err, "skel_dead_load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, "Subprog global_dead doesn't exist", "dead_subprog_missing_msg"); + +out: + verifier_global_subprogs__destroy(tgt_skel); + freplace_dead_global_func__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c new file mode 100644 index 000000000000..e6a75f86cac6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +SEC("freplace") +int freplace_prog(void) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index a0a5efd1caa1..bd696a431244 100644 --- 
a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -10,6 +10,7 @@ int arr[1]; int unkn_idx; +const volatile bool call_dead_subprog = false; __noinline long global_bad(void) { @@ -31,23 +32,31 @@ __noinline long global_calls_good_only(void) return global_good(); } +__noinline long global_dead(void) +{ + return arr[0] * 2; +} + SEC("?raw_tp") __success __log_level(2) /* main prog is validated completely first */ __msg("('global_calls_good_only') is global and assumed valid.") -__msg("1: (95) exit") /* eventually global_good() is transitively validated as well */ __msg("Validating global_good() func") __msg("('global_good') is safe for any args that match its prototype") int chained_global_func_calls_success(void) { - return global_calls_good_only(); + int sum = 0; + + if (call_dead_subprog) + sum += global_dead(); + return global_calls_good_only() + sum; } SEC("?raw_tp") __failure __log_level(2) /* main prog validated successfully first */ -__msg("1: (95) exit") +__msg("('global_calls_bad') is global and assumed valid.") /* eventually we validate global_bad() and fail */ __msg("Validating global_bad() func") __msg("math between map_value pointer and register") /* BOOM */ -- cgit v1.2.3-70-g09d2 From f77d795618b92ac6fdb43de0d4036c6ce49f0b82 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Mon, 11 Dec 2023 10:07:33 -0800 Subject: selftests/bpf: Fixes tests for filesystem kfuncs `fs_kfuncs.c`'s `test_xattr` would fail the test even when the filesystem did not support xattr, for instance when /tmp is mounted as tmpfs. This change checks errno when setxattr fails. If the failure is due to the operation being unsupported, we will skip the test (just like we would if verity was not enabled on the FS). Before the change, fs_kfuncs test would fail in test_xattr: $ vmtest -k $(make -s image_name) './tools/testing/selftests/bpf/test_progs -a fs_kfuncs' => bzImage ===> Booting [ 0.000000] rcu: RCU restricting CPUs from NR_CPUS=128 to nr_cpu_ ===> Setting up VM ===> Running command [ 4.157491] bpf_testmod: loading out-of-tree module taints kernel. [ 4.161515] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel test_xattr:PASS:create_file 0 nsec test_xattr:FAIL:setxattr unexpected error: -1 (errno 95) #90/1 fs_kfuncs/xattr:FAIL #90/2 fs_kfuncs/fsverity:SKIP #90 fs_kfuncs:FAIL All error logs: test_xattr:PASS:create_file 0 nsec test_xattr:FAIL:setxattr unexpected error: -1 (errno 95) #90/1 fs_kfuncs/xattr:FAIL #90 fs_kfuncs:FAIL Summary: 0/0 PASSED, 1 SKIPPED, 1 FAILED Test plan: $ touch tmpfs_file && truncate -s 1G tmpfs_file && mkfs.ext4 tmpfs_file # /tmp mounted as tmpfs $ vmtest -k $(make -s image_name) './tools/testing/selftests/bpf/test_progs -a fs_kfuncs' => bzImage ===> Booting ===> Setting up VM ===> Running command WARNING!
Selftests relying on bpf_testmod.ko will be skipped. Can't find bpf_testmod.ko kernel module: -2 #90/1 fs_kfuncs/xattr:OK #90/2 fs_kfuncs/fsverity:SKIP #90 fs_kfuncs:OK (SKIP: 1/2) Summary: 1/1 PASSED, 1 SKIPPED, 0 FAILED $ tune2fs -O verity tmpfs_file # /tmp as ext4 with both xattr and verity enabled $ vmtest -k $(make -s image_name) 'mount -o loop tmpfs_file /tmp && \ ./tools/testing/selftests/bpf/test_progs -a fs_kfuncs' => bzImage ===> Booting ===> Setting up VM ===> Running command [ 4.291434] loop0: detected capacity change from 0 to 2097152 [ 4.460828] EXT4-fs (loop0): recovery complete [ 4.468631] EXT4-fs (loop0): mounted filesystem 7b4a7b7f-c442-4b06-9ede-254e63cceb52 r/w with ordered data mode. Quota mode: none. [ 4.988074] fs-verity: sha256 using implementation "sha256-generic" WARNING! Selftests relying on bpf_testmod.ko will be skipped. Can't find bpf_testmod.ko kernel module: -2 #90/1 fs_kfuncs/xattr:OK #90/2 fs_kfuncs/fsverity:OK #90 fs_kfuncs:OK Summary: 1/2 PASSED, 0 SKIPPED, 0 FAILED Fixes: 341f06fdddf7 ("selftests/bpf: Add tests for filesystem kfuncs") Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20231211180733.763025-1-chantr4@gmail.com --- tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c index d3196a4b089f..37056ba73847 100644 --- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -25,6 +25,14 @@ static void test_xattr(void) fd = -1; err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + if (err && errno == EOPNOTSUPP) { + printf("%s:SKIP:local fs doesn't support xattr (%d)\n" + "To run this test, make sure /tmp filesystem supports xattr.\n", + __func__, errno); + test__skip(); + goto out; + } + if (!ASSERT_OK(err, "setxattr")) goto out; -- cgit v1.2.3-70-g09d2 From e1ba7f64b192f083b4423644be03bb9e3dc8ae84 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Tue, 12 Dec 2023 18:29:11 +0000 Subject: selftests/bpf: Relax time_tai test for equal timestamps in tai_forward We're observing test flakiness on an arm64 platform which might not have timestamps as precise as x86. The test log looks like: test_time_tai:PASS:tai_open 0 nsec test_time_tai:PASS:test_run 0 nsec test_time_tai:PASS:tai_ts1 0 nsec test_time_tai:PASS:tai_ts2 0 nsec test_time_tai:FAIL:tai_forward unexpected tai_forward: actual 1702348135471494160 <= expected 1702348135471494160 test_time_tai:PASS:tai_gettime 0 nsec test_time_tai:PASS:tai_future_ts1 0 nsec test_time_tai:PASS:tai_future_ts2 0 nsec test_time_tai:PASS:tai_range_ts1 0 nsec test_time_tai:PASS:tai_range_ts2 0 nsec #199 time_tai:FAIL This patch changes ASSERT_GT to ASSERT_GE in the tai_forward assertion so that equal timestamps are permitted. 
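To illustrate the race (illustrative only, not part of the selftest, which samples CLOCK_TAI twice from the BPF program): on a clock whose effective resolution is coarser than the gap between the two reads, back-to-back samples can legitimately compare equal, so only a greater-or-equal check is reliable. A minimal stand-alone userspace sketch of the same effect:

        #define _GNU_SOURCE
        #include <stdio.h>
        #include <time.h>

        #ifndef CLOCK_TAI
        #define CLOCK_TAI 11    /* value from the Linux UAPI, in case libc doesn't expose it */
        #endif

        int main(void)
        {
                struct timespec t1, t2;
                unsigned long long ns1, ns2;

                clock_gettime(CLOCK_TAI, &t1);
                clock_gettime(CLOCK_TAI, &t2);

                ns1 = (unsigned long long)t1.tv_sec * 1000000000ULL + t1.tv_nsec;
                ns2 = (unsigned long long)t2.tv_sec * 1000000000ULL + t2.tv_nsec;

                /* TAI only moves forward, but it may not have advanced between reads */
                printf("ns2 >= ns1: %d\n", ns2 >= ns1);
                return 0;
        }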
Fixes: 64e15820b987 ("selftests/bpf: Add BPF-helper test for CLOCK_TAI access") Signed-off-by: YiFei Zhu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231212182911.3784108-1-zhuyifei@google.com --- tools/testing/selftests/bpf/prog_tests/time_tai.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c index a31119823666..f45af1b0ef2c 100644 --- a/tools/testing/selftests/bpf/prog_tests/time_tai.c +++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c @@ -56,7 +56,7 @@ void test_time_tai(void) ASSERT_NEQ(ts2, 0, "tai_ts2"); /* TAI is moving forward only */ - ASSERT_GT(ts2, ts1, "tai_forward"); + ASSERT_GE(ts2, ts1, "tai_forward"); /* Check for future */ ret = clock_gettime(CLOCK_TAI, &now_tai); -- cgit v1.2.3-70-g09d2 From 62d9a969f4a95219c757831e9ad66cd4dd9edee5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 12 Dec 2023 14:53:43 -0800 Subject: selftests/bpf: fix compiler warnings in RELEASE=1 mode When compiling BPF selftests with RELEASE=1, we get two new warnings, which are treated as errors. Fix them. Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/r/20231212225343.1723081-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 2 +- tools/testing/selftests/bpf/xdp_hw_metadata.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1d418d66e375..244d4996e06e 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1254,7 +1254,7 @@ static int cmp_join_stat(const struct verif_stats_join *s1, bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; - double v1, v2; + double v1 = 0.0, v2 = 0.0; int cmp = 0; fetch_join_stat_value(s1, id, var, &str1, &v1); diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 3291625ba4fb..c69c08933fdd 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .flags = XSK_UMEM__DEFAULT_FLAGS, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; - __u32 idx; + __u32 idx = 0; u64 addr; int ret; int i; -- cgit v1.2.3-70-g09d2 From 2f70803532e9b7f14897d17f8944d431755661a7 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 11 Dec 2023 13:20:07 -0700 Subject: libbpf: Add BPF_CORE_WRITE_BITFIELD() macro === Motivation === Similar to reading from CO-RE bitfields, we need a CO-RE aware bitfield writing wrapper to make the verifier happy. Two alternatives to this approach are: 1. Use the upcoming `preserve_static_offset` [0] attribute to disable CO-RE on specific structs. 2. Use broader byte-sized writes to write to bitfields. (1) is a bit hard to use. It requires specific and not-very-obvious annotations to bpftool generated vmlinux.h. It's also not generally available in released LLVM versions yet. (2) makes the code quite hard to read and write. And especially if BPF_CORE_READ_BITFIELD() is already being used, it makes more sense to to have an inverse helper for writing. === Implementation details === Since the logic is a bit non-obvious, I thought it would be helpful to explain exactly what's going on. 
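As a concrete anchor before the step-by-step walk-through that follows, here is a small stand-alone sketch of the write-side math with assumed constants: a 3-bit field occupying bits 8..10 of a u64 word, i.e. lshift = 53 (blank bits above the field), rshift = 61 (total blank bits), rpad = rshift - lshift = 8 (blank bits below). In real usage these constants come from CO-RE relocations (__CORE_RELO()) inside BPF_CORE_WRITE_BITFIELD(); this snippet only demonstrates the arithmetic.

        #include <stdio.h>

        int main(void)
        {
                unsigned long long val = 0xAAAAAAAAAAAAAAAAULL;  /* existing word   */
                unsigned long long nval = 5;                     /* new field value */
                unsigned int lshift = 53, rshift = 61, rpad = rshift - lshift;
                unsigned long long mask;

                mask = (~0ULL << rshift) >> lshift;              /* 0x700: bits 8..10   */
                val = (val & ~mask) | ((nval << rpad) & mask);   /* splice in new value */

                /* read back: clear higher blank bits, then shift the field down to the
                 * LSBs, as the unsigned read path does
                 */
                printf("field = %llu\n", (val << lshift) >> rshift);   /* prints 5 */
                return 0;
        }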
To start, it helps by explaining what LSHIFT_U64 (lshift) and RSHIFT_U64 (rshift) is designed to mean. Consider the core of the BPF_CORE_READ_BITFIELD() algorithm: val <<= __CORE_RELO(s, field, LSHIFT_U64); val = val >> __CORE_RELO(s, field, RSHIFT_U64); Basically what happens is we lshift to clear the non-relevant (blank) higher order bits. Then we rshift to bring the relevant bits (bitfield) down to LSB position (while also clearing blank lower order bits). To illustrate: Start: ........XXX...... Lshift: XXX......00000000 Rshift: 00000000000000XXX where `.` means blank bit, `0` means 0 bit, and `X` means bitfield bit. After the two operations, the bitfield is ready to be interpreted as a regular integer. Next, we want to build an alternative (but more helpful) mental model on lshift and rshift. That is, to consider: * rshift as the total number of blank bits in the u64 * lshift as number of blank bits left of the bitfield in the u64 Take a moment to consider why that is true by consulting the above diagram. With this insight, we can now define the following relationship: bitfield _ | | 0.....00XXX0...00 | | | | |______| | | lshift | | |____| (rshift - lshift) That is, we know the number of higher order blank bits is just lshift. And the number of lower order blank bits is (rshift - lshift). Finally, we can examine the core of the write side algorithm: mask = (~0ULL << rshift) >> lshift; // 1 val = (val & ~mask) | ((nval << rpad) & mask); // 2 1. Compute a mask where the set bits are the bitfield bits. The first left shift zeros out exactly the number of blank bits, leaving a bitfield sized set of 1s. The subsequent right shift inserts the correct amount of higher order blank bits. 2. On the left of the `|`, mask out the bitfield bits. This creates 0s where the new bitfield bits will go. On the right of the `|`, bring nval into the correct bit position and mask out any bits that fall outside of the bitfield. Finally, by bor'ing the two halves, we get the final set of bits to write back. [0]: https://reviews.llvm.org/D133361 Co-developed-by: Eduard Zingerman Signed-off-by: Eduard Zingerman Co-developed-by: Jonathan Lemon Signed-off-by: Jonathan Lemon Acked-by: Andrii Nakryiko Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/4d3dd215a4fd57d980733886f9c11a45e1a9adf3.1702325874.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf_core_read.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 1ac57bb7ac55..7325a12692a3 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -111,6 +111,38 @@ enum bpf_enum_value_kind { val; \ }) +/* + * Write to a bitfield, identified by s->field. + * This is the inverse of BPF_CORE_WRITE_BITFIELD(). 
+ */ +#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({ \ + void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ + unsigned int byte_size = __CORE_RELO(s, field, BYTE_SIZE); \ + unsigned int lshift = __CORE_RELO(s, field, LSHIFT_U64); \ + unsigned int rshift = __CORE_RELO(s, field, RSHIFT_U64); \ + unsigned long long mask, val, nval = new_val; \ + unsigned int rpad = rshift - lshift; \ + \ + asm volatile("" : "+r"(p)); \ + \ + switch (byte_size) { \ + case 1: val = *(unsigned char *)p; break; \ + case 2: val = *(unsigned short *)p; break; \ + case 4: val = *(unsigned int *)p; break; \ + case 8: val = *(unsigned long long *)p; break; \ + } \ + \ + mask = (~0ULL << rshift) >> lshift; \ + val = (val & ~mask) | ((nval << rpad) & mask); \ + \ + switch (byte_size) { \ + case 1: *(unsigned char *)p = val; break; \ + case 2: *(unsigned short *)p = val; break; \ + case 4: *(unsigned int *)p = val; break; \ + case 8: *(unsigned long long *)p = val; break; \ + } \ +}) + #define ___bpf_field_ref1(field) (field) #define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) #define ___bpf_field_ref(args...) \ -- cgit v1.2.3-70-g09d2 From 7d19c00e9abc8ad3b3b72a1989331f45287e6bf5 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 11 Dec 2023 13:20:08 -0700 Subject: bpf: selftests: test_loader: Support __btf_path() annotation This commit adds support for per-prog btf_custom_path. This is necessary for testing CO-RE relocations on non-vmlinux types using test_loader infrastructure. Acked-by: Andrii Nakryiko Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/660ea7f2fdbdd5103bc1af87c9fc931f05327926.1702325874.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/progs/bpf_misc.h | 1 + tools/testing/selftests/bpf/test_loader.c | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 799fff4995d8..2fd59970c43a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -71,6 +71,7 @@ #define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) +#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index a350ecdfba4a..74ceb7877ae2 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -27,6 +27,7 @@ #define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv=" #define TEST_TAG_AUXILIARY "comment:test_auxiliary" #define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv" +#define TEST_BTF_PATH "comment:test_btf_path=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -58,6 +59,7 @@ struct test_spec { const char *prog_name; struct test_subspec priv; struct test_subspec unpriv; + const char *btf_custom_path; int log_level; int prog_flags; int mode_mask; @@ -288,6 +290,8 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; update_flags(&spec->prog_flags, flags, clear); } + } else if (str_has_pfx(s, TEST_BTF_PATH)) { + spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1; 
} } @@ -578,6 +582,9 @@ void run_subtest(struct test_loader *tester, } } + /* Implicitly reset to NULL if next test case doesn't specify */ + open_opts->btf_custom_path = spec->btf_custom_path; + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts); if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ goto subtest_cleanup; -- cgit v1.2.3-70-g09d2 From f04f2ce6018f3cb33ac96270b9153c2920ead190 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 11 Dec 2023 13:20:09 -0700 Subject: bpf: selftests: Add verifier tests for CO-RE bitfield writes Add some tests that exercise BPF_CORE_WRITE_BITFIELD() macro. Since some non-trivial bit fiddling is going on, make sure various edge cases (such as adjacent bitfields and bitfields at the edge of structs) are exercised. Acked-by: Andrii Nakryiko Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/72698a1080fa565f541d5654705255984ea2a029.1702325874.git.dxu@dxuuu.xyz Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 + .../selftests/bpf/progs/verifier_bitfield_write.c | 100 +++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/verifier_bitfield_write.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8d746642cbd7..ac49ec25211d 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -6,6 +6,7 @@ #include "verifier_and.skel.h" #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" +#include "verifier_bitfield_write.skel.h" #include "verifier_bounds.skel.h" #include "verifier_bounds_deduction.skel.h" #include "verifier_bounds_deduction_non_const.skel.h" @@ -116,6 +117,7 @@ static void run_tests_aux(const char *skel_name, void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } +void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); } void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } diff --git a/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c new file mode 100644 index 000000000000..623f130a3198 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +#include +#include + +#include "bpf_misc.h" + +struct core_reloc_bitfields { + /* unsigned bitfields */ + uint8_t ub1: 1; + uint8_t ub2: 2; + uint32_t ub7: 7; + /* signed bitfields */ + int8_t sb4: 4; + int32_t sb20: 20; + /* non-bitfields */ + uint32_t u32; + int32_t s32; +} __attribute__((preserve_access_index)); + +SEC("tc") +__description("single CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(3) +int single_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + return BPF_CORE_READ_BITFIELD(&bitfields, ub2); +} + +SEC("tc") +__description("multiple CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x3FD) +int multiple_field_roundtrip(struct __sk_buff 
*ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub2; + int8_t sb4; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, sb4, -1); + + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + sb4 = BPF_CORE_READ_BITFIELD(&bitfields, sb4); + + return (((uint8_t)sb4) << 2) | ub2; +} + +SEC("tc") +__description("adjacent CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(7) +int adjacent_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub1, ub2; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + + return (ub2 << 1) | ub1; +} + +SEC("tc") +__description("multibyte CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x21) +int multibyte_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint32_t ub7; + uint8_t ub1; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub7, 16); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub7 = BPF_CORE_READ_BITFIELD(&bitfields, ub7); + + return (ub7 << 1) | ub1; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From c6c5be3eee975ae640966844db66d404c1de79b1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:34 -0800 Subject: libbpf: split feature detectors definitions from cached results Split a list of supported feature detectors with their corresponding callbacks from actual cached supported/missing values. This will allow to have more flexible per-token or per-object feature detectors in subsequent refactorings. 
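Purely as an illustration of the resulting shape (simplified stand-alone C, not libbpf's actual code): the probe descriptors become immutable shared metadata, while probe results live in a separate cache object that could later be instantiated per bpf_object or per token.

        #include <stdbool.h>

        /* stand-in feature ids and probes */
        enum feat_id { FEAT_X, FEAT_Y, __FEAT_COUNT };
        enum feat_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, FEAT_MISSING = 2 };

        static int probe_x(void) { return 1; }
        static int probe_y(void) { return 0; }

        /* immutable descriptors: name + probe callback, indexed by feature id */
        static const struct feature_desc {
                const char *desc;
                int (*probe)(void);
        } descs[__FEAT_COUNT] = {
                [FEAT_X] = { "feature X", probe_x },
                [FEAT_Y] = { "feature Y", probe_y },
        };

        /* mutable, instantiable result cache: several can share descs[] */
        struct feature_cache {
                enum feat_result res[__FEAT_COUNT];
        };

        static bool cached_feature_supported(struct feature_cache *cache, enum feat_id id)
        {
                if (cache->res[id] == FEAT_UNKNOWN)
                        cache->res[id] = descs[id].probe() > 0 ? FEAT_SUPPORTED : FEAT_MISSING;
                return cache->res[id] == FEAT_SUPPORTED;
        }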
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ac54ebc0629f..d2828a26b011 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4999,12 +4999,17 @@ enum kern_feature_result { FEAT_MISSING = 2, }; +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; +}; + typedef int (*feature_probe_fn)(void); +static struct kern_feature_cache feature_cache; + static struct kern_feature_desc { const char *desc; feature_probe_fn probe; - enum kern_feature_result res; } feature_probes[__FEAT_CNT] = { [FEAT_PROG_NAME] = { "BPF program name", probe_kern_prog_name, @@ -5072,6 +5077,7 @@ static struct kern_feature_desc { bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; + struct kern_feature_cache *cache = &feature_cache; int ret; if (obj && obj->gen_loader) @@ -5080,19 +5086,19 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { ret = feat->probe(); if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } else { pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } } - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) -- cgit v1.2.3-70-g09d2 From 29c302a2e265a356434b005155990a9e766db75d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:35 -0800 Subject: libbpf: further decouple feature checking logic from bpf_object Add feat_supported() helper that accepts feature cache instead of bpf_object. This allows low-level code in bpf.c to not know or care about higher-level concept of bpf_object, yet it will be able to utilize custom feature checking in cases where BPF token might influence the outcome. 
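A usage sketch (fragment only, assuming the libbpf-internal declarations from this patch, i.e. feat_supported(), struct kern_feature_cache and the FEAT_* ids, are in scope): low-level code no longer needs a bpf_object to ask about a kernel feature, and a non-NULL cache keeps independently probed results.

        static struct kern_feature_cache custom_cache;  /* hypothetical second cache */

        static void feature_check_example(void)
        {
                /* NULL selects the global cache, as low-level bpf.c callers do */
                if (feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
                        return;

                /* a custom cache could be probed under different conditions,
                 * e.g. with a BPF token, per the commit text above
                 */
                if (feat_supported(&custom_cache, FEAT_PROG_NAME))
                        return;
        }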
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 6 +++--- tools/lib/bpf/libbpf.c | 22 +++++++++++++++------- tools/lib/bpf/libbpf_internal.h | 5 ++++- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index f4e1da3c6d5f..120855ac6859 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -146,7 +146,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -181,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -265,7 +265,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.kern_version = OPTS_GET(opts, kern_version, 0); attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d2828a26b011..2b7962120730 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5074,17 +5074,14 @@ static struct kern_feature_desc { }, }; -bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; - struct kern_feature_cache *cache = &feature_cache; int ret; - if (obj && obj->gen_loader) - /* To generate loader program assume the latest kernel - * to avoid doing extra prog_load, map_create syscalls. - */ - return true; + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { ret = feat->probe(); @@ -5101,6 +5098,17 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; } +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +{ + if (obj && obj->gen_loader) + /* To generate loader program assume the latest kernel + * to avoid doing extra prog_load, map_create syscalls. 
+ */ + return true; + + return feat_supported(NULL, feat_id); +} + static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b5d334754e5d..754a432335e4 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,8 +360,11 @@ enum kern_feature_id { __FEAT_CNT, }; -int probe_memcg_account(void); +struct kern_feature_cache; +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_memcg_account(void); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); -- cgit v1.2.3-70-g09d2 From ab8fc393b27cd2d6dd1ced1ba2358ddcd123fc15 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:36 -0800 Subject: libbpf: move feature detection code into its own file It's quite a lot of well isolated code, so it seems like a good candidate to move it out of libbpf.c to reduce its size. Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/elf.c | 2 - tools/lib/bpf/features.c | 463 ++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.c | 463 +--------------------------------------- tools/lib/bpf/libbpf_internal.h | 12 +- tools/lib/bpf/str_error.h | 3 + 6 files changed, 479 insertions(+), 466 deletions(-) create mode 100644 tools/lib/bpf/features.c (limited to 'tools') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 2d0c282c8588..b6619199a706 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o + usdt.o zip.o elf.o features.o diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b02faec748a5..c92e02394159 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,8 +11,6 @@ #include "libbpf_internal.h" #include "str_error.h" -#define STRERR_BUFSIZE 128 - /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 000000000000..338fd0dcd3bd --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ +#include +#include +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "str_error.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func_global(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_datasec(void) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int 
probe_kern_btf_float(void) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_type_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); +} + +static int probe_prog_bind_map(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/libbpf.c 
b/tools/lib/bpf/libbpf.c index 2b7962120730..4e6f8d225caa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4637,467 +4637,6 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const 
char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(void) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(void) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_syscall_wrapper(void); - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -struct kern_feature_cache { - enum kern_feature_result res[__FEAT_CNT]; -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_cache feature_cache; - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) -{ - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - /* assume global feature cache, unless custom one is provided */ - if (!cache) - cache = &feature_cache; - - if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } else { - pr_warn("Detection of 
kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } - } - - return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; -} - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { if (obj && obj->gen_loader) @@ -10626,7 +10165,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -static int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(void) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 754a432335e4..db4a499c0ec5 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,10 +360,20 @@ enum kern_feature_id { __FEAT_CNT, }; -struct kern_feature_cache; +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; +}; + bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +int probe_kern_syscall_wrapper(void); int probe_memcg_account(void); int bump_rlimit_memlock(void); diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index a139334d57b6..626d7ffb03d6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,5 +2,8 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H +#define STRERR_BUFSIZE 128 + char *libbpf_strerror_r(int err, char *dst, int len); + #endif /* __LIBBPF_STR_ERROR_H */ -- cgit v1.2.3-70-g09d2 From a75bb6a16518d4a224f24116633f3f9d5787f6d1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:37 -0800 Subject: libbpf: wire up token_fd into feature probing logic Adjust feature probing callbacks to take into account optional token_fd. In unprivileged contexts, some feature detectors would fail to detect kernel support just because BPF program, BPF map, or BTF object can't be loaded due to privileged nature of those operations. So when BPF object is loaded with BPF token, this token should be used for feature probing. This patch is setting support for this scenario, but we don't yet pass non-zero token FD. This will be added in the next patch. We also switched BPF cookie detector from using kprobe program to tracepoint one, as tracepoint is somewhat less dangerous BPF program type and has higher likelihood of being allowed through BPF token in the future. This change has no effect on detection behavior. 
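For illustration, the probe pattern this patch extends can be reduced to the following sketch. This is not code from the patch: it assumes libbpf's bpf_prog_load() wrapper, the BPF_MOV64_IMM()/BPF_EXIT_INSN() instruction macros and ARRAY_SIZE() are available, and it uses the bpf_prog_load_opts token_fd field that this series introduces. Each probe attempts one minimal operation and reports only supported or missing:

/* minimal feature-probe sketch; token_fd == 0 means "no token" */
static int probe_trivial_prog_load(int token_fd)
{
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),	/* r0 = 0 */
		BPF_EXIT_INSN(),		/* return r0 */
	};
	LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd);
	int fd;

	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
			   insns, ARRAY_SIZE(insns), &opts);
	if (fd >= 0)
		close(fd);
	return fd >= 0;	/* 1 = supported, 0 = missing */
}
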
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 3 +- tools/lib/bpf/features.c | 91 ++++++++++++++++++++++++----------------- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf_internal.h | 8 ++-- tools/lib/bpf/libbpf_probes.c | 8 ++-- 5 files changed, 66 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 120855ac6859..0ad8e532b3cf 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,6 +120,7 @@ int probe_memcg_account(void) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 338fd0dcd3bd..ce98a334be21 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -20,7 +20,7 @@ static int probe_fd(int fd) return fd >= 0; } -static int probe_kern_prog_name(void) +static int probe_kern_prog_name(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, prog_name); struct bpf_insn insns[] = { @@ -35,6 +35,7 @@ static int probe_kern_prog_name(void) attr.license = ptr_to_u64("GPL"); attr.insns = ptr_to_u64(insns); attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); /* make sure loading with name works */ @@ -42,7 +43,7 @@ static int probe_kern_prog_name(void) return probe_fd(ret); } -static int probe_kern_global_data(void) +static int probe_kern_global_data(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { @@ -51,9 +52,11 @@ static int probe_kern_global_data(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -64,12 +67,12 @@ static int probe_kern_global_data(void) insns[0].imm = map; - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); close(map); return probe_fd(ret); } -static int probe_kern_btf(void) +static int probe_kern_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -78,10 +81,10 @@ static int probe_kern_btf(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func(void) +static int probe_kern_btf_func(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* void x(int a) {} */ @@ -96,10 +99,10 @@ static int 
probe_kern_btf_func(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_func_global(void) +static int probe_kern_btf_func_global(int token_fd) { static const char strs[] = "\0int\0x\0a"; /* static void x(int a) {} */ @@ -114,10 +117,10 @@ static int probe_kern_btf_func_global(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_datasec(void) +static int probe_kern_btf_datasec(int token_fd) { static const char strs[] = "\0x\0.data"; /* static int a; */ @@ -133,10 +136,10 @@ static int probe_kern_btf_datasec(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_float(void) +static int probe_kern_btf_float(int token_fd) { static const char strs[] = "\0float"; __u32 types[] = { @@ -145,10 +148,10 @@ static int probe_kern_btf_float(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_decl_tag(void) +static int probe_kern_btf_decl_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -162,10 +165,10 @@ static int probe_kern_btf_decl_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_btf_type_tag(void) +static int probe_kern_btf_type_tag(int token_fd) { static const char strs[] = "\0tag"; __u32 types[] = { @@ -178,21 +181,27 @@ static int probe_kern_btf_type_tag(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -static int probe_kern_array_mmap(void) +static int probe_kern_array_mmap(int token_fd) { - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE, + .token_fd = token_fd, + ); int fd; fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); return probe_fd(fd); } -static int probe_kern_exp_attach_type(void) +static int probe_kern_exp_attach_type(int token_fd) { - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + ); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -208,8 +217,9 @@ static int probe_kern_exp_attach_type(void) return probe_fd(fd); } -static int probe_kern_probe_read_kernel(void) +static int probe_kern_probe_read_kernel(int token_fd) { + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ @@ -220,20 +230,22 @@ static int probe_kern_probe_read_kernel(void) }; int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(fd); } -static int probe_prog_bind_map(void) +static int probe_prog_bind_map(int token_fd) { char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; 
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -242,7 +254,7 @@ static int probe_prog_bind_map(void) return ret; } - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); if (prog < 0) { close(map); return 0; @@ -256,7 +268,7 @@ static int probe_prog_bind_map(void) return ret >= 0; } -static int probe_module_btf(void) +static int probe_module_btf(int token_fd) { static const char strs[] = "\0int"; __u32 types[] = { @@ -268,7 +280,7 @@ static int probe_module_btf(void) char name[16]; int fd, err; - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); if (fd < 0) return 0; /* BTF not supported at all */ @@ -285,16 +297,17 @@ static int probe_module_btf(void) return !err; } -static int probe_perf_link(void) +static int probe_perf_link(int token_fd) { struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); int prog_fd, link_fd, err; prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); + insns, ARRAY_SIZE(insns), &opts); if (prog_fd < 0) return -errno; @@ -311,10 +324,11 @@ static int probe_perf_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_uprobe_multi_link(void) +static int probe_uprobe_multi_link(int token_fd) { LIBBPF_OPTS(bpf_prog_load_opts, load_opts, .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, ); LIBBPF_OPTS(bpf_link_create_opts, link_opts); struct bpf_insn insns[] = { @@ -344,19 +358,20 @@ static int probe_uprobe_multi_link(void) return link_fd < 0 && err == -EBADF; } -static int probe_kern_bpf_cookie(void) +static int probe_kern_bpf_cookie(int token_fd) { struct bpf_insn insns[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), BPF_EXIT_INSN(), }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); int ret, insn_cnt = ARRAY_SIZE(insns); - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); return probe_fd(ret); } -static int probe_kern_btf_enum64(void) +static int probe_kern_btf_enum64(int token_fd) { static const char strs[] = "\0enum64"; __u32 types[] = { @@ -364,10 +379,10 @@ static int probe_kern_btf_enum64(void) }; return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); + strs, sizeof(strs), token_fd)); } -typedef int (*feature_probe_fn)(void); +typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -448,7 +463,7 @@ bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_ cache = &feature_cache; if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(); + ret = feat->probe(cache->token_fd); if (ret > 0) { WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); } else if (ret == 0) { diff --git 
a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4e6f8d225caa..1acd1c224c5e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10165,7 +10165,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(int token_fd) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index db4a499c0ec5..b45566e428d7 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -368,19 +368,21 @@ enum kern_feature_result { struct kern_feature_cache { enum kern_feature_result res[__FEAT_CNT]; + int token_fd; }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); -int probe_kern_syscall_wrapper(void); -int probe_memcg_account(void); +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); + const char *str_sec, size_t str_len, + int token_fd); int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9c4db90b92b6..8e7437006639 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd); int btf_fd, btf_len; __u8 *raw_btf; @@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -271,7 +273,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } static int probe_map_create(enum bpf_map_type map_type) -- cgit v1.2.3-70-g09d2 From 1d0dd6ea2e38c18e1b31a8c3c59b6bdfe4f4efde Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:38 -0800 Subject: libbpf: wire up BPF token support at BPF object level Add BPF token support to BPF object-level functionality. BPF token is supported by BPF object logic either as an explicitly provided BPF token from outside (through BPF FS path or explicit BPF token FD), or implicitly (unless prevented through bpf_object_open_opts). Implicit mode is assumed to be the most common one for user namespaced unprivileged workloads. 
The assumption is that privileged container manager sets up default BPF FS mount point at /sys/fs/bpf with BPF token delegation options (delegate_{cmds,maps,progs,attachs} mount options). BPF object during loading will attempt to create BPF token from /sys/fs/bpf location, and pass it for all relevant operations (currently, map creation, BTF load, and program load). In this implicit mode, if BPF token creation fails due to whatever reason (BPF FS is not mounted, or kernel doesn't support BPF token, etc), this is not considered an error. BPF object loading sequence will proceed with no BPF token. In explicit BPF token mode, user provides explicitly either custom BPF FS mount point path or creates BPF token on their own and just passes token FD directly. In such case, BPF object will either dup() token FD (to not require caller to hold onto it for entire duration of BPF object lifetime) or will attempt to create BPF token from provided BPF FS location. If BPF token creation fails, that is considered a critical error and BPF object load fails with an error. Libbpf provides a way to disable implicit BPF token creation, if it causes any troubles (BPF token is designed to be completely optional and shouldn't cause any problems even if provided, but in the world of BPF LSM, custom security logic can be installed that might change outcome dependin on the presence of BPF token). To disable libbpf's default BPF token creation behavior user should provide either invalid BPF token FD (negative), or empty bpf_token_path option. BPF token presence can influence libbpf's feature probing, so if BPF object has associated BPF token, feature probing is instructed to use BPF object-specific feature detection cache and token FD. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 7 ++- tools/lib/bpf/libbpf.c | 118 +++++++++++++++++++++++++++++++++++++--- tools/lib/bpf/libbpf.h | 28 +++++++++- tools/lib/bpf/libbpf_internal.h | 17 +++++- 4 files changed, 158 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ee95fd379d4d..63033c334320 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1367,6 +1369,7 @@ retry_load: opts.log_level = log_level; } + opts.token_fd = token_fd; btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1394,7 +1397,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1acd1c224c5e..db94bbe163e3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. 
It may break @@ -693,6 +695,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -2192,7 +2198,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = "/sys/fs/bpf"; + path = BPF_FS_DEFAULT_PATH; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3279,7 +3285,7 @@ skip_exception_cb: } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -4602,6 +4608,63 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } +static int bpf_object_prepare_token(struct bpf_object *obj) +{ + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is already set up */ + if (obj->token_fd > 0) + return 0; + /* token is explicitly prevented */ + if (obj->token_fd < 0) { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + /* reset to zero to avoid extra checks during map_create and prog_load steps */ + obj->token_fd = 0; + return 0; + } + + mandatory = obj->token_path != NULL; + level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", + obj->name, err, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? 
token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4611,6 +4674,7 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); if (obj->gen_loader) return 0; @@ -4620,9 +4684,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4645,6 +4709,9 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); + return feat_supported(NULL, feat_id); } @@ -4764,6 +4831,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -6599,6 +6667,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.token_fd = obj->token_fd; /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(obj); @@ -7060,10 +7129,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *obj_name, *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; char tmp_name[64]; - int err; + int err, token_fd; char *log_buf; size_t log_size; __u32 log_level; @@ -7097,6 +7166,22 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, NULL); + token_fd = OPTS_GET(opts, bpf_token_fd, -1); + /* non-empty token path can't be combined with invalid token FD */ + if (token_path && token_path[0] != '\0' && token_fd < 0) + return ERR_PTR(-EINVAL); + if (token_path && token_path[0] == '\0') { + /* empty token path can't be combined with valid token FD */ + if (token_fd > 0) + return ERR_PTR(-EINVAL); + /* empty token_path is equivalent to invalid token_fd */ + token_path = NULL; + token_fd = -1; + } + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7105,6 +7190,19 @@ static struct bpf_object *bpf_object_open(const char *path, 
const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); + if (token_fd > 0 && obj->token_fd < 0) { + err = -errno; + goto out; + } + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7615,7 +7713,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object__probe_loading(obj); + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); @@ -8152,6 +8251,11 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); + free(obj); } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..d3de39b537f3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,10 +177,36 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* FD of a BPF token instantiated by user through bpf_token_create() + * API. BPF object will keep dup()'ed FD internally, so passed token + * FD can be closed after BPF object/skeleton open step. + * + * Setting bpf_token_fd to negative value disables libbpf's automatic + * attempt to create BPF token from default BPF FS mount point + * (/sys/fs/bpf), in case this default behavior is undesirable. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. + */ + int bpf_token_fd; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. + * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. 
+ */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b45566e428d7..4cda32298c49 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -383,7 +383,9 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len, int token_fd); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -547,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. + * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -558,7 +571,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); errno = saved_errno; -- cgit v1.2.3-70-g09d2 From 98e0eaa36adfb580a3aa43fca62847ec0f625d3f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:39 -0800 Subject: selftests/bpf: add BPF object loading tests with explicit token passing Add a few tests that attempt to load BPF object containing privileged map, program, and the one requiring mandatory BTF uploading into the kernel (to validate token FD propagation to BPF_BTF_LOAD command). 
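From an application's point of view, the explicit-token flow these tests exercise boils down to the sketch below. The helper name is hypothetical and not part of the patch; it assumes libbpf with the bpf_token_path open option added in the previous patch and a BPF FS mount with delegation options at the given path:

static int load_with_explicit_token(const char *obj_path)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts,
		/* token creation from this mount point becomes mandatory */
		.bpf_token_path = "/sys/fs/bpf",
	);
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file(obj_path, &opts);
	if (!obj)
		return -errno;

	/* the token FD is used for map creation, BTF load and program load */
	err = bpf_object__load(obj);
	bpf_object__close(obj);
	return err;
}

Alternatively, a token FD created with bpf_token_create() can be passed through the bpf_token_fd option; libbpf dup()s it, so the caller may close its own copy right after the open step, which is what the map test below does.
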
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 159 +++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/priv_map.c | 13 ++ tools/testing/selftests/bpf/progs/priv_prog.c | 13 ++ 3 files changed, 185 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/priv_map.c create mode 100644 tools/testing/selftests/bpf/progs/priv_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index dc03790c6272..9812292336c9 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -14,6 +14,9 @@ #include #include #include +#include "priv_map.skel.h" +#include "priv_prog.skel.h" +#include "dummy_st_ops_success.skel.h" static inline int sys_mount(const char *dev_name, const char *dir_name, const char *type, unsigned long flags, @@ -643,6 +646,123 @@ cleanup: return err; } +static int userns_obj_priv_map(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_map *skel; + int err, token_fd; + + skel = priv_map__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_map__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + /* create token and pass it through bpf_token_fd */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "create_token")) + return -EINVAL; + + opts.bpf_token_path = NULL; + opts.bpf_token_fd = token_fd; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) + return -EINVAL; + + /* we can close our token FD, bpf_object owns dup()'ed FD now */ + close(token_fd); + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_fd_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_prog(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_prog *skel; + int err; + + skel = priv_prog__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_prog__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + return 0; +} + +/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, + * which should cause struct_ops application to fail, as BTF won't be uploaded + * into the kernel, even if STRUCT_OPS programs themselves are allowed + */ +static int validate_struct_ops_load(int mnt_fd, bool expect_success) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct dummy_st_ops_success *skel; + int err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = dummy_st_ops_success__open_opts(&opts); + if 
(!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (expect_success) { + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + } else /* expect failure */ { + if (!ASSERT_ERR(err, "obj_token_path_load")) + return -EINVAL; + } + + return 0; +} + +static int userns_obj_priv_btf_fail(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, false /* should fail */); +} + +static int userns_obj_priv_btf_success(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, true /* should succeed */); +} + +#define bit(n) (1ULL << (n)) + void test_token(void) { if (test__start_subtest("map_token")) { @@ -669,4 +789,43 @@ void test_token(void) subtest_userns(&opts, userns_prog_load); } + if (test__start_subtest("obj_priv_map")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .maps = bit(BPF_MAP_TYPE_QUEUE), + }; + + subtest_userns(&opts, userns_obj_priv_map); + } + if (test__start_subtest("obj_priv_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_KPROBE), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_prog); + } + if (test__start_subtest("obj_priv_btf_fail")) { + struct bpffs_opts opts = { + /* disallow BTF loading */ + .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_fail); + } + if (test__start_subtest("obj_priv_btf_success")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_success); + } } diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c new file mode 100644 index 000000000000..9085be50f03b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_map.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, 1); + __type(value, __u32); +} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c new file mode 100644 index 000000000000..3c7b2b618c8a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_prog.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe") +int kprobe_prog(void *ctx) +{ + return 1; +} -- cgit v1.2.3-70-g09d2 From 18678cf0ee13cf19bac4ecd55665e6d1d63108b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:40 -0800 Subject: selftests/bpf: add tests for BPF object load with implicit token Add a test to validate libbpf's implicit BPF token creation from default BPF FS location (/sys/fs/bpf). Also validate that disabling this implicit BPF token creation works. 
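The opt-out behavior validated here can be expressed as a small sketch as well. The helper is hypothetical; it assumes the bpf_token_path/bpf_token_fd options from this series:

static int load_without_implicit_token(const char *obj_path)
{
	LIBBPF_OPTS(bpf_object_open_opts, opts,
		/* empty path disables the implicit /sys/fs/bpf token;
		 * a negative .bpf_token_fd would have the same effect
		 */
		.bpf_token_path = "",
	);
	struct bpf_object *obj = bpf_object__open_file(obj_path, &opts);
	int err;

	if (!obj)
		return -errno;

	err = bpf_object__load(obj);	/* proceeds with no BPF token */
	bpf_object__close(obj);
	return err;
}
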
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 76 ++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 9812292336c9..1a3c3aacf537 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "priv_map.skel.h" @@ -45,6 +46,13 @@ static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) return syscall(__NR_fsmount, fs_fd, flags, ms_flags); } +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); +} + static int drop_priv_caps(__u64 *old_caps) { return cap_disable_effective((1ULL << CAP_BPF) | @@ -761,6 +769,63 @@ static int userns_obj_priv_btf_success(int mnt_fd) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +static int userns_obj_priv_implicit_token(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF + * token automatically and implicitly + */ + err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + return -EINVAL; + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from /sys/fs/bpf mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + return -EINVAL; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, should fail */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + return -EINVAL; + + /* now disable implicit token through negative bpf_token_fd, should fail */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + return -EINVAL; + + return 0; +} + #define bit(n) (1ULL << (n)) void test_token(void) @@ -828,4 +893,15 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_btf_success); } + if (test__start_subtest("obj_priv_implicit_token")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token); + } } -- cgit 
v1.2.3-70-g09d2 From ed54124b88056fd629c6af71664dfcd4d3b3e0b8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:41 -0800 Subject: libbpf: support BPF token path setting through LIBBPF_BPF_TOKEN_PATH envvar To allow external admin authority to override default BPF FS location (/sys/fs/bpf) for implicit BPF token creation, teach libbpf to recognize LIBBPF_BPF_TOKEN_PATH envvar. If it is specified and user application didn't explicitly specify neither bpf_token_path nor bpf_token_fd option, it will be treated exactly like bpf_token_path option, overriding default /sys/fs/bpf location and making BPF token mandatory. Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 14 ++++++++++---- tools/lib/bpf/libbpf.h | 13 +++++++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index db94bbe163e3..4b5ff9508e18 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7171,11 +7171,17 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, /* non-empty token path can't be combined with invalid token FD */ if (token_path && token_path[0] != '\0' && token_fd < 0) return ERR_PTR(-EINVAL); + /* empty token path can't be combined with valid token FD */ + if (token_path && token_path[0] == '\0' && token_fd > 0) + return ERR_PTR(-EINVAL); + /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, + * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as + * bpf_token_path option + */ + if (token_fd == 0 && !token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); + /* empty token_path is equivalent to invalid token_fd */ if (token_path && token_path[0] == '\0') { - /* empty token path can't be combined with valid token FD */ - if (token_fd > 0) - return ERR_PTR(-EINVAL); - /* empty token_path is equivalent to invalid token_fd */ token_path = NULL; token_fd = -1; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index d3de39b537f3..916904bd2a7a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -185,8 +185,16 @@ struct bpf_object_open_opts { * attempt to create BPF token from default BPF FS mount point * (/sys/fs/bpf), in case this default behavior is undesirable. * + * If bpf_token_path and bpf_token_fd are not specified, libbpf will + * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will + * be taken as a value of bpf_token_path option and will force libbpf + * to either create BPF token from provided custom BPF FS path, or + * will disable implicit BPF token creation, if envvar value is an + * empty string. + * * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. */ int bpf_token_fd; /* Path to BPF FS mount point to derive BPF token from. @@ -200,7 +208,8 @@ struct bpf_object_open_opts { * point (/sys/fs/bpf), in case this default behavior is undesirable. * * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. 
*/ const char *bpf_token_path; -- cgit v1.2.3-70-g09d2 From 322122bf8c75b1df78d6608516807a0354f6ab3c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 13 Dec 2023 11:08:42 -0800 Subject: selftests/bpf: add tests for LIBBPF_BPF_TOKEN_PATH envvar Add new subtest validating LIBBPF_BPF_TOKEN_PATH envvar semantics. Extend existing test to validate that LIBBPF_BPF_TOKEN_PATH allows to disable implicit BPF token creation by setting envvar to empty string. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231213190842.3844987-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 112 +++++++++++++++++++++++++ 1 file changed, 112 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 1a3c3aacf537..548aeb91ab0d 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -769,6 +769,9 @@ static int userns_obj_priv_btf_success(int mnt_fd) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" +#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" + static int userns_obj_priv_implicit_token(int mnt_fd) { LIBBPF_OPTS(bpf_object_open_opts, opts); @@ -791,6 +794,20 @@ static int userns_obj_priv_implicit_token(int mnt_fd) if (!ASSERT_OK(err, "move_mount_bpffs")) return -EINVAL; + /* disable implicit BPF token creation by setting + * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail + */ + err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + return -EINVAL; + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { + unsetenv(TOKEN_ENVVAR); + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + unsetenv(TOKEN_ENVVAR); + /* now the same struct_ops skeleton should succeed thanks to libppf * creating BPF token from /sys/fs/bpf mount point */ @@ -826,6 +843,90 @@ static int userns_obj_priv_implicit_token(int mnt_fd) return 0; } +static int userns_obj_priv_implicit_token_envvar(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over custom location, so libbpf can't create + * BPF token implicitly, unless pointed to it through + * LIBBPF_BPF_TOKEN_PATH envvar + */ + rmdir(TOKEN_BPFFS_CUSTOM); + if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + goto err_out; + err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + goto err_out; + + /* even though we have BPF FS with delegation, it's not at default + * /sys/fs/bpf location, so we still fail to load until envvar is set up + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { + dummy_st_ops_success__destroy(skel); + goto err_out; + } + + err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + goto err_out; + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from custom mount point + */ + skel = 
dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + goto err_out; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + goto err_out; + + /* now disable implicit token through negative bpf_token_fd, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + goto err_out; + + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return 0; +err_out: + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return -EINVAL; +} + #define bit(n) (1ULL << (n)) void test_token(void) @@ -904,4 +1005,15 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_implicit_token); } + if (test__start_subtest("obj_priv_implicit_token_envvar")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); + } } -- cgit v1.2.3-70-g09d2 From e6795330f88b4f643c649a02662d47b779340535 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:38 +0100 Subject: xdp: Add VLAN tag hint Implement functionality that enables drivers to expose VLAN tag to XDP code. VLAN tag is represented by 2 variables: - protocol ID, which is passed to bpf code in BE - VLAN TCI, in host byte order Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20231205210847.28460-10-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 4 ++++ Documentation/networking/xdp-rx-metadata.rst | 8 ++++++- include/net/xdp.h | 6 +++++ include/uapi/linux/netdev.h | 3 +++ net/core/xdp.c | 33 ++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 3 +++ tools/net/ynl/generated/netdev-user.c | 1 + 7 files changed, 57 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index eef6358ec587..aeec090e1387 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -54,6 +54,10 @@ definitions: name: hash doc: Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). + - + name: vlan-tag + doc: + Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). - type: flags name: xsk-flags diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index e3e9420fd817..a6e0ece18be5 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -20,7 +20,13 @@ Currently, the following kfuncs are supported. In the future, as more metadata is supported, this set will grow: .. 
kernel-doc:: net/core/xdp.c - :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash + :identifiers: bpf_xdp_metadata_rx_timestamp + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_hash + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_vlan_tag An XDP program can use these kfuncs to read the metadata into stack variables for its own consumption. Or, to pass the metadata on to other diff --git a/include/net/xdp.h b/include/net/xdp.h index b7d6fe61381f..8cd04a74dba5 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -404,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, NETDEV_XDP_RX_METADATA_HASH, \ bpf_xdp_metadata_rx_hash, \ xmo_rx_hash) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \ + NETDEV_XDP_RX_METADATA_VLAN_TAG, \ + bpf_xdp_metadata_rx_vlan_tag, \ + xmo_rx_vlan_tag) \ enum xdp_rx_metadata { #define XDP_METADATA_KFUNC(name, _, __, ___) name, @@ -465,6 +469,8 @@ struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type); + int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto, + u16 *vlan_tci); }; #ifdef CONFIG_NET diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** diff --git a/net/core/xdp.c b/net/core/xdp.c index b6f1d6dab3f2..4869c1c2d8f3 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -736,6 +736,39 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } +/** + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag + * @ctx: XDP context pointer. + * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID). + * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP) + * + * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*, + * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use + * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)** + * and should be used as follows: + * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();`` + * + * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag. + * Driver is expected to provide those in **host byte order (usually LE)**, + * so the bpf program should not perform byte conversion. + * According to 802.1Q standard, *VLAN TCI (Tag control information)* + * is a bit field that contains: + * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``, + * *Drop eligible indicator (DEI)* - 1 bit, + * *Priority code point (PCP)* - 3 bits. + * For detailed meaning of DEI and PCP, please refer to other sources. + * + * Return: + * * Returns 0 on success or ``-errno`` on error. 
+ * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc + * * ``-ENODATA`` : VLAN tag was not stripped or is not available + */ +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, u16 *vlan_tci) +{ + return -EOPNOTSUPP; +} + __bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 3b9dee94d4ce..e3fe748086bd 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -53,6 +53,7 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value) static const char * const netdev_xdp_rx_metadata_strmap[] = { [0] = "timestamp", [1] = "hash", + [2] = "vlan-tag", }; const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) -- cgit v1.2.3-70-g09d2 From e71a9fa7fdb2effcaaed37c207ec4f634c8f4901 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:44 +0100 Subject: selftests/bpf: Allow VLAN packets in xdp_hw_metadata Make VLAN c-tag and s-tag XDP hint testing more convenient by not skipping VLAN-ed packets. Allow both 802.1ad and 802.1Q headers. 
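The approach, sketched here outside the diff for orientation: step over an outer 802.1ad tag and, if present, an inner 802.1Q tag before looking at the L3 protocol, re-checking bounds after every step. The snippet below is a simplified sketch of that logic, assuming struct vlan_hdr and the ETH_P_* constants are available to the program:

  /* sketch only; data/data_end are the usual XDP packet pointers */
  struct ethhdr *eth = data;

  /* outer tag: 802.1ad s-tag or a lone 802.1Q c-tag */
  if ((void *)(eth + 1) <= data_end &&
      (eth->h_proto == bpf_htons(ETH_P_8021AD) ||
       eth->h_proto == bpf_htons(ETH_P_8021Q)))
  	eth = (void *)eth + sizeof(struct vlan_hdr);

  /* optional inner 802.1Q c-tag */
  if ((void *)(eth + 1) <= data_end && eth->h_proto == bpf_htons(ETH_P_8021Q))
  	eth = (void *)eth + sizeof(struct vlan_hdr);

  /* a further (eth + 1) bounds check is still needed before reading
   * eth->h_proto for the L3 parse
   */

Advancing the ethhdr pointer by sizeof(struct vlan_hdr) makes its h_proto field line up with the encapsulated protocol of the tag that was just skipped, which is all the parser needs.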
Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Link: https://lore.kernel.org/r/20231205210847.28460-16-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/xdp_hw_metadata.c | 10 +++++++++- tools/testing/selftests/bpf/xdp_metadata.h | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index f6d1cc9ad892..8767d919c881 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -26,15 +26,23 @@ int rx(struct xdp_md *ctx) { void *data, *data_meta, *data_end; struct ipv6hdr *ip6h = NULL; - struct ethhdr *eth = NULL; struct udphdr *udp = NULL; struct iphdr *iph = NULL; struct xdp_meta *meta; + struct ethhdr *eth; int err; data = (void *)(long)ctx->data; data_end = (void *)(long)ctx->data_end; eth = data; + + if (eth + 1 < data_end && (eth->h_proto == bpf_htons(ETH_P_8021AD) || + eth->h_proto == bpf_htons(ETH_P_8021Q))) + eth = (void *)eth + sizeof(struct vlan_hdr); + + if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q)) + eth = (void *)eth + sizeof(struct vlan_hdr); + if (eth + 1 < data_end) { if (eth->h_proto == bpf_htons(ETH_P_IP)) { iph = (void *)(eth + 1); diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 938a729bd307..6664893c2c77 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -9,6 +9,14 @@ #define ETH_P_IPV6 0x86DD #endif +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef ETH_P_8021AD +#define ETH_P_8021AD 0x88A8 +#endif + struct xdp_meta { __u64 rx_timestamp; __u64 xdp_timestamp; -- cgit v1.2.3-70-g09d2 From 8e68a4beba943bdffb342c601c649223f44b7329 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:45 +0100 Subject: selftests/bpf: Add flags and VLAN hint to xdp_hw_metadata Add VLAN hint to the xdp_hw_metadata program. Also, to make metadata layout more straightforward, add flags field to pass information about validity of every separate hint separately. 
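Condensed, the contract the flags field establishes between the BPF program and the AF_XDP consumer looks roughly like this (a sketch using the names from the selftest's xdp_metadata.h):

  /* BPF side: start from "nothing valid", then record either the value
   * or the error and advertise success per hint
   */
  meta->hint_valid = 0;

  err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, &meta->rx_vlan_tci);
  if (err)
  	meta->rx_vlan_tag_err = err;
  else
  	meta->hint_valid |= XDP_META_FIELD_VLAN_TAG;

  /* userspace side: only trust a field whose bit is set */
  if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG)
  	print_vlan_tci(meta->rx_vlan_tci);
  else
  	printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n", meta->rx_vlan_tag_err);

The timestamp and RSS-hash hints follow the same scheme, which is what lets the error codes share storage with the values in the unions added to struct xdp_meta.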
Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Link: https://lore.kernel.org/r/20231205210847.28460-17-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/xdp_hw_metadata.c | 28 ++++++++++++++---- tools/testing/selftests/bpf/xdp_hw_metadata.c | 34 ++++++++++++++++++---- tools/testing/selftests/bpf/xdp_metadata.h | 26 ++++++++++++++++- 3 files changed, 76 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index 8767d919c881..330ece2eabdb 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -20,6 +20,9 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp.frags") int rx(struct xdp_md *ctx) @@ -84,15 +87,28 @@ int rx(struct xdp_md *ctx) return XDP_PASS; } + meta->hint_valid = 0; + + meta->xdp_timestamp = bpf_ktime_get_tai_ns(); err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); - if (!err) - meta->xdp_timestamp = bpf_ktime_get_tai_ns(); + if (err) + meta->rx_timestamp_err = err; + else + meta->hint_valid |= XDP_META_FIELD_TS; + + err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, + &meta->rx_hash_type); + if (err) + meta->rx_hash_err = err; else - meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ + meta->hint_valid |= XDP_META_FIELD_RSS; - err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); - if (err < 0) - meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */ + err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); + if (err) + meta->rx_vlan_tag_err = err; + else + meta->hint_valid |= XDP_META_FIELD_VLAN_TAG; __sync_add_and_fetch(&pkts_redir, 1); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index c69c08933fdd..878d68db0325 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -21,6 +21,9 @@ #include "xsk.h" #include +#include +#include +#include #include #include #include @@ -182,19 +185,31 @@ static void print_tstamp_delta(const char *name, const char *refname, (double)delta / 1000); } +#define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */ +#define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */ +#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */ +static void print_vlan_tci(__u16 tag) +{ + __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag); + __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag); + bool dei = FIELD_GET(VLAN_DEI_MASK, tag); + + printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id); +} + static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; meta = data - sizeof(*meta); - if (meta->rx_hash_err < 0) - printf("No rx_hash err=%d\n", meta->rx_hash_err); - else + if (meta->hint_valid & XDP_META_FIELD_RSS) printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); + else + printf("No rx_hash, err=%d\n", meta->rx_hash_err); - if (meta->rx_timestamp) { + if (meta->hint_valid & XDP_META_FIELD_TS) { __u64 ref_tstamp = 
gettime(clock_id); /* store received timestamps to calculate a delta at tx */ @@ -206,7 +221,16 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) print_tstamp_delta("XDP RX-time", "User RX-time", meta->xdp_timestamp, ref_tstamp); } else { - printf("No rx_timestamp\n"); + printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err); + } + + if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) { + printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto)); + printf("rx_vlan_tci: "); + print_vlan_tci(meta->rx_vlan_tci); + } else { + printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n", + meta->rx_vlan_tag_err); } } diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 6664893c2c77..87318ad1117a 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -17,12 +17,36 @@ #define ETH_P_8021AD 0x88A8 #endif +#ifndef BIT +#define BIT(nr) (1 << (nr)) +#endif + +/* Non-existent checksum status */ +#define XDP_CHECKSUM_MAGIC BIT(2) + +enum xdp_meta_field { + XDP_META_FIELD_TS = BIT(0), + XDP_META_FIELD_RSS = BIT(1), + XDP_META_FIELD_VLAN_TAG = BIT(2), +}; + struct xdp_meta { - __u64 rx_timestamp; + union { + __u64 rx_timestamp; + __s32 rx_timestamp_err; + }; __u64 xdp_timestamp; __u32 rx_hash; union { __u32 rx_hash_type; __s32 rx_hash_err; }; + union { + struct { + __be16 rx_vlan_proto; + __u16 rx_vlan_tci; + }; + __s32 rx_vlan_tag_err; + }; + enum xdp_meta_field hint_valid; }; -- cgit v1.2.3-70-g09d2 From a3850af4ea25dadc8b35edf132340907d523657e Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:46 +0100 Subject: selftests/bpf: Add AF_INET packet generation to xdp_metadata The easiest way to simulate stripped VLAN tag in veth is to send a packet from VLAN interface, attached to veth. Unfortunately, this approach is incompatible with AF_XDP on TX side, because VLAN interfaces do not have such feature. Check both packets sent via AF_XDP TX and regular socket. AF_INET packet will also have a filled-in hash type (XDP_RSS_TYPE_L4), unlike AF_XDP packet, so more values can be checked. 
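A minimal sketch of the regular-socket sender (the address and port correspond to the test's RX_ADDR and AF_XDP consumer port; headers and error handling are elided):

  int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
  struct sockaddr_in dst = {
  	.sin_family = AF_INET,
  	.sin_port = htons(8080),			/* AF_XDP_CONSUMER_PORT */
  	.sin_addr.s_addr = inet_addr("10.0.0.2"),	/* RX_ADDR */
  };
  char payload[4] = { 0xAA, 0xAA, 0xAA, 0xAA };

  sendto(fd, payload, sizeof(payload), MSG_DONTWAIT,
         (struct sockaddr *)&dst, sizeof(dst));
  close(fd);

Such a packet traverses the regular UDP stack, so it arrives with a real L4 hash type and, once the sender sits on a VLAN interface, with a stripped VLAN tag, neither of which the AF_XDP-crafted packet provides.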
Signed-off-by: Larysa Zaremba Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231205210847.28460-18-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/xdp_metadata.c | 116 +++++++++++++++++---- 1 file changed, 97 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 33cdf88efa6b..e7f06cbdd845 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -20,7 +20,7 @@ #define UDP_PAYLOAD_BYTES 4 -#define AF_XDP_SOURCE_PORT 1234 +#define UDP_SOURCE_PORT 1234 #define AF_XDP_CONSUMER_PORT 8080 #define UMEM_NUM 16 @@ -33,6 +33,12 @@ #define RX_ADDR "10.0.0.2" #define PREFIX_LEN "8" #define FAMILY AF_INET +#define TX_NETNS_NAME "xdp_metadata_tx" +#define RX_NETNS_NAME "xdp_metadata_rx" +#define TX_MAC "00:00:00:00:00:01" +#define RX_MAC "00:00:00:00:00:02" + +#define XDP_RSS_TYPE_L4 BIT(3) struct xsk { void *umem_area; @@ -181,7 +187,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); ip_csum(iph); - udph->source = htons(AF_XDP_SOURCE_PORT); + udph->source = htons(UDP_SOURCE_PORT); udph->dest = htons(dst_port); udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, @@ -204,6 +210,30 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) return 0; } +static int generate_packet_inet(void) +{ + char udp_payload[UDP_PAYLOAD_BYTES]; + struct sockaddr_in rx_addr; + int sock_fd, err = 0; + + /* Build a packet */ + memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES); + rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR); + rx_addr.sin_family = AF_INET; + rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT); + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)")) + return sock_fd; + + err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT, + (void *)&rx_addr, sizeof(rx_addr)); + ASSERT_GE(err, 0, "sendto"); + + close(sock_fd); + return err; +} + static void complete_tx(struct xsk *xsk) { struct xsk_tx_metadata *meta; @@ -236,7 +266,7 @@ static void refill_rx(struct xsk *xsk, __u64 addr) } } -static int verify_xsk_metadata(struct xsk *xsk) +static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp) { const struct xdp_desc *rx_desc; struct pollfd fds = {}; @@ -290,17 +320,36 @@ static int verify_xsk_metadata(struct xsk *xsk) if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) return -1; + if (!sent_from_af_xdp) { + if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) + return -1; + goto done; + } + ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); /* checksum offload */ ASSERT_EQ(udph->check, htons(0x721c), "csum"); +done: xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); return 0; } +static void switch_ns_to_rx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(RX_NETNS_NAME); +} + +static void switch_ns_to_tx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(TX_NETNS_NAME); +} + void test_xdp_metadata(void) { struct xdp_metadata2 *bpf_obj2 = NULL; @@ -318,27 +367,31 @@ void test_xdp_metadata(void) int sock_fd; int ret; - /* Setup new networking namespace, with a veth pair. */ + /* Setup new networking namespaces, with a veth pair. 
*/ + SYS(out, "ip netns add " TX_NETNS_NAME); + SYS(out, "ip netns add " RX_NETNS_NAME); - SYS(out, "ip netns add xdp_metadata"); - tok = open_netns("xdp_metadata"); + tok = open_netns(TX_NETNS_NAME); SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME); + + SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); SYS(out, "ip link set dev " TX_NAME " up"); - SYS(out, "ip link set dev " RX_NAME " up"); SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + + /* Avoid ARP calls */ + SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME); + + switch_ns_to_rx(&tok); + + SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); + SYS(out, "ip link set dev " RX_NAME " up"); SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); rx_ifindex = if_nametoindex(RX_NAME); - tx_ifindex = if_nametoindex(TX_NAME); - /* Setup separate AF_XDP for TX and RX interfaces. */ - - ret = open_xsk(tx_ifindex, &tx_xsk); - if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) - goto out; + /* Setup separate AF_XDP for RX interface. */ ret = open_xsk(rx_ifindex, &rx_xsk); if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) @@ -379,18 +432,38 @@ void test_xdp_metadata(void) if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) goto out; - /* Send packet destined to RX AF_XDP socket. */ + switch_ns_to_tx(&tok); + + /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */ + tx_ifindex = if_nametoindex(TX_NAME); + ret = open_xsk(tx_ifindex, &tx_xsk); + if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) + goto out; + if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate AF_XDP_CONSUMER_PORT")) goto out; - /* Verify AF_XDP RX packet has proper metadata. */ - if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, + switch_ns_to_rx(&tok); + + /* Verify packet sent from AF_XDP has proper metadata. */ + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0, "verify_xsk_metadata")) goto out; + switch_ns_to_tx(&tok); complete_tx(&tx_xsk); + /* Now check metadata of packet, generated with network stack */ + if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet")) + goto out; + + switch_ns_to_rx(&tok); + + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0, + "verify_xsk_metadata")) + goto out; + /* Make sure freplace correctly picks up original bound device * and doesn't crash. */ @@ -408,11 +481,15 @@ void test_xdp_metadata(void) if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) goto out; + switch_ns_to_tx(&tok); + /* Send packet to trigger . */ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate freplace packet")) goto out; + switch_ns_to_rx(&tok); + while (!retries--) { if (bpf_obj2->bss->called) break; @@ -427,5 +504,6 @@ out: xdp_metadata__destroy(bpf_obj); if (tok) close_netns(tok); - SYS_NOFAIL("ip netns del xdp_metadata"); + SYS_NOFAIL("ip netns del " RX_NETNS_NAME); + SYS_NOFAIL("ip netns del " TX_NETNS_NAME); } -- cgit v1.2.3-70-g09d2 From 4c6612f6100c2d85212865dbd1a5d8a7e391d3cb Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:47 +0100 Subject: selftests/bpf: Check VLAN tag and proto in xdp_metadata Verify, whether VLAN tag and proto are set correctly. To simulate "stripped" VLAN tag on veth, send test packet from VLAN interface. 
Also, add TO_STR() macro for convenience. Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Link: https://lore.kernel.org/r/20231205210847.28460-19-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/xdp_metadata.c | 20 ++++++++++++++++++-- tools/testing/selftests/bpf/progs/xdp_metadata.c | 5 +++++ tools/testing/selftests/bpf/testing_helpers.h | 3 +++ 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index e7f06cbdd845..05edcf32f528 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -38,7 +38,13 @@ #define TX_MAC "00:00:00:00:00:01" #define RX_MAC "00:00:00:00:00:02" +#define VLAN_ID 59 +#define VLAN_PROTO "802.1Q" +#define VLAN_PID htons(ETH_P_8021Q) +#define TX_NAME_VLAN TX_NAME "." TO_STR(VLAN_ID) + #define XDP_RSS_TYPE_L4 BIT(3) +#define VLAN_VID_MASK 0xfff struct xsk { void *umem_area; @@ -323,6 +329,12 @@ static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp) if (!sent_from_af_xdp) { if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) return -1; + + if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto")) + return -1; goto done; } @@ -378,10 +390,14 @@ void test_xdp_metadata(void) SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); SYS(out, "ip link set dev " TX_NAME " up"); - SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + + SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN + " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID)); + SYS(out, "ip link set dev " TX_NAME_VLAN " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN); /* Avoid ARP calls */ - SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME); + SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN); switch_ns_to_rx(&tok); diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index 5d6c1245c310..31ca229bb3c0 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -23,6 +23,9 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp") int rx(struct xdp_md *ctx) @@ -86,6 +89,8 @@ int rx(struct xdp_md *ctx) meta->rx_timestamp = 1; bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); + bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 5b7a55136741..35284faff4f2 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -9,6 +9,9 @@ #include #include +#define __TO_STR(x) #x +#define TO_STR(x) __TO_STR(x) + int parse_num_list(const char *s, bool **set, int *set_len); __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); int 
bpf_prog_test_load(const char *file, enum bpf_prog_type type, -- cgit v1.2.3-70-g09d2 From 2e1d6a04116c373fbd25beddba4267178535bc60 Mon Sep 17 00:00:00 2001 From: Tushar Vyavahare Date: Thu, 14 Dec 2023 13:00:07 +0000 Subject: selftests/xsk: Fix for SEND_RECEIVE_UNALIGNED test Fix test broken by shared umem test and framework enhancement commit. Correct the current implementation of pkt_stream_replace_half() by ensuring that nb_valid_entries are not set to half, as this is not true for all the tests. Ensure that the expected value for valid_entries for the SEND_RECEIVE_UNALIGNED test equals the total number of packets sent, which is 4096. Create a new function called pkt_stream_pkt_set() that allows for packet modification to meet specific requirements while ensuring the accurate maintenance of the valid packet count to prevent inconsistencies in packet tracking. Fixes: 6d198a89c004 ("selftests/xsk: Add a test for shared umem feature") Reported-by: Maciej Fijalkowski Signed-off-by: Tushar Vyavahare Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20231214130007.33281-1-tushar.vyavahare@intel.com --- tools/testing/selftests/bpf/xskxceiver.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index b604c570309a..b1102ee13faa 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -634,16 +634,24 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk return nb_frags; } +static bool set_pkt_valid(int offset, u32 len) +{ + return len <= MAX_ETH_JUMBO_SIZE; +} + static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) { pkt->offset = offset; pkt->len = len; - if (len > MAX_ETH_JUMBO_SIZE) { - pkt->valid = false; - } else { - pkt->valid = true; - pkt_stream->nb_valid_entries++; - } + pkt->valid = set_pkt_valid(offset, len); +} + +static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) +{ + bool prev_pkt_valid = pkt->valid; + + pkt_set(pkt_stream, pkt, offset, len); + pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid; } static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) @@ -665,7 +673,7 @@ static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb for (i = 0; i < nb_pkts; i++) { struct pkt *pkt = &pkt_stream->pkts[i]; - pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len); pkt->pkt_nb = nb_start + i * nb_off; } @@ -700,10 +708,9 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) - pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); + pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); ifobj->xsk->pkt_stream = pkt_stream; - pkt_stream->nb_valid_entries /= 2; } static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) -- cgit v1.2.3-70-g09d2 From 56925f389e152dcb8d093435d43b78a310539c23 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 14 Dec 2023 12:38:20 -0800 Subject: selftests/bpf: Remove flaky test_btf_id test With previous patch, one of subtests in test_btf_id becomes flaky and may fail. 
The following is a failing example: Error: #26 btf Error: #26/174 btf/BTF ID Error: #26/174 btf/BTF ID btf_raw_create:PASS:check 0 nsec btf_raw_create:PASS:check 0 nsec test_btf_id:PASS:check 0 nsec ... test_btf_id:PASS:check 0 nsec test_btf_id:FAIL:check BTF lingersdo_test_get_info:FAIL:check failed: -1 The test tries to prove a btf_id not available after the map is closed. But btf_id is freed only after workqueue and a rcu grace period, compared to previous case just after a rcu grade period. Depending on system workload, workqueue could take quite some time to execute function bpf_map_free_deferred() which may cause the test failure. Instead of adding arbitrary delays, let us remove the logic to check btf_id availability after map is closed. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20231214203820.1469402-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8fb4a04fbbc0..816145bcb647 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4630,11 +4630,6 @@ static int test_btf_id(unsigned int test_num) /* The map holds the last ref to BTF and its btf_id */ close(map_fd); map_fd = -1; - btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] >= 0, "BTF lingers")) { - err = -1; - goto done; - } fprintf(stderr, "OK"); -- cgit v1.2.3-70-g09d2 From 77a7a8220f0d87c44425c0a12e0a72b14962535b Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:03 -0700 Subject: bpf: selftests: test_tunnel: Setup fresh topology for each subtest This helps with determinism b/c individual setup/teardown prevents leaking state between different subtests. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/0fb59fa16fb58cca7def5239df606005a3e8dd0e.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/test_tunnel.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index d149ab98798d..b57d48219d0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -535,23 +535,20 @@ done: #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ + config_device(); \ test_ ## name(__VA_ARGS__); \ + cleanup(); \ } \ }) static void *test_tunnel_run_tests(void *arg) { - cleanup(); - config_device(); - RUN_TEST(vxlan_tunnel); RUN_TEST(ip6vxlan_tunnel); RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); - cleanup(); - return NULL; } -- cgit v1.2.3-70-g09d2 From 02b4e126e6a5f5552da2ccec47a028984d2d9654 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:04 -0700 Subject: bpf: selftests: test_tunnel: Use vmlinux.h declarations vmlinux.h declarations are more ergnomic, especially when working with kfuncs. The uapi headers are often incomplete for kfunc definitions. This commit also switches bitfield accesses to use CO-RE helpers. Switching to vmlinux.h definitions makes the verifier very unhappy with raw bitfield accesses. 
The error is: ; md.u.md2.dir = direction; 33: (69) r1 = *(u16 *)(r2 +11) misaligned stack access off (0x0; 0x0)+-64+11 size 2 Fix by using CO-RE-aware bitfield reads and writes. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/884bde1d9a351d126a3923886b945ea6b1b0776b.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/bpf_tracing_net.h | 1 + .../testing/selftests/bpf/progs/test_tunnel_kern.c | 76 ++++++---------------- 2 files changed, 22 insertions(+), 55 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 0b793a102791..1bdc680b0e0e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -26,6 +26,7 @@ #define IPV6_AUTOFLOWLABEL 70 #define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 #define TC_ACT_SHOT 2 #define SOL_TCP 6 diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index f66af753bbbb..b320fb7bb080 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -6,62 +6,26 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "vmlinux.h" +#include #include #include +#include "bpf_kfuncs.h" +#include "bpf_tracing_net.h" #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) -#define VXLAN_UDP_PORT 4789 +#define VXLAN_UDP_PORT 4789 +#define ETH_P_IP 0x0800 +#define PACKET_HOST 0 +#define TUNNEL_CSUM bpf_htons(0x01) +#define TUNNEL_KEY bpf_htons(0x04) /* Only IPv4 address assigned to veth1. 
* 172.16.1.200 */ #define ASSIGNED_ADDR_VETH1 0xac1001c8 -struct geneve_opt { - __be16 opt_class; - __u8 type; - __u8 length:5; - __u8 r3:1; - __u8 r2:1; - __u8 r1:1; - __u8 opt_data[8]; /* hard-coded to 8 byte */ -}; - -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -} __attribute__((packed)); - -struct vxlan_metadata { - __u32 gbp; -}; - -struct bpf_fou_encap { - __be16 sport; - __be16 dport; -}; - -enum bpf_fou_encap_type { - FOU_BPF_ENCAP_FOU, - FOU_BPF_ENCAP_GUE, -}; - int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, @@ -205,9 +169,9 @@ int erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 7; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -246,8 +210,9 @@ int erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif @@ -284,9 +249,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 17; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -326,8 +291,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif -- cgit v1.2.3-70-g09d2 From e7adc8291a9e9c232d600d82465cbbb682164ca3 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:05 -0700 Subject: bpf: selftests: Move xfrm tunnel test to test_progs test_progs is better than a shell script b/c C is a bit easier to maintain than shell. Also it's easier to use new infra like memory mapped global variables from C via bpf skeleton. 
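The "memory mapped global variables" point is the main mechanical difference from the shell version: instead of bpf_printk() plus grepping the trace buffer, the BPF program stores its results in plain globals and the C test reads them back through the skeleton. A condensed sketch of that pattern, mirroring what the converted test below does:

  /* BPF object: writable globals live in .bss and are mmap'ed by libbpf */
  volatile int xfrm_reqid = 0;

  SEC("tc")
  int xfrm_get_state(struct __sk_buff *skb)
  {
  	struct bpf_xfrm_state x;
  	int ret;

  	ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
  	if (ret < 0)
  		return TC_ACT_OK;
  	xfrm_reqid = x.reqid;	/* was bpf_printk("reqid %d ...") */
  	return TC_ACT_OK;
  }

  /* test_progs side: assert directly on the skeleton-mapped value */
  if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
  	goto done;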
Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/a350db9e08520c64544562d88ec005a039124d9b.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_tunnel.c | 143 +++++++++++++++++++++ .../testing/selftests/bpf/progs/test_tunnel_kern.c | 11 +- tools/testing/selftests/bpf/test_tunnel.sh | 92 ------------- 3 files changed, 151 insertions(+), 95 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index b57d48219d0b..2d7f8fa82ebd 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -50,6 +50,7 @@ */ #include +#include #include #include #include @@ -92,6 +93,11 @@ #define IPIP_TUNL_DEV0 "ipip00" #define IPIP_TUNL_DEV1 "ipip11" +#define XFRM_AUTH "0x1111111111111111111111111111111111111111" +#define XFRM_ENC "0x22222222222222222222222222222222" +#define XFRM_SPI_IN_TO_OUT 0x1 +#define XFRM_SPI_OUT_TO_IN 0x2 + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void) SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); } +static int add_xfrm_tunnel(void) +{ + /* at_ns0 namespace + * at_ns0 -> root + */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32", + IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s", + IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0); + + /* root namespace + * at_ns0 -> root + */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip addr add 
dev veth1 %s/32", IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip route add %s dev veth1 via %s src %s", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_xfrm_tunnel(void) +{ + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -532,6 +624,56 @@ done: test_tunnel_kern__destroy(skel); } +static void test_xfrm_tunnel(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int tc_prog_fd; + int ifindex; + int err; + + err = add_xfrm_tunnel(); + if (!ASSERT_OK(err, "add_xfrm_tunnel")) + return; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + + /* attach tc prog to tunnel dev */ + tc_hook.ifindex = ifindex; + tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + goto done; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); + if (!ASSERT_OK(err, "test_ping")) + goto done; + + if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) + goto done; + +done: + delete_xfrm_tunnel(); + if (skel) + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) 
\ ({ \ if (test__start_subtest(#name)) { \ @@ -548,6 +690,7 @@ static void *test_tunnel_run_tests(void *arg) RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); + RUN_TEST(xfrm_tunnel); return NULL; } diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index b320fb7bb080..3a59eb9c34de 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -929,6 +929,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_reqid = 0; +volatile int xfrm_spi = 0; +volatile int xfrm_remote_ip = 0; + SEC("tc") int xfrm_get_state(struct __sk_buff *skb) { @@ -939,9 +943,10 @@ int xfrm_get_state(struct __sk_buff *skb) if (ret < 0) return TC_ACT_OK; - bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", - x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); + xfrm_reqid = x.reqid; + xfrm_spi = bpf_ntohl(x.spi); + xfrm_remote_ip = bpf_ntohl(x.remote_ipv4); + return TC_ACT_OK; } diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2dec7dbf29a2..d9661b9988ba 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -517,90 +517,6 @@ test_ip6ip6() echo -e ${GREEN}"PASS: ip6$TYPE"${NC} } -setup_xfrm_tunnel() -{ - auth=0x$(printf '1%.0s' {1..40}) - enc=0x$(printf '2%.0s' {1..32}) - spi_in_to_out=0x1 - spi_out_to_in=0x2 - # at_ns0 namespace - # at_ns0 -> root - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip netns exec at_ns0 \ - ip addr add dev veth0 10.1.1.100/32 - ip netns exec at_ns0 \ - ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ - src 10.1.1.100 - - # root namespace - # at_ns0 -> root - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip addr add dev veth1 10.1.1.200/32 - ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 -} - -test_xfrm_tunnel() -{ - if [[ -e /sys/kernel/tracing/trace ]]; then - TRACE=/sys/kernel/tracing/trace - else - TRACE=/sys/kernel/debug/tracing/trace - fi - config_device - > ${TRACE} - setup_xfrm_tunnel - mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ${BPF_FILE} 
${BPF_PIN_TUNNEL_DIR} - tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da object-pinned \ - ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - sleep 1 - grep "reqid 1" ${TRACE} - check_err $? - grep "spi 0x1" ${TRACE} - check_err $? - grep "remote ip 0xac100164" ${TRACE} - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: xfrm tunnel"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: xfrm tunnel"${NC} -} - attach_bpf() { DEV=$1 @@ -630,10 +546,6 @@ cleanup() ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null ip link del ip6erspan11 2> /dev/null - ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null - ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null - ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null - ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null } cleanup_exit() @@ -716,10 +628,6 @@ bpf_tunnel_test() test_ip6ip6 errors=$(( $errors + $? )) - echo "Testing IPSec tunnel..." - test_xfrm_tunnel - errors=$(( $errors + $? )) - return $errors } -- cgit v1.2.3-70-g09d2 From 2cd07b0eb08c0ed63b1bd0bf0114146b19a4ab1f Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:06 -0700 Subject: bpf: xfrm: Add selftest for bpf_xdp_get_xfrm_state() This commit extends test_tunnel selftest to test the new XDP xfrm state lookup kfunc. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/e704e9a4332e3eac7b458e4bfdec8fcc6984cdb6.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_tunnel.c | 16 ++++++- .../testing/selftests/bpf/progs/test_tunnel_kern.c | 51 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 2d7f8fa82ebd..2b3c6dd66259 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -278,7 +278,7 @@ static int add_xfrm_tunnel(void) SYS(fail, "ip netns exec at_ns0 " "ip xfrm state add src %s dst %s proto esp " - "spi %d reqid 1 mode tunnel " + "spi %d reqid 1 mode tunnel replay-window 42 " "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); SYS(fail, @@ -313,7 +313,7 @@ static int add_xfrm_tunnel(void) */ SYS(fail, "ip xfrm state add src %s dst %s proto esp " - "spi %d reqid 1 mode tunnel " + "spi %d reqid 1 mode tunnel replay-window 42 " "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); SYS(fail, @@ -628,8 +628,10 @@ static void test_xfrm_tunnel(void) { DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct test_tunnel_kern *skel = NULL; struct nstoken *nstoken; + int xdp_prog_fd; int tc_prog_fd; int ifindex; int err; @@ -654,6 +656,14 @@ static void test_xfrm_tunnel(void) if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) goto done; + /* attach xdp prog to tunnel dev */ + xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) + goto done; + err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts); + if (!ASSERT_OK(err, 
"bpf_xdp_attach")) + goto done; + /* ping from at_ns0 namespace test */ nstoken = open_netns("at_ns0"); err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); @@ -667,6 +677,8 @@ static void test_xfrm_tunnel(void) goto done; if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) goto done; + if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window")) + goto done; done: delete_xfrm_tunnel(); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 3a59eb9c34de..3e436e6f7312 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -30,6 +30,10 @@ int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap) __ksym; +struct xfrm_state * +bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, + u32 opts__sz) __ksym; +void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -950,4 +954,51 @@ int xfrm_get_state(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_replay_window = 0; + +SEC("xdp") +int xfrm_get_state_xdp(struct xdp_md *xdp) +{ + struct bpf_xfrm_state_opts opts = {}; + struct xfrm_state *x = NULL; + struct ip_esp_hdr *esph; + struct bpf_dynptr ptr; + u8 esph_buf[8] = {}; + u8 iph_buf[20] = {}; + struct iphdr *iph; + u32 off; + + if (bpf_dynptr_from_xdp(xdp, 0, &ptr)) + goto out; + + off = sizeof(struct ethhdr); + iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf)); + if (!iph || iph->protocol != IPPROTO_ESP) + goto out; + + off += sizeof(struct iphdr); + esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf)); + if (!esph) + goto out; + + opts.netns_id = BPF_F_CURRENT_NETNS; + opts.daddr.a4 = iph->daddr; + opts.spi = esph->spi; + opts.proto = IPPROTO_ESP; + opts.family = AF_INET; + + x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts)); + if (!x) + goto out; + + if (!x->replay_esn) + goto out; + + xfrm_replay_window = x->replay_esn->replay_window; +out: + if (x) + bpf_xdp_xfrm_state_release(x); + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From f2d0ffee1f03395d9ae65f9c615b6a0ee05d0e12 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 14:50:16 -0800 Subject: selftests/bpf: utilize string values for delegate_xxx mount options Use both hex-based and string-based way to specify delegate mount options for BPF FS. 
Acked-by: John Fastabend Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231214225016.1209867-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/token.c | 52 ++++++++++++++++---------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index 548aeb91ab0d..b5dce630e0e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -66,14 +66,22 @@ static int restore_priv_caps(__u64 old_caps) return cap_enable_effective(old_caps, NULL); } -static int set_delegate_mask(int fs_fd, const char *key, __u64 mask) +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) { char buf[32]; int err; - snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + if (!mask_str) { + if (mask == ~0ULL) { + mask_str = "any"; + } else { + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + mask_str = buf; + } + } + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, - mask == ~0ULL ? "any" : buf, 0); + mask_str, 0); if (err < 0) err = -errno; return err; @@ -86,6 +94,10 @@ struct bpffs_opts { __u64 maps; __u64 progs; __u64 attachs; + const char *cmds_str; + const char *maps_str; + const char *progs_str; + const char *attachs_str; }; static int create_bpffs_fd(void) @@ -104,16 +116,16 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) int mnt_fd, err; /* set up token delegation mount options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds); + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); if (!ASSERT_OK(err, "fs_cfg_cmds")) return err; - err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps); + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); if (!ASSERT_OK(err, "fs_cfg_maps")) return err; - err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs); + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); if (!ASSERT_OK(err, "fs_cfg_progs")) return err; - err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs); + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); if (!ASSERT_OK(err, "fs_cfg_attachs")) return err; @@ -295,13 +307,13 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba } /* ensure unprivileged child cannot set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1); + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); - err = set_delegate_mask(fs_fd, "delegate_maps", 0x1); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); - err = set_delegate_mask(fs_fd, "delegate_progs", 0x1); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); - err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); /* pass BPF FS context object to parent */ @@ -325,22 +337,22 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba } /* ensure unprivileged child cannot reconfigure to set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", ~0ULL); + err = set_delegate_mask(fs_fd, 
"delegate_cmds", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_maps", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_progs", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { err = -EINVAL; goto cleanup; } - err = set_delegate_mask(fs_fd, "delegate_attachs", ~0ULL); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { err = -EINVAL; goto cleanup; @@ -933,8 +945,8 @@ void test_token(void) { if (test__start_subtest("map_token")) { struct bpffs_opts opts = { - .cmds = 1ULL << BPF_MAP_CREATE, - .maps = 1ULL << BPF_MAP_TYPE_STACK, + .cmds_str = "map_create", + .maps_str = "stack", }; subtest_userns(&opts, userns_map_create); @@ -948,9 +960,9 @@ void test_token(void) } if (test__start_subtest("prog_token")) { struct bpffs_opts opts = { - .cmds = 1ULL << BPF_PROG_LOAD, - .progs = 1ULL << BPF_PROG_TYPE_XDP, - .attachs = 1ULL << BPF_XDP, + .cmds_str = "PROG_LOAD", + .progs_str = "XDP", + .attachs_str = "xdp", }; subtest_userns(&opts, userns_prog_load); -- cgit v1.2.3-70-g09d2 From 4dc27587dcbaf9f6229222ba015344c0edce24c9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:25 -0800 Subject: tools: ynl-gen: add missing request free helpers for dumps The code gen generates a prototype for dump request free in the header, but no implementation in the source. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 266bc1629e58..9484882dbc2e 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2771,6 +2771,7 @@ def main(): ri = RenderInfo(cw, parsed, args.mode, op, "dump") if not ri.type_consistent: parse_rsp_msg(ri, deref=True) + print_req_free(ri) print_dump_type_free(ri) print_dump(ri) cw.nl() -- cgit v1.2.3-70-g09d2 From 139c163b5b0b7095bf88415da030795c403baa33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:26 -0800 Subject: tools: ynl-gen: use enum user type for members and args Commit 30c902001534 ("tools: ynl-gen: use enum name from the spec") added pre-cooked user type for enums. Use it to fix ignoring enum-name provided in the spec. This changes a type in struct ethtool_tunnel_udp_entry but is generally inconsequential for current families. 
Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 9484882dbc2e..ab009d0f9db5 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -333,9 +333,8 @@ class TypeScalar(Type): else: self.is_bitfield = False - maybe_enum = not self.is_bitfield and 'enum' in self.attr - if maybe_enum and self.family.consts[self.attr['enum']].enum_name: - self.type_name = c_lower(f"enum {self.family.name}_{self.attr['enum']}") + if not self.is_bitfield and 'enum' in self.attr: + self.type_name = self.family.consts[self.attr['enum']].user_type elif self.is_auto_scalar: self.type_name = '__' + self.type[0] + '64' else: -- cgit v1.2.3-70-g09d2 From f6805072c2aa81e6441c797bd074f4ae2db0c66e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:27 -0800 Subject: tools: ynl-gen: support fixed headers in genetlink Support genetlink families using simple fixed headers. Assume fixed header is identical for all ops of the family for now. Fixed headers are added to the request and reply structs as a _hdr member, and copied to/from netlink messages appropriately. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.c | 8 ++++---- tools/net/ynl/lib/ynl.h | 1 + tools/net/ynl/ynl-gen-c.py | 44 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 587286de10b5..c82a7f41b31c 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -191,12 +191,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, str ? " (" : ""); start = mnl_nlmsg_get_payload_offset(ys->nlh, - sizeof(struct genlmsghdr)); + ys->family->hdr_len); end = mnl_nlmsg_get_payload_tail(ys->nlh); off = ys->err.attr_offs; off -= sizeof(struct nlmsghdr); - off -= sizeof(struct genlmsghdr); + off -= ys->family->hdr_len; n += ynl_err_walk(ys, start, end, off, ys->req_policy, &bad_attr[n], sizeof(bad_attr) - n, NULL); @@ -217,14 +217,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, bad_attr[0] ? ", " : (str ? 
" (" : "")); start = mnl_nlmsg_get_payload_offset(ys->nlh, - sizeof(struct genlmsghdr)); + ys->family->hdr_len); end = mnl_nlmsg_get_payload_tail(ys->nlh); nest_pol = ys->req_policy; if (tb[NLMSGERR_ATTR_MISS_NEST]) { off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]); off -= sizeof(struct nlmsghdr); - off -= sizeof(struct genlmsghdr); + off -= ys->family->hdr_len; n += ynl_err_walk(ys, start, end, off, ys->req_policy, &miss_attr[n], sizeof(miss_attr) - n, diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index 5de580b992b8..ce77a6d76ce0 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -44,6 +44,7 @@ struct ynl_error { struct ynl_family { /* private: */ const char *name; + size_t hdr_len; const struct ynl_ntf_info *ntf_info; unsigned int ntf_info_size; }; diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index ab009d0f9db5..70e2c41e5bd6 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1124,6 +1124,10 @@ class RenderInfo: self.op_mode = op_mode self.op = op + self.fixed_hdr = None + if op and op.fixed_header: + self.fixed_hdr = 'struct ' + c_lower(op.fixed_header) + # 'do' and 'dump' response parsing is identical self.type_consistent = True if op_mode != 'do' and 'dump' in op: @@ -1570,7 +1574,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): if struct.nested: iter_line = "mnl_attr_for_each_nested(attr, nested)" else: - iter_line = "mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr))" + if ri.fixed_hdr: + local_vars += ['void *hdr;'] + iter_line = "mnl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)" array_nests = set() multi_attrs = set() @@ -1603,6 +1609,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): for arg in struct.inherited: ri.cw.p(f'dst->{arg} = {arg};') + if ri.fixed_hdr: + ri.cw.p('hdr = mnl_nlmsg_get_payload_offset(nlh, sizeof(struct genlmsghdr));') + ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));") for anest in sorted(all_multi): aspec = struct[anest] ri.cw.p(f"if (dst->{aspec.c_name})") @@ -1723,6 +1732,10 @@ def print_req(ri): ret_err = 'NULL' local_vars += [f'{type_name(ri, rdir(direction))} *rsp;'] + if ri.fixed_hdr: + local_vars += ['size_t hdr_len;', + 'void *hdr;'] + print_prototype(ri, direction, terminate=False) ri.cw.block_start() ri.cw.write_func_lvar(local_vars) @@ -1733,6 +1746,13 @@ def print_req(ri): if 'reply' in ri.op[ri.op_mode]: ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") ri.cw.nl() + + if ri.fixed_hdr: + ri.cw.p("hdr_len = sizeof(req->_hdr);") + ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);") + ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);") + ri.cw.nl() + for _, attr in ri.struct["request"].member_list(): attr.attr_put(ri, "req") ri.cw.nl() @@ -1773,9 +1793,11 @@ def print_dump(ri): 'struct nlmsghdr *nlh;', 'int err;'] - for var in local_vars: - ri.cw.p(f'{var}') - ri.cw.nl() + if ri.fixed_hdr: + local_vars += ['size_t hdr_len;', + 'void *hdr;'] + + ri.cw.write_func_lvar(local_vars) ri.cw.p('yds.ys = ys;') ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});") @@ -1788,6 +1810,12 @@ def print_dump(ri): ri.cw.nl() ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") + if ri.fixed_hdr: + ri.cw.p("hdr_len = sizeof(req->_hdr);") + ri.cw.p("hdr = mnl_nlmsg_put_extra_header(nlh, hdr_len);") + ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);") + ri.cw.nl() + if "request" in ri.op[ri.op_mode]: ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;") 
ri.cw.nl() @@ -1845,6 +1873,10 @@ def _print_type(ri, direction, struct): ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}") + if ri.fixed_hdr: + ri.cw.p(ri.fixed_hdr + ' _hdr;') + ri.cw.nl() + meta_started = False for _, attr in struct.member_list(): for type_filter in ['len', 'bit']: @@ -2482,6 +2514,10 @@ def render_user_family(family, cw, prototype): cw.block_start(f'{symbol} = ') cw.p(f'.name\t\t= "{family.c_name}",') + if family.fixed_header: + cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),') + else: + cw.p('.hdr_len\t= sizeof(struct genlmsghdr),') if family.ntfs: cw.p(f".ntf_info\t= {family['name']}_ntf_info,") cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),") -- cgit v1.2.3-70-g09d2 From f967a498fce89e9291f01b8f29109fe782d7670a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:28 -0800 Subject: tools: ynl-gen: fill in implementations for TypeUnused Fill in more empty handlers for TypeUnused. When 'unused' attr gets specified in a nested set we have to cleanly skip it during code generation. Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 70e2c41e5bd6..2b7961838fb6 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -264,6 +264,15 @@ class TypeUnused(Type): def attr_policy(self, cw): pass + def attr_put(self, ri, var): + pass + + def attr_get(self, ri, var, first): + pass + + def setter(self, ri, space, direction, deref=False, ref=None): + pass + class TypePad(Type): def presence_type(self): -- cgit v1.2.3-70-g09d2 From 38329fcfb757b8215c07a77b6657721cc7e9530e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:29 -0800 Subject: tools: ynl-gen: record information about recursive nests Track which nests are recursive. Non-recursive nesting gets rendered in C as directly nested structs. For recursive ones we need to put a pointer in, rather than full struct. Track this information, no change to generated code, yet. 
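For context, C cannot embed a struct inside itself by value (the type would have infinite size), which is why a recursive nest ultimately has to be rendered as a pointer member. A minimal sketch with hypothetical names:

  #include <linux/types.h>

  /* Hypothetical attr set that can nest itself, e.g. an expression tree. */
  struct test_expr {
          __u32 op;
          struct test_expr *operand;   /* recursion is only possible via a pointer */
  };
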
Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 2b7961838fb6..8a2c304cd2ad 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -105,6 +105,9 @@ class Type(SpecAttr): def is_scalar(self): return self.type in {'u8', 'u16', 'u32', 'u64', 's32', 's64'} + def is_recursive(self): + return False + def presence_type(self): return 'bit' @@ -529,6 +532,9 @@ class TypeBitfield32(Type): class TypeNest(Type): + def is_recursive(self): + return self.family.pure_nested_structs[self.nested_attrs].recursive + def _complex_member_type(self, ri): return self.nested_struct_type @@ -700,9 +706,12 @@ class Struct: if self.nested and space_name in family.consts: self.struct_name += '_' self.ptr_name = self.struct_name + ' *' + # All attr sets this one contains, directly or multiple levels down + self.child_nests = set() self.request = False self.reply = False + self.recursive = False self.attr_list = [] self.attrs = dict() @@ -1059,14 +1068,20 @@ class Family(SpecFamily): pns_key_seen.add(name) else: pns_key_list.append(name) - # Propagate the request / reply + # Propagate the request / reply / recursive for attr_set, struct in reversed(self.pure_nested_structs.items()): for _, spec in self.attr_sets[attr_set].items(): if 'nested-attributes' in spec: - child = self.pure_nested_structs.get(spec['nested-attributes']) + child_name = spec['nested-attributes'] + struct.child_nests.add(child_name) + child = self.pure_nested_structs.get(child_name) if child: + if not child.recursive: + struct.child_nests.update(child.child_nests) child.request |= struct.request child.reply |= struct.reply + if attr_set in struct.child_nests: + struct.recursive = True def _load_attr_use(self): for _, struct in self.pure_nested_structs.items(): -- cgit v1.2.3-70-g09d2 From aa75783b95a1e7fc09129f5364476e6effe47392 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:30 -0800 Subject: tools: ynl-gen: re-sort ignoring recursive nests We try to keep the structures and helpers "topologically sorted", to avoid forward declarations. When recursive nests are at play we need to sort twice, because structs which end up being marked as recursive will get a full set of forward declarations, so we should ignore them for the purpose of sorting. 
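The reason recursive structs can be ignored while sorting is that they receive forward declarations anyway, so their definition order stops mattering; only by-value members force an order. A hypothetical sketch:

  struct test_b;                       /* forward declaration, emitted for recursive types */

  struct test_a {
          struct test_b *child;        /* a pointer member only needs the forward declaration */
  };

  struct test_b {
          struct test_a inner;         /* a by-value member needs test_a fully defined first */
  };
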
Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 52 +++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 21 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 8a2c304cd2ad..4ef3a774c402 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1008,6 +1008,33 @@ class Family(SpecFamily): self.root_sets[op['attribute-set']]['request'].update(req_attrs) self.root_sets[op['attribute-set']]['reply'].update(rsp_attrs) + def _sort_pure_types(self): + # Try to reorder according to dependencies + pns_key_list = list(self.pure_nested_structs.keys()) + pns_key_seen = set() + rounds = len(pns_key_list) ** 2 # it's basically bubble sort + for _ in range(rounds): + if len(pns_key_list) == 0: + break + name = pns_key_list.pop(0) + finished = True + for _, spec in self.attr_sets[name].items(): + if 'nested-attributes' in spec: + nested = spec['nested-attributes'] + # If the unknown nest we hit is recursive it's fine, it'll be a pointer + if self.pure_nested_structs[nested].recursive: + continue + if nested not in pns_key_seen: + # Dicts are sorted, this will make struct last + struct = self.pure_nested_structs.pop(name) + self.pure_nested_structs[name] = struct + finished = False + break + if finished: + pns_key_seen.add(name) + else: + pns_key_list.append(name) + def _load_nested_sets(self): attr_set_queue = list(self.root_sets.keys()) attr_set_seen = set(self.root_sets.keys()) @@ -1047,27 +1074,8 @@ class Family(SpecFamily): if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True - # Try to reorder according to dependencies - pns_key_list = list(self.pure_nested_structs.keys()) - pns_key_seen = set() - rounds = len(pns_key_list)**2 # it's basically bubble sort - for _ in range(rounds): - if len(pns_key_list) == 0: - break - name = pns_key_list.pop(0) - finished = True - for _, spec in self.attr_sets[name].items(): - if 'nested-attributes' in spec: - if spec['nested-attributes'] not in pns_key_seen: - # Dicts are sorted, this will make struct last - struct = self.pure_nested_structs.pop(name) - self.pure_nested_structs[name] = struct - finished = False - break - if finished: - pns_key_seen.add(name) - else: - pns_key_list.append(name) + self._sort_pure_types() + # Propagate the request / reply / recursive for attr_set, struct in reversed(self.pure_nested_structs.items()): for _, spec in self.attr_sets[attr_set].items(): @@ -1083,6 +1091,8 @@ class Family(SpecFamily): if attr_set in struct.child_nests: struct.recursive = True + self._sort_pure_types() + def _load_attr_use(self): for _, struct in self.pure_nested_structs.items(): if struct.request: -- cgit v1.2.3-70-g09d2 From 461f25a2e4334767d3e306b08dbda054da1aaa30 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:31 -0800 Subject: tools: ynl-gen: store recursive nests by a pointer To avoid infinite nesting store recursive structs by pointer. If recursive struct is placed in the op directly - the first instance can be stored by value. That makes the code much less of a pain for majority of practical uses. 
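A hedged sketch of the resulting layout (hypothetical names): the recursive nest itself holds deeper levels by pointer, while the first instance placed directly in an op's request struct can still be embedded by value:

  #include <linux/types.h>

  struct test_expr {
          __u32 op;
          struct test_expr *operand;   /* deeper levels stored by pointer */
  };

  struct test_do_req {
          struct test_expr expr;       /* first instance in the op, stored by value */
  };
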
Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 4ef3a774c402..7176afb4a3bd 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -108,6 +108,9 @@ class Type(SpecAttr): def is_recursive(self): return False + def is_recursive_for_op(self, ri): + return self.is_recursive() and not ri.op + def presence_type(self): return 'bit' @@ -148,6 +151,8 @@ class Type(SpecAttr): member = self._complex_member_type(ri) if member: ptr = '*' if self.is_multi_val() else '' + if self.is_recursive_for_op(ri): + ptr = '*' ri.cw.p(f"{member} {ptr}{self.c_name};") return members = self.arg_member(ri) @@ -539,7 +544,11 @@ class TypeNest(Type): return self.nested_struct_type def free(self, ri, var, ref): - ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name});') + at = '&' + if self.is_recursive_for_op(ri): + at = '' + ri.cw.p(f'if ({var}->{ref}{self.c_name})') + ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});') def _attr_typol(self): return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -548,8 +557,9 @@ class TypeNest(Type): return 'NLA_POLICY_NESTED(' + self.nested_render_name + '_nl_policy)' def attr_put(self, ri, var): + at = '' if self.is_recursive_for_op(ri) else '&' self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + - f"{self.enum_name}, &{var}->{self.c_name})") + f"{self.enum_name}, {at}{var}->{self.c_name})") def _attr_get(self, ri, var): get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))", @@ -562,6 +572,8 @@ class TypeNest(Type): ref = (ref if ref else []) + [self.c_name] for _, attr in ri.family.pure_nested_structs[self.nested_attrs].member_list(): + if attr.is_recursive(): + continue attr.setter(ri, self.nested_attrs, direction, deref=deref, ref=ref) -- cgit v1.2.3-70-g09d2 From 7b5fe80ebc6312896a72f0187b00f7840579d4fd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Dec 2023 15:14:32 -0800 Subject: tools: ynl-gen: print prototypes for recursive stuff We avoid printing forward declarations and prototypes for most types by sorting things topologically. But if structs nest we do need the forward declarations, there's no other way. 
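Roughly, for a recursive nest the generator now emits forward declarations and prototypes up front, along these lines (hypothetical names, assuming the usual ynl and netlink headers provide struct ynl_policy_nest, struct ynl_parse_arg, struct nlmsghdr and struct nlattr):

  /* Policies */
  extern struct ynl_policy_nest test_expr_nest;

  /* Common nested types */
  void test_expr_free(struct test_expr *obj);
  int test_expr_put(struct nlmsghdr *nlh, unsigned int attr_type,
                    struct test_expr *obj);
  int test_expr_parse(struct ynl_parse_arg *yarg, const struct nlattr *nested);
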
Reviewed-by: Donald Hunter Link: https://lore.kernel.org/r/20231213231432.2944749-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 7176afb4a3bd..7fc1aa788f6f 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -1521,6 +1521,10 @@ def print_dump_prototype(ri): print_prototype(ri, "request") +def put_typol_fwd(cw, struct): + cw.p(f'extern struct ynl_policy_nest {struct.render_name}_nest;') + + def put_typol(cw, struct): type_max = struct.attr_set.max_name cw.block_start(line=f'struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') @@ -1594,12 +1598,17 @@ def put_enum_to_str(family, cw, enum): _put_enum_to_str_helper(cw, enum.render_name, map_name, 'value', enum=enum) -def put_req_nested(ri, struct): +def put_req_nested_prototype(ri, struct, suffix=';'): func_args = ['struct nlmsghdr *nlh', 'unsigned int attr_type', f'{struct.ptr_name}obj'] - ri.cw.write_func_prot('int', f'{struct.render_name}_put', func_args) + ri.cw.write_func_prot('int', f'{struct.render_name}_put', func_args, + suffix=suffix) + + +def put_req_nested(ri, struct): + put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() ri.cw.write_func_lvar('struct nlattr *nest;') @@ -1726,18 +1735,23 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.nl() -def parse_rsp_nested(ri, struct): +def parse_rsp_nested_prototype(ri, struct, suffix=';'): func_args = ['struct ynl_parse_arg *yarg', 'const struct nlattr *nested'] for arg in struct.inherited: func_args.append('__u32 ' + arg) + ri.cw.write_func_prot('int', f'{struct.render_name}_parse', func_args, + suffix=suffix) + + +def parse_rsp_nested(ri, struct): + parse_rsp_nested_prototype(ri, struct, suffix='') + local_vars = ['const struct nlattr *attr;', f'{struct.ptr_name}dst = yarg->data;'] init_lines = [] - ri.cw.write_func_prot('int', f'{struct.render_name}_parse', func_args) - _multi_parse(ri, struct, init_lines, local_vars) @@ -2051,6 +2065,10 @@ def _free_type(ri, direction, struct): ri.cw.nl() +def free_rsp_nested_prototype(ri): + print_free_prototype(ri, "") + + def free_rsp_nested(ri, struct): _free_type(ri, "", struct) @@ -2818,7 +2836,14 @@ def main(): put_enum_to_str(parsed, cw, const) cw.nl() + has_recursive_nests = False cw.p('/* Policies */') + for struct in parsed.pure_nested_structs.values(): + if struct.recursive: + put_typol_fwd(cw, struct) + has_recursive_nests = True + if has_recursive_nests: + cw.nl() for name in parsed.pure_nested_structs: struct = Struct(parsed, name) put_typol(cw, struct) @@ -2827,6 +2852,15 @@ def main(): put_typol(cw, struct) cw.p('/* Common nested types */') + if has_recursive_nests: + for attr_set, struct in parsed.pure_nested_structs.items(): + ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) + free_rsp_nested_prototype(ri) + if struct.request: + put_req_nested_prototype(ri, struct) + if struct.reply: + parse_rsp_nested_prototype(ri, struct) + cw.nl() for attr_set, struct in parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) -- cgit v1.2.3-70-g09d2 From b6925b4ed57cccf42ca0fb46c7446f0859e7ad4b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:44 +0800 Subject: selftests/net: add variable NS_LIST for lib.sh Add a global variable NS_LIST to store all the namespaces that setup_ns created, so 
the caller could call cleanup_all_ns() instead of remember all the netns names when using cleanup_ns(). Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-2-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 518eca57b815..dca549443801 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -6,6 +6,8 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +# namespace list created by setup_ns +NS_LIST="" ############################################################################## # Helpers @@ -56,6 +58,11 @@ cleanup_ns() return $ret } +cleanup_all_ns() +{ + cleanup_ns $NS_LIST +} + # setup netns with given names as prefix. e.g # setup_ns local remote setup_ns() @@ -82,4 +89,5 @@ setup_ns() ip -n "$ns" link set lo up ns_list="$ns_list $ns" done + NS_LIST="$NS_LIST $ns_list" } -- cgit v1.2.3-70-g09d2 From 59cac2efd378811822ee320777116845b3e30722 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:45 +0800 Subject: selftests/net: convert srv6_end_dt46_l3vpn_test.sh to run it in unique namespace As the name \${rt-${rt}} may make reader confuse, convert the variable hs/rt in setup_rt/hs to hid, rid. Here is the test result after conversion. ]# ./srv6_end_dt46_l3vpn_test.sh ################################################################################ TEST SECTION: IPv6 routers connectivity test ################################################################################ TEST: Routers connectivity: rt-1 -> rt-2 [ OK ] TEST: Routers connectivity: rt-2 -> rt-1 [ OK ] ... TEST: IPv4 Hosts isolation: hs-t200-4 -X-> hs-t100-2 [ OK ] Tests passed: 34 Tests failed: 0 Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-3-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt46_l3vpn_test.sh | 51 ++++++++++------------ 1 file changed, 24 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 441eededa031..02d617040793 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -193,8 +193,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -250,26 +249,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} + local id=$1 + eval local nsname=\${rt_${id}} - ip netns add ${nsname} - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -279,16 +274,14 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} # set the networking for the host - ip netns add ${hsname} - ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -299,8 +292,8 @@ setup_hs() ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad - ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -332,10 +325,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local rtveth=veth-t${tid} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046 @@ -379,18 +370,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. 
+ setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -409,8 +403,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -428,8 +423,9 @@ check_hs_ipv6_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 } @@ -438,8 +434,9 @@ check_hs_ipv4_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 } -- cgit v1.2.3-70-g09d2 From 7b2d941c81bc110925870b6f0c1a071a20850b7c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:46 +0800 Subject: selftests/net: convert srv6_end_dt4_l3vpn_test.sh to run it in unique namespace As the name \${rt-${rt}} may make reader confuse, convert the variable hs/rt in setup_rt/hs to hid, rid. Here is the test result after conversion. ]# ./srv6_end_dt4_l3vpn_test.sh ################################################################################ TEST SECTION: IPv6 routers connectivity test ################################################################################ TEST: Routers connectivity: rt-1 -> rt-2 [ OK ] TEST: Routers connectivity: rt-2 -> rt-1 [ OK ] ... TEST: Hosts isolation: hs-t200-4 -X-> hs-t100-2 [ OK ] Tests passed: 18 Tests failed: 0 Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-4-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt4_l3vpn_test.sh | 48 ++++++++++------------ 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index f96282362811..79fb81e63c59 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -163,8 +163,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -219,27 +218,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} - - ip netns add ${nsname} + local id=$1 + eval local nsname=\${rt_${id}} ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -249,16 +243,13 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # set the networking for the host - ip netns add ${hsname} - # disable the rp_filter otherwise the kernel gets confused about how # to route decap ipv4 packets. ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 @@ -266,7 +257,7 @@ setup_hs() ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -293,10 +284,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004 # set the encap route for encapsulating packets which arrive from the @@ -328,18 +317,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. 
+ setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -358,8 +350,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -377,8 +370,9 @@ check_hs_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 } -- cgit v1.2.3-70-g09d2 From 792cd1dbc8a253c67f715ae6f987b8b28dbbcbbf Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:47 +0800 Subject: selftests/net: convert srv6_end_dt6_l3vpn_test.sh to run it in unique namespace As the name \${rt-${rt}} may make reader confuse, convert the variable hs/rt in setup_rt/hs to hid, rid. Here is the test result after conversion. ]# ./srv6_end_dt6_l3vpn_test.sh ################################################################################ TEST SECTION: IPv6 routers connectivity test ################################################################################ TEST: Routers connectivity: rt-1 -> rt-2 [ OK ] TEST: Routers connectivity: rt-2 -> rt-1 [ OK ] ... TEST: Hosts isolation: hs-t200-4 -X-> hs-t100-2 [ OK ] Tests passed: 18 Tests failed: 0 Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-5-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/srv6_end_dt6_l3vpn_test.sh | 46 ++++++++++------------ 1 file changed, 21 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh index b9b06ef80d88..e408406d8489 100755 --- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh @@ -164,8 +164,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -220,26 +219,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} + local id=$1 + eval local nsname=\${rt_${id}} - ip netns add ${nsname} - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -248,22 +243,20 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} # set the networking for the host - ip netns add ${hsname} - ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -293,10 +286,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local rtveth=veth-t${tid} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006 @@ -331,18 +322,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. 
+ setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -361,8 +355,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -380,8 +375,9 @@ check_hs_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 } -- cgit v1.2.3-70-g09d2 From 779283b7770f4580afa6691aa7fc6519d60f0e88 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:48 +0800 Subject: selftests/net: convert fcnal-test.sh to run it in unique namespace Here is the test result after conversion. There are some failures, but it also exists on my system without this patch. So it's not affectec by this patch and I will check the reason later. ]# time ./fcnal-test.sh /usr/bin/which: no nettest in (/root/.local/bin:/root/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin) ########################################################################### IPv4 ping ########################################################################### ################################################################# No VRF SYSCTL: net.ipv4.raw_l3mdev_accept=0 TEST: ping out - ns-B IP [ OK ] TEST: ping out, device bind - ns-B IP [ OK ] TEST: ping out, address bind - ns-B IP [ OK ] ... ################################################################# SNAT on VRF TEST: IPv4 TCP connection over VRF with SNAT [ OK ] TEST: IPv6 TCP connection over VRF with SNAT [ OK ] Tests passed: 893 Tests failed: 21 real 52m48.178s user 0m34.158s sys 1m42.976s BTW, this test needs a really long time. So expand the timeout to 1h. Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-6-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 30 +++++++++++++----------------- tools/testing/selftests/net/settings | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index d32a14ba069a..0d4f252427e2 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -37,9 +37,7 @@ # # server / client nomenclature relative to ns-A -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 - +source lib.sh VERBOSE=0 NSA_DEV=eth1 @@ -82,14 +80,6 @@ MCAST=ff02::1 NSA_LINKIP6= NSB_LINKIP6= -NSA=ns-A -NSB=ns-B -NSC=ns-C - -NSA_CMD="ip netns exec ${NSA}" -NSB_CMD="ip netns exec ${NSB}" -NSC_CMD="ip netns exec ${NSC}" - which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) # Check if FIPS mode is enabled @@ -406,9 +396,6 @@ create_ns() local addr=$2 local addr6=$3 - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -467,13 +454,12 @@ cleanup() ip -netns ${NSA} link del dev ${NSA_DEV} ip netns pids ${NSA} | xargs kill 2>/dev/null - ip netns del ${NSA} + cleanup_ns ${NSA} fi ip netns pids ${NSB} | xargs kill 2>/dev/null - ip netns del ${NSB} ip netns pids ${NSC} | xargs kill 2>/dev/null - ip netns del ${NSC} >/dev/null 2>&1 + cleanup_ns ${NSB} ${NSC} } cleanup_vrf_dup() @@ -487,6 +473,8 @@ setup_vrf_dup() { # some VRF tests use ns-C which has the same config as # ns-B but for a device NOT in the VRF + setup_ns NSC + NSC_CMD="ip netns exec ${NSC}" create_ns ${NSC} "-" "-" connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 @@ -503,6 +491,10 @@ setup() log_debug "Configuring network namespaces" set -e + setup_ns NSA NSB + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128 create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128 connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \ @@ -545,6 +537,10 @@ setup_lla_only() log_debug "Configuring network namespaces" set -e + setup_ns NSA NSB NSC + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + NSC_CMD="ip netns exec ${NSC}" create_ns ${NSA} "-" "-" create_ns ${NSB} "-" "-" create_ns ${NSC} "-" "-" diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index dfc27cdc6c05..ed8418e8217a 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1 @@ -timeout=1500 +timeout=3600 -- cgit v1.2.3-70-g09d2 From a33e9da3470499e9ff476138f271fb52d6bfe767 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:49 +0800 Subject: selftests/net: fix grep checking for fib_nexthop_multiprefix When running fib_nexthop_multiprefix test I saw all IPv6 test failed. e.g. ]# ./fib_nexthop_multiprefix.sh TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [FAIL] With -v it shows COMMAND: ip netns exec h0 /usr/sbin/ping6 -s 1350 -c5 -w5 2001:db8:101::1 PING 2001:db8:101::1(2001:db8:101::1) 1350 data bytes From 2001:db8:100::64 icmp_seq=1 Packet too big: mtu=1300 --- 2001:db8:101::1 ping statistics --- 1 packets transmitted, 0 received, +1 errors, 100% packet loss, time 0ms Route get 2001:db8:101::1 via 2001:db8:100::64 dev eth0 src 2001:db8:100::1 metric 1024 expires 599sec mtu 1300 pref medium Searching for: 2001:db8:101::1 from :: via 2001:db8:100::64 dev eth0 src 2001:db8:100::1 .* mtu 1300 The reason is when CONFIG_IPV6_SUBTREES is not enabled, rt6_fill_node() will not put RTA_SRC info. 
After fix: ]# ./fib_nexthop_multiprefix.sh TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [ OK ] Fixes: 735ab2f65dce ("selftests: Add test with multiple prefixes using single nexthop") Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-7-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthop_multiprefix.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index 51df5e305855..b52d59547fc5 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -209,12 +209,12 @@ validate_v6_exception() echo "Route get" ip -netns h0 -6 ro get ${dst} echo "Searching for:" - echo " ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + echo " ${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" echo fi ip -netns h0 -6 ro get ${dst} | \ - grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + grep -q "${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" rc=$? log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}" -- cgit v1.2.3-70-g09d2 From 5ae89fe43a4e889249fa700d0da5aa5d3e64e972 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:50 +0800 Subject: selftests/net: convert fib_nexthop_multiprefix to run it in unique namespace Here is the test result after conversion. ]# ./fib_nexthop_multiprefix.sh TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv4: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv6: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv4: host 0 to host 3, mtu 1400 [ OK ] TEST: IPv6: host 0 to host 3, mtu 1400 [ OK ] TEST: IPv4: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv6: host 0 to host 1, mtu 1300 [ OK ] TEST: IPv4: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv6: host 0 to host 2, mtu 1350 [ OK ] TEST: IPv4: host 0 to host 3, mtu 1400 [ OK ] TEST: IPv6: host 0 to host 3, mtu 1400 [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-8-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/net/fib_nexthop_multiprefix.sh | 98 +++++++++++----------- 1 file changed, 48 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index b52d59547fc5..e85248609af4 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -12,6 +12,7 @@ # # routing in h0 to hN is done with nexthop objects. 
+source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -72,12 +73,6 @@ create_ns() { local ns=${1} - ip netns del ${ns} 2>/dev/null - - ip netns add ${ns} - ip -netns ${ns} addr add 127.0.0.1/8 dev lo - ip -netns ${ns} link set lo up - ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 case ${ns} in h*) @@ -97,7 +92,13 @@ setup() #set -e - for ns in h0 r1 h1 h2 h3 + setup_ns h0 r1 h1 h2 h3 + h[0]=$h0 + h[1]=$h1 + h[2]=$h2 + h[3]=$h3 + r[1]=$r1 + for ns in ${h[0]} ${r[1]} ${h[1]} ${h[2]} ${h[3]} do create_ns ${ns} done @@ -108,35 +109,35 @@ setup() for i in 0 1 2 3 do - ip -netns h${i} li add eth0 type veth peer name r1h${i} - ip -netns h${i} li set eth0 up - ip -netns h${i} li set r1h${i} netns r1 name eth${i} up - - ip -netns h${i} addr add dev eth0 172.16.10${i}.1/24 - ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64 - ip -netns r1 addr add dev eth${i} 172.16.10${i}.254/24 - ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64 + ip -netns ${h[$i]} li add eth0 type veth peer name r1h${i} + ip -netns ${h[$i]} li set eth0 up + ip -netns ${h[$i]} li set r1h${i} netns ${r[1]} name eth${i} up + + ip -netns ${h[$i]} addr add dev eth0 172.16.10${i}.1/24 + ip -netns ${h[$i]} -6 addr add dev eth0 2001:db8:10${i}::1/64 + ip -netns ${r[1]} addr add dev eth${i} 172.16.10${i}.254/24 + ip -netns ${r[1]} -6 addr add dev eth${i} 2001:db8:10${i}::64/64 done - ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0 - ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0 + ip -netns ${h[0]} nexthop add id 4 via 172.16.100.254 dev eth0 + ip -netns ${h[0]} nexthop add id 6 via 2001:db8:100::64 dev eth0 - # routing from h0 to h1-h3 and back + # routing from ${h[0]} to h1-h3 and back for i in 1 2 3 do - ip -netns h0 ro add 172.16.10${i}.0/24 nhid 4 - ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254 + ip -netns ${h[0]} ro add 172.16.10${i}.0/24 nhid 4 + ip -netns ${h[$i]} ro add 172.16.100.0/24 via 172.16.10${i}.254 - ip -netns h0 -6 ro add 2001:db8:10${i}::/64 nhid 6 - ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 + ip -netns ${h[0]} -6 ro add 2001:db8:10${i}::/64 nhid 6 + ip -netns ${h[$i]} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 done if [ "$VERBOSE" = "1" ]; then echo echo "host 1 config" - ip -netns h0 li sh - ip -netns h0 ro sh - ip -netns h0 -6 ro sh + ip -netns ${h[0]} li sh + ip -netns ${h[0]} ro sh + ip -netns ${h[0]} -6 ro sh fi #set +e @@ -144,10 +145,7 @@ setup() cleanup() { - for n in h0 r1 h1 h2 h3 - do - ip netns del ${n} 2>/dev/null - done + cleanup_all_ns } change_mtu() @@ -156,7 +154,7 @@ change_mtu() local mtu=$2 run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu} - run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu} + run_cmd ip -netns ${r1} li set eth${hostid} mtu ${mtu} } ################################################################################ @@ -168,23 +166,23 @@ validate_v4_exception() local mtu=$2 local ping_sz=$3 local dst="172.16.10${i}.1" - local h0=172.16.100.1 - local r1=172.16.100.254 + local h0_ip=172.16.100.1 + local r1_ip=172.16.100.254 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ping -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 ro get ${dst} + ip -netns ${h0} ro get ${dst} echo "Searching for:" echo " cache .* mtu ${mtu}" echo fi - ip -netns h0 ro get ${dst} | \ + ip -netns ${h0} ro get ${dst} | \ grep -q "cache .* mtu ${mtu}" rc=$? 
@@ -197,24 +195,24 @@ validate_v6_exception() local mtu=$2 local ping_sz=$3 local dst="2001:db8:10${i}::1" - local h0=2001:db8:100::1 - local r1=2001:db8:100::64 + local h0_ip=2001:db8:100::1 + local r1_ip=2001:db8:100::64 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ${ping6} -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 -6 ro get ${dst} + ip -netns ${h0} -6 ro get ${dst} echo "Searching for:" - echo " ${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + echo " ${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" echo fi - ip -netns h0 -6 ro get ${dst} | \ - grep -q "${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + ip -netns ${h0} -6 ro get ${dst} | \ + grep -q "${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" rc=$? log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}" @@ -242,11 +240,11 @@ for i in 1 2 3 do # generate a cached route per-cpu for c in ${cpus}; do - run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1 - [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ping -c1 -w1 172.16.10${i}.1 + [ $? -ne 0 ] && printf "\nERROR: ping to ${h[$i]} failed\n" && ret=1 - run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1 - [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ${ping6} -c1 -w1 2001:db8:10${i}::1 + [ $? -ne 0 ] && printf "\nERROR: ping6 to ${h[$i]} failed\n" && ret=1 [ $ret -ne 0 ] && break done @@ -282,11 +280,11 @@ if [ $ret -eq 0 ]; then validate_v6_exception 3 1400 0 # targeted deletes to trigger cleanup paths in kernel - ip -netns h0 ro del 172.16.102.0/24 nhid 4 - ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6 + ip -netns ${h0} ro del 172.16.102.0/24 nhid 4 + ip -netns ${h0} -6 ro del 2001:db8:102::/64 nhid 6 - ip -netns h0 nexthop del id 4 - ip -netns h0 nexthop del id 6 + ip -netns ${h0} nexthop del id 4 + ip -netns ${h0} nexthop del id 6 fi cleanup -- cgit v1.2.3-70-g09d2 From d2168ea792345938c4f53c22126066fbe7a6001c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:51 +0800 Subject: selftests/net: convert fib_nexthop_nongw.sh to run it in unique namespace Here is the test result after conversion. 
]# ./fib_nexthop_nongw.sh TEST: nexthop: get route with nexthop without gw [ OK ] TEST: nexthop: ping through nexthop without gw [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-9-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthop_nongw.sh | 34 +++++++++++------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh index b7b928b38ce4..1ccf56f10171 100755 --- a/tools/testing/selftests/net/fib_nexthop_nongw.sh +++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh @@ -8,6 +8,7 @@ # veth0 <---|---> veth1 # Validate source address selection for route without gateway +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 ret=0 @@ -64,35 +65,31 @@ run_cmd() # config setup() { - ip netns add h1 - ip -n h1 link set lo up - ip netns add h2 - ip -n h2 link set lo up + setup_ns h1 h2 # Add a fake eth0 to support an ip address - ip -n h1 link add name eth0 type dummy - ip -n h1 link set eth0 up - ip -n h1 address add 192.168.0.1/24 dev eth0 + ip -n $h1 link add name eth0 type dummy + ip -n $h1 link set eth0 up + ip -n $h1 address add 192.168.0.1/24 dev eth0 # Configure veths (same @mac, arp off) - ip -n h1 link add name veth0 type veth peer name veth1 netns h2 - ip -n h1 link set veth0 up + ip -n $h1 link add name veth0 type veth peer name veth1 netns $h2 + ip -n $h1 link set veth0 up - ip -n h2 link set veth1 up + ip -n $h2 link set veth1 up # Configure @IP in the peer netns - ip -n h2 address add 192.168.1.1/32 dev veth1 - ip -n h2 route add default dev veth1 + ip -n $h2 address add 192.168.1.1/32 dev veth1 + ip -n $h2 route add default dev veth1 # Add a nexthop without @gw and use it in a route - ip -n h1 nexthop add id 1 dev veth0 - ip -n h1 route add 192.168.1.1 nhid 1 + ip -n $h1 nexthop add id 1 dev veth0 + ip -n $h1 route add 192.168.1.1 nhid 1 } cleanup() { - ip netns del h1 2>/dev/null - ip netns del h2 2>/dev/null + cleanup_ns $h1 $h2 } trap cleanup EXIT @@ -108,12 +105,11 @@ do esac done -cleanup setup -run_cmd ip -netns h1 route get 192.168.1.1 +run_cmd ip -netns $h1 route get 192.168.1.1 log_test $? 0 "nexthop: get route with nexthop without gw" -run_cmd ip netns exec h1 ping -c1 192.168.1.1 +run_cmd ip netns exec $h1 ping -c1 192.168.1.1 log_test $? 0 "nexthop: ping through nexthop without gw" exit $ret -- cgit v1.2.3-70-g09d2 From 39333e31672cfc35430d1f5e411188553936b64d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:52 +0800 Subject: selftests/net: convert fib_nexthops.sh to run it in unique namespace Here is the test result after conversion. ]# ./fib_nexthops.sh Basic functional tests ---------------------- TEST: List with nothing defined [ OK ] TEST: Nexthop get on non-existent id [ OK ] ... 
TEST: IPv6 resilient nexthop group torture test [ OK ] Tests passed: 234 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20231213060856.4030084-10-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthops.sh | 142 ++++++++++++++-------------- 1 file changed, 69 insertions(+), 73 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index a6f2c0b9555d..d5a281aadbac 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -14,6 +14,7 @@ # objects. Device reference counts and network namespace cleanup tested # by use of network namespace for peer. +source lib.sh ret=0 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -148,13 +149,7 @@ create_ns() { local n=${1} - ip netns del ${n} 2>/dev/null - set -e - ip netns add ${n} - ip netns set ${n} $((nsid++)) - ip -netns ${n} addr add 127.0.0.1/8 dev lo - ip -netns ${n} link set lo up ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1 ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1 @@ -173,12 +168,13 @@ setup() { cleanup - create_ns me - create_ns peer - create_ns remote + setup_ns me peer remote + create_ns $me + create_ns $peer + create_ns $remote - IP="ip -netns me" - BRIDGE="bridge -netns me" + IP="ip -netns $me" + BRIDGE="bridge -netns $me" set -e $IP li add veth1 type veth peer name veth2 $IP li set veth1 up @@ -190,24 +186,24 @@ setup() $IP addr add 172.16.2.1/24 dev veth3 $IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad - $IP li set veth2 netns peer up - ip -netns peer addr add 172.16.1.2/24 dev veth2 - ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad + $IP li set veth2 netns $peer up + ip -netns $peer addr add 172.16.1.2/24 dev veth2 + ip -netns $peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad - $IP li set veth4 netns peer up - ip -netns peer addr add 172.16.2.2/24 dev veth4 - ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad + $IP li set veth4 netns $peer up + ip -netns $peer addr add 172.16.2.2/24 dev veth4 + ip -netns $peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad - ip -netns remote li add veth5 type veth peer name veth6 - ip -netns remote li set veth5 up - ip -netns remote addr add dev veth5 172.16.101.1/24 - ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad - ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2 - ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 + ip -netns $remote li add veth5 type veth peer name veth6 + ip -netns $remote li set veth5 up + ip -netns $remote addr add dev veth5 172.16.101.1/24 + ip -netns $remote -6 addr add dev veth5 2001:db8:101::1/64 nodad + ip -netns $remote ro add 172.16.0.0/22 via 172.16.101.2 + ip -netns $remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 - ip -netns remote li set veth6 netns peer up - ip -netns peer addr add dev veth6 172.16.101.2/24 - ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad + ip -netns $remote li set veth6 netns $peer up + ip -netns $peer addr add dev veth6 172.16.101.2/24 + ip -netns $peer -6 addr add dev veth6 2001:db8:101::2/64 nodad set +e } @@ -215,7 +211,7 @@ cleanup() { local ns - for ns in me peer remote; do + for ns in $me $peer $remote; do ip netns del ${ns} 2>/dev/null done } @@ -779,7 +775,7 @@ ipv6_grp_refs() run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" # create per-cpu 
dsts through nh 100 - run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" + run_cmd "ip netns exec $me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" # remove nh 100 from the group to delete the route potentially leaving # a stale per-cpu dst which holds a reference to the nexthop's net @@ -805,7 +801,7 @@ ipv6_grp_refs() # if a reference was lost this command will hang because the net device # cannot be removed - timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1 + timeout -s KILL 5 ip netns exec $me ip link del veth1.10 >/dev/null 2>&1 # we can't cleanup if the command is hung trying to delete the netdev if [ $? -eq 137 ]; then @@ -1012,13 +1008,13 @@ ipv6_fcnal_runtime() log_test $? 0 "Route delete" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping with nexthop" run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 122 group 81/82" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - multipath" # @@ -1026,26 +1022,26 @@ ipv6_fcnal_runtime() # run_cmd "$IP -6 nexthop add id 83 blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP -6 nexthop replace id 83 blackhole" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 83" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 81/82" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 
0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -1128,15 +1124,15 @@ ipv6_fcnal_runtime() # rpfilter and default route $IP nexthop flush >/dev/null 2>&1 - run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" + run_cmd "ip netns exec $me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1" run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 93 group 91/92" run_cmd "$IP -6 ro add default nhid 91" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with default route and rpfilter" run_cmd "$IP -6 ro replace default nhid 93" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with multipath default route and rpfilter" # TO-DO: @@ -1216,11 +1212,11 @@ ipv6_torture() pid1=$! ipv6_grp_replace_loop & pid2=$! - ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 @@ -1270,11 +1266,11 @@ ipv6_res_torture() pid1=$! ipv6_res_grp_replace_loop & pid2=$! - ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn -6 veth1 \ + ip netns exec $me mausezahn -6 veth1 \ -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! @@ -1544,7 +1540,7 @@ ipv4_withv6_fcnal() local lladdr set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1" set +e run_cmd "$IP ro add 172.16.101.1/32 nhid 11" @@ -1606,13 +1602,13 @@ ipv4_fcnal_runtime() # run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1" run_cmd "$IP ro replace 172.16.101.1/32 nhid 21" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Basic ping" run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3" run_cmd "$IP nexthop add id 122 group 21/22" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multipath" run_cmd "$IP ro delete 172.16.101.1/32 nhid 122" @@ -1623,7 +1619,7 @@ ipv4_fcnal_runtime() run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1" run_cmd "$IP ro add default nhid 501" run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 
0 "Ping - multiple default routes, nh first" # flip the order @@ -1632,7 +1628,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20" run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1" run_cmd "$IP ro add default nhid 501 metric 20" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multiple default routes, nh second" run_cmd "$IP nexthop delete nhid 501" @@ -1643,26 +1639,26 @@ ipv4_fcnal_runtime() # run_cmd "$IP nexthop add id 23 blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 23" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP nexthop replace id 23 blackhole" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 23" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 21/22" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -1685,11 +1681,11 @@ ipv4_fcnal_runtime() # IPv4 with IPv6 # set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1" set +e run_cmd "$IP ro replace 172.16.101.1/32 nhid 24" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1713,11 +1709,11 @@ ipv4_fcnal_runtime() check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv4 route with IPv6 gateway" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1734,7 +1730,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1" run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 
0 "IPv4 default route with IPv6 gateway" # @@ -1785,7 +1781,7 @@ sysctl_nexthop_compat_mode_check() local sysctlname="net.ipv4.nexthop_compat_mode" local lprefix=$1 - IPE="ip netns exec me" + IPE="ip netns exec $me" $IPE sysctl -q $sysctlname 2>&1 >/dev/null if [ $? -ne 0 ]; then @@ -1804,7 +1800,7 @@ sysctl_nexthop_compat_mode_set() local mode=$1 local lprefix=$2 - IPE="ip netns exec me" + IPE="ip netns exec $me" out=$($IPE sysctl -w $sysctlname=$mode) log_test $? 0 "$lprefix set compat mode - $mode" @@ -1988,11 +1984,11 @@ ipv4_torture() pid1=$! ipv4_grp_replace_loop & pid2=$! - ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 @@ -2042,11 +2038,11 @@ ipv4_res_torture() pid1=$! ipv4_res_grp_replace_loop & pid2=$! - ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 \ + ip netns exec $me mausezahn veth1 \ -B 172.16.101.2 -A 172.16.1.1 -c 0 \ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! @@ -2081,10 +2077,10 @@ basic() # create nh with linkdown device - fails $IP li set veth1 up - ip -netns peer li set veth2 down + ip -netns $peer li set veth2 down run_cmd "$IP nexthop add id 1 dev veth1" log_test $? 2 "Nexthop with device that is linkdown" - ip -netns peer li set veth2 up + ip -netns $peer li set veth2 up # device only run_cmd "$IP nexthop add id 1 dev veth1" @@ -2465,7 +2461,7 @@ fi for t in $TESTS do case $t in - none) IP="ip -netns peer"; setup; exit 0;; + none) IP="ip -netns $peer"; setup; exit 0;; *) setup; $t; cleanup;; esac done -- cgit v1.2.3-70-g09d2 From 3a06833b2adc0a902f2469ad4ce41ccd64f1f3ab Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:53 +0800 Subject: selftests/net: convert fib-onlink-tests.sh to run it in unique namespace Remove PEER_CMD, which is not used in this test Here is the test result after conversion. ]# ./fib-onlink-tests.sh Error: ipv4: FIB table does not exist. Flush terminated Error: ipv6: FIB table does not exist. Flush terminated ######################################## Configuring interfaces ... 
TEST: Gateway resolves to wrong nexthop device - VRF [ OK ] Tests passed: 38 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-11-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib-onlink-tests.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index c287b90b8af8..ec2d6ceb1f08 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -3,6 +3,7 @@ # IPv4 and IPv6 onlink tests +source lib.sh PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} VERBOSE=0 @@ -74,9 +75,6 @@ TEST_NET4IN6[2]=10.2.1.254 # mcast address MCAST6=ff02::1 - -PEER_NS=bart -PEER_CMD="ip netns exec ${PEER_NS}" VRF=lisa VRF_TABLE=1101 PBR_TABLE=101 @@ -176,8 +174,7 @@ setup() set -e # create namespace - ip netns add ${PEER_NS} - ip -netns ${PEER_NS} li set lo up + setup_ns PEER_NS # add vrf table ip li add ${VRF} type vrf table ${VRF_TABLE} @@ -219,7 +216,7 @@ setup() cleanup() { # make sure we start from a clean slate - ip netns del ${PEER_NS} 2>/dev/null + cleanup_ns ${PEER_NS} 2>/dev/null for n in 1 3 5 7; do ip link del ${NETIFS[p${n}]} 2>/dev/null done -- cgit v1.2.3-70-g09d2 From 6c0ee7b4d69d5545d28f81ceabf89a5b7d86d1dd Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:54 +0800 Subject: selftests/net: convert fib_rule_tests.sh to run it in unique namespace Here is the test result after conversion. ]# ./fib_rule_tests.sh TEST: rule6 check: oif redirect to table [ OK ] ... TEST: rule4 dsfield tcp connect (dsfield 0x07) [ OK ] Tests passed: 66 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20231213060856.4030084-12-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 36 ++++++++++++--------------- 1 file changed, 16 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 63c3eaec8d30..51157a5559b7 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -3,14 +3,9 @@ # This test is for checking IPv4 and IPv6 FIB rules API -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh ret=0 - PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} -IP="ip -netns testns" -IP_PEER="ip -netns peerns" RTABLE=100 RTABLE_PEER=101 @@ -84,8 +79,8 @@ check_nettest() setup() { set -e - ip netns add testns - $IP link set dev lo up + setup_ns testns + IP="ip -netns $testns" $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -98,18 +93,19 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del testns + cleanup_ns $testns } setup_peer() { set -e - ip netns add peerns + setup_ns peerns + IP_PEER="ip -netns $peerns" $IP_PEER link set dev lo up - ip link add name veth0 netns testns type veth \ - peer name veth1 netns peerns + ip link add name veth0 netns $testns type veth \ + peer name veth1 netns $peerns $IP link set dev veth0 up $IP_PEER link set dev veth1 up @@ -131,7 +127,7 @@ setup_peer() cleanup_peer() { $IP link del dev veth0 - ip netns del peerns + ip netns del $peerns } fib_check_iproute_support() @@ -270,11 +266,11 @@ fib_rule6_connect_test() # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). 
# The ECN bits shouldn't influence the result of the test. for dsfield in 0x04 0x05 0x06 0x07; do - nettest -q -6 -B -t 5 -N testns -O peerns -U -D \ + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" - nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ -l 2001:db8::1:11 -r 2001:db8::1:11 log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" done @@ -337,11 +333,11 @@ fib_rule4_test() # need enable forwarding and disable rp_filter temporarily as all the # addresses are in the same subnet and egress device == ingress device. - ip netns exec testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec testns sysctl -qw net.ipv4.ip_forward=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -407,11 +403,11 @@ fib_rule4_connect_test() # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). # The ECN bits shouldn't influence the result of the test. for dsfield in 0x04 0x05 0x06 0x07; do - nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \ + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q "${dsfield}" \ -l 198.51.100.11 -r 198.51.100.11 log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" - nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + nettest -q -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ -l 198.51.100.11 -r 198.51.100.11 log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" done -- cgit v1.2.3-70-g09d2 From f6fc5b949911efb758e13bc4a1a10c9a473b6254 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:55 +0800 Subject: selftests/net: convert fib_tests.sh to run it in unique namespace Here is the test result after conversion. # ./fib_tests.sh Single path route test Start point TEST: IPv4 fibmatch [ OK ] ... Fib6 garbage collection test TEST: ipv6 route garbage collection [ OK ] IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.00) [ OK ] IPv6 multipath list receive tests TEST: Multipath route hit ratio (1.00) [ OK ] Tests passed: 225 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20231213060856.4030084-13-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 184 +++++++++++++++---------------- 1 file changed, 87 insertions(+), 97 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 66d0db7a2614..b3ecccbbfcd2 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -3,10 +3,8 @@ # This test is for checking IPv4 and IPv6 FIB behavior in response to # different events. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. 
Can be overridden with -t option TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ @@ -18,8 +16,6 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="$(which ip) -netns ns1" -NS_EXEC="$(which ip) netns exec ns1" which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) @@ -55,11 +51,11 @@ log_test() setup() { set -e - ip netns add ns1 - ip netns set ns1 auto - $IP link set dev lo up - ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns1 + IP="$(which ip) -netns $ns1" + NS_EXEC="$(which ip) netns exec $ns1" + ip netns exec $ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -72,8 +68,7 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del ns1 &> /dev/null - ip netns del ns2 &> /dev/null + cleanup_ns $ns1 $ns2 } get_linklocal() @@ -448,28 +443,25 @@ fib_rp_filter_test() setup set -e - ip netns add ns2 - ip netns set ns2 auto - - ip -netns ns2 link set dev lo up + setup_ns ns2 $IP link add name veth1 type veth peer name veth2 - $IP link set dev veth2 netns ns2 + $IP link set dev veth2 netns $ns2 $IP address add 192.0.2.1/24 dev veth1 - ip -netns ns2 address add 192.0.2.1/24 dev veth2 + ip -netns $ns2 address add 192.0.2.1/24 dev veth2 $IP link set dev veth1 up - ip -netns ns2 link set dev veth2 up + ip -netns $ns2 link set dev veth2 up $IP link set dev lo address 52:54:00:6a:c7:5e $IP link set dev veth1 address 52:54:00:6a:c7:5e - ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e - ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev veth2 address 52:54:00:6a:c7:5e # 1. (ns2) redirect lo's egress to veth2's egress - ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel - ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + ip netns exec $ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec $ns2 tc filter add dev lo parent 1: protocol arp basic \ action mirred egress redirect dev veth2 - ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + ip netns exec $ns2 tc filter add dev lo parent 1: protocol ip basic \ action mirred egress redirect dev veth2 # 2. (ns1) redirect veth1's ingress to lo's ingress @@ -487,24 +479,24 @@ fib_rp_filter_test() action mirred egress redirect dev veth1 # 4. 
(ns2) redirect veth2's ingress to lo's ingress - ip netns exec ns2 tc qdisc add dev veth2 ingress - ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + ip netns exec $ns2 tc qdisc add dev veth2 ingress + ip netns exec $ns2 tc filter add dev veth2 ingress protocol arp basic \ action mirred ingress redirect dev lo - ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + ip netns exec $ns2 tc filter add dev veth2 ingress protocol ip basic \ action mirred ingress redirect dev lo $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup @@ -959,34 +951,32 @@ route_setup() [ "${VERBOSE}" = "1" ] && set -x set -e - ip netns add ns2 - ip netns set ns2 auto - ip -netns ns2 link set dev lo up - ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns2 + ip netns exec $ns2 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP li add veth1 type veth peer name veth2 $IP li add veth3 type veth peer name veth4 $IP li set veth1 up $IP li set veth3 up - $IP li set veth2 netns ns2 up - $IP li set veth4 netns ns2 up - ip -netns ns2 li add dummy1 type dummy - ip -netns ns2 li set dummy1 up + $IP li set veth2 netns $ns2 up + $IP li set veth4 netns $ns2 up + ip -netns $ns2 li add dummy1 type dummy + ip -netns $ns2 li set dummy1 up $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad $IP addr add 172.16.101.1/24 dev veth1 $IP addr add 172.16.103.1/24 dev veth3 - ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad - ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad - ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad + ip -netns $ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad + ip -netns $ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad + ip -netns $ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad - ip -netns ns2 addr add 172.16.101.2/24 dev veth2 - ip -netns ns2 addr add 172.16.103.2/24 dev veth4 - ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 + ip -netns $ns2 addr add 172.16.101.2/24 dev veth2 + ip -netns $ns2 addr add 172.16.103.2/24 dev veth4 + ip -netns $ns2 addr add 172.16.104.1/24 dev dummy1 set +e } @@ -1238,7 +1228,7 @@ ipv6_addr_metric_test() log_test $rc 0 "Modify metric of address" # verify prefix route removed on down - run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" run_cmd "$IP li set dev dummy2 down" rc=$? 
if [ $rc -eq 0 ]; then @@ -1344,7 +1334,7 @@ ipv6_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 - run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" + run_cmd "ip netns exec $ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" @@ -1599,19 +1589,19 @@ ipv4_rt_replace() ipv4_local_rt_cache() { run_cmd "ip addr add 10.0.0.1/32 dev lo" - run_cmd "ip netns add test-ns" + run_cmd "setup_ns test-ns" run_cmd "ip link add veth-outside type veth peer name veth-inside" run_cmd "ip link add vrf-100 type vrf table 1100" run_cmd "ip link set veth-outside master vrf-100" - run_cmd "ip link set veth-inside netns test-ns" + run_cmd "ip link set veth-inside netns $test-ns" run_cmd "ip link set veth-outside up" run_cmd "ip link set vrf-100 up" run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" - run_cmd "ip netns exec test-ns ip link set veth-inside up" - run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside" - run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside" - run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1" - run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1" + run_cmd "ip netns exec $test-ns ip link set veth-inside up" + run_cmd "ip netns exec $test-ns ip addr add 10.1.1.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add 10.0.0.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add default via 10.0.0.1" + run_cmd "ip netns exec $test-ns ping 10.0.0.1 -c 1 -i 1" run_cmd "ip link delete vrf-100" # if we do not hang test is a success @@ -1841,7 +1831,7 @@ ipv4_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300 - run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 -s 1500 172.16.104.1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo" @@ -2105,7 +2095,7 @@ ipv4_route_v6_gw_test() check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1" fi - run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 172.16.104.1" log_test $rc 0 "Single path route with IPv6 gateway - ping" run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2" @@ -2196,7 +2186,7 @@ ipv4_mangle_test() sleep 2 local tmp_file=$(mktemp) - ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file & + ip netns exec $ns2 socat UDP4-LISTEN:54321,fork $tmp_file & # Add a FIB rule and a route that will direct our connection to the # listening server. @@ -2254,7 +2244,7 @@ ipv6_mangle_test() sleep 2 local tmp_file=$(mktemp) - ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file & + ip netns exec $ns2 socat UDP6-LISTEN:54321,fork $tmp_file & # Add a FIB rule and a route that will direct our connection to the # listening server. 
@@ -2423,37 +2413,37 @@ ipv4_mpath_list_test() route_setup set -e - run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" - - run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" - run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" - run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" - run_cmd "ip -n ns2 link add name nh1 up type dummy" - run_cmd "ip -n ns2 link add name nh2 up type dummy" - run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1" - run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2" - run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" - run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" - run_cmd "ip -n ns2 route add 203.0.113.0/24 + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 address add 172.16.201.1/24 dev nh1" + run_cmd "ip -n $ns2 address add 172.16.202.1/24 dev nh2" + run_cmd "ip -n $ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e - local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') local tmp_file=$(mktemp) - local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac + local cmd="ip netns exec $ns1 mausezahn veth1 -a own -b $dmac -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q" # Packets forwarded in a list using a multipath route must not reuse a # cached result so that a flow always hits the same nexthop. In other # words, the FIB lookup tracepoint needs to be triggered for every # packet. 
- local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" - local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2471,34 +2461,34 @@ ipv6_mpath_list_test() route_setup set -e - run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" - - run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" - run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" - run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" - run_cmd "ip -n ns2 link add name nh1 up type dummy" - run_cmd "ip -n ns2 link add name nh2 up type dummy" - run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1" - run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2" - run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" - run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" - run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64 + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 -6 address add 2001:db8:201::1/64 dev nh1" + run_cmd "ip -n $ns2 -6 address add 2001:db8:202::1/64 dev nh2" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 -6 route add 2001:db8:301::/64 nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" set +e - local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') local tmp_file=$(mktemp) - local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac + local cmd="ip netns exec $ns1 mausezahn -6 veth1 -a own -b $dmac -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q" # Packets forwarded in a list using a multipath route must not reuse a # cached result so that a flow always hits the same nexthop. In other # words, the FIB lookup tracepoint needs to be triggered for every # packet. 
- local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" - local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff -- cgit v1.2.3-70-g09d2 From b795db185e3260c8022dbfd01c12a05dcfe51b7a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 13 Dec 2023 14:08:56 +0800 Subject: selftests/net: convert fdb_flush.sh to run it in unique namespace Here is the test result after conversion. # ./fdb_flush.sh TEST: vx10: Expected 5 FDB entries, got 5 [ OK ] TEST: vx20: Expected 5 FDB entries, got 5 [ OK ] ... TEST: vx10: Expected 5 FDB entries, got 5 [ OK ] TEST: Test entries with dst 192.0.2.1 [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20231213060856.4030084-14-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fdb_flush.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index 90e7a29e0476..d5e3abb8658c 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -5,6 +5,8 @@ # Check that flush works as expected with all the supported arguments and verify # some combinations of arguments. +source lib.sh + FLUSH_BY_STATE_TESTS=" vxlan_test_flush_by_permanent vxlan_test_flush_by_nopermanent @@ -739,10 +741,9 @@ bridge_vxlan_test_flush() setup() { - IP="ip -netns ns1" - BRIDGE="bridge -netns ns1" - - ip netns add ns1 + setup_ns NS + IP="ip -netns ${NS}" + BRIDGE="bridge -netns ${NS}" $IP link add name vx10 type vxlan id 1000 dstport "$VXPORT" $IP link add name vx20 type vxlan id 2000 dstport "$VXPORT" @@ -759,7 +760,7 @@ cleanup() $IP link del dev vx20 $IP link del dev vx10 - ip netns del ns1 + cleanup_ns ${NS} } ################################################################################ -- cgit v1.2.3-70-g09d2 From 542e893fbadc51caa38a7c4c2a8f8e822cdba2b1 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Thu, 14 Dec 2023 15:52:30 +0300 Subject: vsock/test: two tests to check credit update logic Both tests are almost same, only differs in two 'if' conditions, so implemented in a single function. Tests check, that credit update message is sent: 1) During setting SO_RCVLOWAT value of the socket. 2) When number of 'rx_bytes' become smaller than SO_RCVLOWAT value. Signed-off-by: Arseniy Krasnov Reviewed-by: Stefano Garzarella Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- tools/testing/vsock/vsock_test.c | 175 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) (limited to 'tools') diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 01fa816868bc..66246d81d654 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1232,6 +1232,171 @@ static void test_double_bind_connect_client(const struct test_opts *opts) } } +#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) +/* This define is the same as in 'include/linux/virtio_vsock.h': + * it is used to decide when to send credit update message during + * reading from rx queue of a socket. Value and its usage in + * kernel is important for this test. 
+ */ +#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) + +static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opts) +{ + size_t buf_size; + void *buf; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Send 1 byte more than peer's buffer size. */ + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE + 1; + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until peer sets needed buffer size. */ + recv_byte(fd, 1, 0); + + if (send(fd, buf, buf_size, 0) != buf_size) { + perror("send failed"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_credit_update_test(const struct test_opts *opts, + bool low_rx_bytes_test) +{ + size_t recv_buf_size; + struct pollfd fds; + size_t buf_size; + void *buf; + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE; + + if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + &buf_size, sizeof(buf_size))) { + perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); + exit(EXIT_FAILURE); + } + + if (low_rx_bytes_test) { + /* Set new SO_RCVLOWAT here. This enables sending credit + * update when number of bytes if our rx queue become < + * SO_RCVLOWAT value. + */ + recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, + &recv_buf_size, sizeof(recv_buf_size))) { + perror("setsockopt(SO_RCVLOWAT)"); + exit(EXIT_FAILURE); + } + } + + /* Send one dummy byte here, because 'setsockopt()' above also + * sends special packet which tells sender to update our buffer + * size. This 'send_byte()' will serialize such packet with data + * reads in a loop below. Sender starts transmission only when + * it receives this single byte. + */ + send_byte(fd, 1, 0); + + buf = malloc(buf_size); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + /* Wait until there will be 128KB of data in rx queue. */ + while (1) { + ssize_t res; + + res = recv(fd, buf, buf_size, MSG_PEEK); + if (res == buf_size) + break; + + if (res <= 0) { + fprintf(stderr, "unexpected 'recv()' return: %zi\n", res); + exit(EXIT_FAILURE); + } + } + + /* There is 128KB of data in the socket's rx queue, dequeue first + * 64KB, credit update is sent if 'low_rx_bytes_test' == true. + * Otherwise, credit update is sent in 'if (!low_rx_bytes_test)'. + */ + recv_buf_size = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; + recv_buf(fd, buf, recv_buf_size, 0, recv_buf_size); + + if (!low_rx_bytes_test) { + recv_buf_size++; + + /* Updating SO_RCVLOWAT will send credit update. */ + if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, + &recv_buf_size, sizeof(recv_buf_size))) { + perror("setsockopt(SO_RCVLOWAT)"); + exit(EXIT_FAILURE); + } + } + + fds.fd = fd; + fds.events = POLLIN | POLLRDNORM | POLLERR | + POLLRDHUP | POLLHUP; + + /* This 'poll()' will return once we receive last byte + * sent by client. + */ + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + if (fds.revents & POLLERR) { + fprintf(stderr, "'poll()' error\n"); + exit(EXIT_FAILURE); + } + + if (fds.revents & (POLLIN | POLLRDNORM)) { + recv_buf(fd, buf, recv_buf_size, MSG_DONTWAIT, recv_buf_size); + } else { + /* These flags must be set, as there is at + * least 64KB of data ready to read. 
+ */ + fprintf(stderr, "POLLIN | POLLRDNORM expected\n"); + exit(EXIT_FAILURE); + } + + free(buf); + close(fd); +} + +static void test_stream_cred_upd_on_low_rx_bytes(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, true); +} + +static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) +{ + test_stream_credit_update_test(opts, false); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1342,6 +1507,16 @@ static struct test_case test_cases[] = { .run_client = test_double_bind_connect_client, .run_server = test_double_bind_connect_server, }, + { + .name = "SOCK_STREAM virtio credit update + SO_RCVLOWAT", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_set_rcvlowat, + }, + { + .name = "SOCK_STREAM virtio credit update + low rx_bytes", + .run_client = test_stream_rcvlowat_def_cred_upd_client, + .run_server = test_stream_cred_upd_on_low_rx_bytes, + }, {}, }; -- cgit v1.2.3-70-g09d2 From 18872ba8cd2406ffa835f9f6276e47ed86fbb5d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Dec 2023 10:49:01 +0000 Subject: selftests/net: optmem_max became per netns /proc/sys/net/core/optmem_max is now per netns, change two tests that were saving/changing/restoring its value on the parent netns. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- tools/testing/selftests/net/io_uring_zerocopy_tx.sh | 9 ++++----- tools/testing/selftests/net/msg_zerocopy.sh | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh index 9ac4456d48fc..123439545013 100755 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -76,23 +76,22 @@ case "${TXMODE}" in esac # Start of state changes: install cleanup handler -save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" - sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" } trap cleanup EXIT -# Configure system settings -sysctl -w -q "${path_sysctl_mem}=1000000" - # Create virtual ethernet pair between network namespaces ip netns add "${NS1}" ip netns add "${NS2}" +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 825ffec85cea..89c22f5320e0 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -70,23 +70,22 @@ case "${TXMODE}" in esac # Start of state changes: install cleanup handler -save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" - sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" } trap cleanup EXIT -# Configure system settings -sysctl -w -q "${path_sysctl_mem}=1000000" - # Create virtual ethernet pair between network namespaces ip netns add "${NS1}" ip netns add "${NS2}" +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ 
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" -- cgit v1.2.3-70-g09d2 From 00e7f29d9b895cbee58b7071900dd52ed6dcec1e Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Thu, 14 Dec 2023 14:50:29 +0100 Subject: selftests: forwarding: ethtool_rmon: Add histogram counter test Validate the operation of rx and tx histogram counters, if supported by the interface, by sending batches of packets targeted for each bucket. Signed-off-by: Tobias Waldekranz Tested-by: Vladimir Oltean Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/Makefile | 1 + .../selftests/net/forwarding/ethtool_rmon.sh | 143 +++++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 9 ++ 3 files changed, 153 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/ethtool_rmon.sh (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index df593b7b3e6b..452693514be4 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -17,6 +17,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ dual_vxlan_bridge.sh \ ethtool_extended_state.sh \ ethtool_mm.sh \ + ethtool_rmon.sh \ ethtool.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh new file mode 100755 index 000000000000..41a34a61f763 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + rmon_rx_histogram + rmon_tx_histogram +" + +NUM_NETIFS=2 +source lib.sh + +ETH_FCS_LEN=4 +ETH_HLEN=$((6+6+2)) + +declare -A netif_mtu + +ensure_mtu() +{ + local iface=$1; shift + local len=$1; shift + local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') + local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + + if [ $current -lt $required ]; then + ip link set dev $iface mtu $required || return 1 + fi +} + +bucket_test() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local bucket=$1; shift + local len=$1; shift + local num_rx=10000 + local num_tx=20000 + local expected= + local before= + local after= + local delta= + + # Mausezahn does not include FCS bytes in its length - but the + # histogram counters do + len=$((len - ETH_FCS_LEN)) + + before=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + # Send 10k one way and 20k in the other, to detect counters + # mapped to the wrong direction + $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us + $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + + after=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + delta=$((after - before)) + + expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + + # Allow some extra tolerance for other packets sent by the stack + [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] +} + +rmon_histogram() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local nbuckets=0 + local step= + + RET=0 + + while read -r -a bucket; do + step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + + for if in $iface $neigh; do + if ! ensure_mtu $if ${bucket[0]}; then + log_test_skip "$if does not support the required MTU for $step" + return + fi + done + + if ! 
bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then + check_err 1 "$step failed" + return 1 + fi + log_test "$step" + nbuckets=$((nbuckets + 1)) + done < <(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) + + if [ $nbuckets -eq 0 ]; then + log_test_skip "$iface does not support $set histogram counters" + return + fi +} + +rmon_rx_histogram() +{ + rmon_histogram $h1 $h2 rx + rmon_histogram $h2 $h1 rx +} + +rmon_tx_histogram() +{ + rmon_histogram $h1 $h2 tx + rmon_histogram $h2 $h1 tx +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + for iface in $h1 $h2; do + netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') + ip link set dev $iface up + done +} + +cleanup() +{ + pre_cleanup + + for iface in $h2 $h1; do + ip link set dev $iface \ + mtu ${netif_mtu[$iface]} \ + down + done +} + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8f6ca458af9a..e3740163c384 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -146,6 +146,15 @@ check_ethtool_mm_support() fi } +check_ethtool_counter_group_support() +{ + ethtool --help 2>&1| grep -- '--all-groups' &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing standard counter group support" + exit $ksft_skip + fi +} + check_locked_port_support() { if ! bridge -d link show | grep -q " locked"; then -- cgit v1.2.3-70-g09d2 From 0d83786f5661154d015b498a3d23d4c37e30f6ef Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 15 Dec 2023 18:07:06 +0800 Subject: selftests/bpf: Add test for abnormal cnt during multi-uprobe attachment If an abnormally huge cnt is used for multi-uprobes attachment, the following warning will be reported: ------------[ cut here ]------------ WARNING: CPU: 7 PID: 406 at mm/util.c:632 kvmalloc_node+0xd9/0xe0 Modules linked in: bpf_testmod(O) CPU: 7 PID: 406 Comm: test_progs Tainted: G ...... 6.7.0-rc3+ #32 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:kvmalloc_node+0xd9/0xe0 ...... Call Trace: ? __warn+0x89/0x150 ? kvmalloc_node+0xd9/0xe0 bpf_uprobe_multi_link_attach+0x14a/0x480 __sys_bpf+0x14a9/0x2bc0 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 ...... ---[ end trace 0000000000000000 ]--- So add a test to ensure the warning is fixed. 
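For context, the warning is an allocation-size sanity check: judging from the trace above, the multi-uprobe attach path sizes its per-probe arrays from the user-supplied cnt, and kvmalloc_node() warns and fails once a request exceeds INT_MAX bytes, so cnt = INT_MAX turns into a multi-gigabyte allocation. A minimal user-space sketch of the case the new test covers follows; prog_fd and the helper name are illustrative assumptions, and the -E2BIG result assumes a kernel with the cnt limit in place:

	/* Hedged sketch, not the committed test: attempt a multi-uprobe attach
	 * with an oversized cnt. On a fixed kernel bpf_link_create() is
	 * expected to return -E2BIG instead of reaching the kvmalloc_node()
	 * warning.
	 */
	#include <limits.h>
	#include <bpf/bpf.h>

	static int try_oversized_uprobe_attach(int prog_fd)
	{
		LIBBPF_OPTS(bpf_link_create_opts, opts);
		unsigned long offset = 0;

		opts.uprobe_multi.path = "/proc/self/exe";
		opts.uprobe_multi.offsets = &offset;
		opts.uprobe_multi.cnt = INT_MAX;	/* abnormal cnt */

		return bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
	}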
Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231215100708.2265609-4-houtao@huaweicloud.com --- .../selftests/bpf/prog_tests/uprobe_multi_test.c | 32 +++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index ece260cf2c0b..07a009f95e85 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -234,6 +234,34 @@ static void test_attach_api_syms(void) test_attach_api("/proc/self/exe", NULL, &opts); } +static void test_attach_api_fails(void) +{ + LIBBPF_OPTS(bpf_link_create_opts, opts); + const char *path = "/proc/self/exe"; + struct uprobe_multi *skel = NULL; + unsigned long offset = 0; + int link_fd = -1; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + /* abnormal cnt */ + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = &offset; + opts.uprobe_multi.cnt = INT_MAX; + link_fd = bpf_link_create(bpf_program__fd(skel->progs.uprobe), 0, + BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt")) + goto cleanup; +cleanup: + if (link_fd >= 0) + close(link_fd); + uprobe_multi__destroy(skel); +} + static void __test_link_api(struct child *child) { int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1; @@ -311,7 +339,7 @@ cleanup: free(offsets); } -void test_link_api(void) +static void test_link_api(void) { struct child *child; @@ -412,4 +440,6 @@ void test_uprobe_multi_test(void) test_bench_attach_uprobe(); if (test__start_subtest("bench_usdt")) test_bench_attach_usdt(); + if (test__start_subtest("attach_api_fails")) + test_attach_api_fails(); } -- cgit v1.2.3-70-g09d2 From 00cdcd2900bdb9190d1e75438b39cef74cd99232 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 15 Dec 2023 18:07:07 +0800 Subject: selftests/bpf: Don't use libbpf_get_error() in kprobe_multi_test Since libbpf v1.0, libbpf doesn't return error code embedded into the pointer iteself, libbpf_get_error() is deprecated and it is basically the same as using -errno directly. So replace the invocations of libbpf_get_error() by -errno in kprobe_multi_test. For libbpf_get_error() in test_attach_api_fails(), saving -errno before invoking ASSERT_xx() macros just in case that errno is overwritten by these macros. However, the invocation of libbpf_get_error() in get_syms() should be kept intact, because hashmap__new() still returns a pointer with embedded error code. 
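The conversion boils down to one idiom, worth spelling out next to the patch: since libbpf v1.0 the attach helpers return NULL on failure and report the error through errno, and, as the commit message notes, errno has to be captured before the ASSERT_*() macros run because they may overwrite it. A hedged sketch of the pattern (prog and opts stand in for an already-open program and its kprobe_multi options; ASSERT_ERR_PTR() and ASSERT_EQ() come from the selftests' test_progs.h):

	/* Capture the error right after the failing call, before any
	 * ASSERT_*() macro can clobber errno.
	 */
	struct bpf_link *link;
	int saved_error;

	link = bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
	saved_error = -errno;			/* e.g. -EINVAL */
	if (!ASSERT_ERR_PTR(link, "attach"))
		goto cleanup;
	if (!ASSERT_EQ(saved_error, -EINVAL, "attach_error"))
		goto cleanup;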
Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231215100708.2265609-5-houtao@huaweicloud.com --- .../testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 4041cfa670eb..6079611b5df4 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -222,6 +222,7 @@ static void test_attach_api_fails(void) "bpf_fentry_test2", }; __u64 cookies[2]; + int saved_error; addrs[0] = ksym_get_addr("bpf_fentry_test1"); addrs[1] = ksym_get_addr("bpf_fentry_test2"); @@ -238,10 +239,11 @@ static void test_attach_api_fails(void) /* fail_1 - pattern and opts NULL */ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, NULL); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_1")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_1_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_1_error")) goto cleanup; /* fail_2 - both addrs and syms set */ @@ -252,10 +254,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_2")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_2_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_2_error")) goto cleanup; /* fail_3 - pattern and addrs set */ @@ -266,10 +269,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_3")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_3_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_3_error")) goto cleanup; /* fail_4 - pattern and cnt set */ @@ -280,10 +284,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_4")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_4_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_4_error")) goto cleanup; /* fail_5 - pattern and cookies */ @@ -294,10 +299,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_5")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_5_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error")) goto cleanup; cleanup: -- cgit v1.2.3-70-g09d2 From 1467affd16b236fc86e1b8ec5eaa147e104cd2a6 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Fri, 15 Dec 2023 18:07:08 +0800 Subject: selftests/bpf: Add test for abnormal cnt during multi-kprobe attachment If an abnormally huge cnt is used for multi-kprobes attachment, the following warning will be reported: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 392 at mm/util.c:632 kvmalloc_node+0xd9/0xe0 Modules linked in: bpf_testmod(O) CPU: 1 PID: 392 Comm: test_progs Tainted: G ...... 6.7.0-rc3+ #32 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) ...... RIP: 0010:kvmalloc_node+0xd9/0xe0 ? __warn+0x89/0x150 ? 
kvmalloc_node+0xd9/0xe0 bpf_kprobe_multi_link_attach+0x87/0x670 __sys_bpf+0x2a28/0x2bc0 __x64_sys_bpf+0x1a/0x30 do_syscall_64+0x36/0xb0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 RIP: 0033:0x7fbe067f0e0d ...... ---[ end trace 0000000000000000 ]--- So add a test to ensure the warning is fixed. Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231215100708.2265609-6-houtao@huaweicloud.com --- .../testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 6079611b5df4..05000810e28e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -306,6 +306,21 @@ static void test_attach_api_fails(void) if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error")) goto cleanup; + /* fail_6 - abnormal cnt */ + opts.addrs = (const unsigned long *) addrs; + opts.syms = NULL; + opts.cnt = INT_MAX; + opts.cookies = NULL; + + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + NULL, &opts); + saved_error = -errno; + if (!ASSERT_ERR_PTR(link, "fail_6")) + goto cleanup; + + if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error")) + goto cleanup; + cleanup: bpf_link__destroy(link); kprobe_multi__destroy(skel); -- cgit v1.2.3-70-g09d2 From 42d45c45624a098a9fdc477c7a8b86167f948c77 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 15 Dec 2023 16:28:25 -0800 Subject: selftests/bpf: Temporarily disable dummy_struct_ops test on s390 Temporarily disable dummy_struct_ops test on s390. The breakage is likely due to commit 2cd3e3772e41 ("x86/cfi,bpf: Fix bpf_struct_ops CFI"). Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/DENYLIST.s390x | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 1a63996c0304..d27aa42d11a4 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,5 +1,7 @@ # TEMPORARY # Alphabetical order +dummy_st_ops/dummy_init_ret_value +dummy_st_ops/dummy_init_ptr_arg exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -- cgit v1.2.3-70-g09d2 From cfbab37b3da094579b8f7492e4df8a8a4c8c41b0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:15 +0000 Subject: selftests/net: Add TCP-AO library Provide functions to create selftests dedicated to TCP-AO. They can run in parallel, as they use temporary net namespaces. They can be very specific to the feature being tested. This will allow creating a lot of TCP-AO tests without complicating one binary with many --options, and creating scenarios that are hard to put into a bash script that uses one binary. Signed-off-by: Dmitry Safonov Signed-off-by: David S.
Miller --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/net/tcp_ao/.gitignore | 2 + tools/testing/selftests/net/tcp_ao/Makefile | 45 ++ tools/testing/selftests/net/tcp_ao/connect.c | 90 ++++ tools/testing/selftests/net/tcp_ao/lib/aolib.h | 605 +++++++++++++++++++++++ tools/testing/selftests/net/tcp_ao/lib/kconfig.c | 148 ++++++ tools/testing/selftests/net/tcp_ao/lib/netlink.c | 415 ++++++++++++++++ tools/testing/selftests/net/tcp_ao/lib/proc.c | 273 ++++++++++ tools/testing/selftests/net/tcp_ao/lib/repair.c | 254 ++++++++++ tools/testing/selftests/net/tcp_ao/lib/setup.c | 342 +++++++++++++ tools/testing/selftests/net/tcp_ao/lib/sock.c | 592 ++++++++++++++++++++++ tools/testing/selftests/net/tcp_ao/lib/utils.c | 30 ++ 12 files changed, 2797 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/.gitignore create mode 100644 tools/testing/selftests/net/tcp_ao/Makefile create mode 100644 tools/testing/selftests/net/tcp_ao/connect.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/aolib.h create mode 100644 tools/testing/selftests/net/tcp_ao/lib/kconfig.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/netlink.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/proc.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/repair.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/setup.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/sock.c create mode 100644 tools/testing/selftests/net/tcp_ao/lib/utils.c (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3b2061d1c1a5..f0c854d6511c 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -58,6 +58,7 @@ TARGETS += net/forwarding TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch +TARGETS += net/tcp_ao TARGETS += netfilter TARGETS += nsfs TARGETS += perf_events diff --git a/tools/testing/selftests/net/tcp_ao/.gitignore b/tools/testing/selftests/net/tcp_ao/.gitignore new file mode 100644 index 000000000000..e8bb81b715b7 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/.gitignore @@ -0,0 +1,2 @@ +*_ipv4 +*_ipv6 diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile new file mode 100644 index 000000000000..62425b9fb73c --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_BOTH_AF := connect + +TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) +TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) + +TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS) + +top_srcdir := ../../../../.. 
+KSFT_KHDR_INSTALL := 1 +include ../../lib.mk + +HOSTAR ?= ar + +# Drop it on port to linux/master with commit 8ce72dc32578 +.DEFAULT_GOAL := all + +LIBDIR := $(OUTPUT)/lib +LIB := $(LIBDIR)/libaotst.a +LDLIBS += $(LIB) -pthread +LIBDEPS := lib/aolib.h Makefile + +CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += -I ../../../../../usr/include/ -iquote $(LIBDIR) +CFLAGS += -I ../../../../include/ + +# Library +LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) +EXTRA_CLEAN += $(LIBOBJ) $(LIB) + +$(LIB): $(LIBOBJ) + $(HOSTAR) rcs $@ $^ + +$(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS) + $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c + +$(TEST_GEN_PROGS): $(LIB) + +$(OUTPUT)/%_ipv4: %.c + $(LINK.c) $^ $(LDLIBS) -o $@ + +$(OUTPUT)/%_ipv6: %.c + $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@ + diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c new file mode 100644 index 000000000000..81653b47f303 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +static void *server_fn(void *arg) +{ + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); + + bytes = test_server_run(sk, 0, 0); + + test_fail("server served: %zd", bytes); + return NULL; +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + uint64_t before_aogood, after_aogood; + const size_t nr_packets = 20; + struct netstat *ns_before, *ns_after; + struct tcp_ao_counters ao1, ao2; + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + return NULL; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + netstat_print_diff(ns_before, ns_after); + netstat_free(ns_before); + netstat_free(ns_after); + + if (nr_packets > (after_aogood - before_aogood)) { + test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + nr_packets, after_aogood, before_aogood); + return NULL; + } + if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + return NULL; + + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, + before_aogood, ao1.ao_info_pkt_good, + ao1.key_cnts[0].pkt_good, + after_aogood, ao2.ao_info_pkt_good, + ao2.key_cnts[0].pkt_good, + nr_packets); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(1, server_fn, client_fn); + return 0; +} diff --git 
a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h new file mode 100644 index 000000000000..fbc7f6111815 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TCP-AO selftest library. Provides helpers to unshare network + * namespaces, create veth, assign ip addresses, set routes, + * manipulate socket options, read network counter and etc. + * Author: Dmitry Safonov + */ +#ifndef _AOLIB_H_ +#define _AOLIB_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../../../include/linux/stringify.h" +#include "../../../../../include/linux/bits.h" + +#ifndef SOL_TCP +/* can't include as including */ +# define SOL_TCP 6 /* TCP level */ +#endif + +/* Working around ksft, see the comment in lib/setup.c */ +extern void __test_msg(const char *buf); +extern void __test_ok(const char *buf); +extern void __test_fail(const char *buf); +extern void __test_xfail(const char *buf); +extern void __test_error(const char *buf); +extern void __test_skip(const char *buf); + +__attribute__((__format__(__printf__, 2, 3))) +static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) +{ +#define TEST_MSG_BUFFER_SIZE 4096 + char buf[TEST_MSG_BUFFER_SIZE]; + va_list arg; + + va_start(arg, fmt); + vsnprintf(buf, sizeof(buf), fmt, arg); + va_end(arg); + fn(buf); +} + +#define test_print(fmt, ...) \ + __test_print(__test_msg, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), \ + __FILE__, __LINE__, ##__VA_ARGS__) + +#define test_ok(fmt, ...) \ + __test_print(__test_ok, fmt "\n", ##__VA_ARGS__) +#define test_skip(fmt, ...) \ + __test_print(__test_skip, fmt "\n", ##__VA_ARGS__) +#define test_xfail(fmt, ...) \ + __test_print(__test_xfail, fmt "\n", ##__VA_ARGS__) + +#define test_fail(fmt, ...) \ +do { \ + if (errno) \ + __test_print(__test_fail, fmt ": %m\n", ##__VA_ARGS__); \ + else \ + __test_print(__test_fail, fmt "\n", ##__VA_ARGS__); \ + test_failed(); \ +} while (0) + +#define KSFT_FAIL 1 +#define test_error(fmt, ...) 
\ +do { \ + if (errno) \ + __test_print(__test_error, "%ld[%s:%u] " fmt ": %m\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + else \ + __test_print(__test_error, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + exit(KSFT_FAIL); \ +} while (0) + +enum test_fault { + FAULT_TIMEOUT = 1, + FAULT_KEYREJECT, + FAULT_PREINSTALL_AO, + FAULT_PREINSTALL_MD5, + FAULT_POSTINSTALL, + FAULT_BUSY, + FAULT_CURRNEXT, + FAULT_FIXME, +}; +typedef enum test_fault fault_t; + +enum test_needs_kconfig { + KCONFIG_NET_NS = 0, /* required */ + KCONFIG_VETH, /* required */ + KCONFIG_TCP_AO, /* required */ + KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ + KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + __KCONFIG_LAST__ +}; +extern bool kernel_config_has(enum test_needs_kconfig k); +extern const char *tests_skip_reason[__KCONFIG_LAST__]; +static inline bool should_skip_test(const char *tst_name, + enum test_needs_kconfig k) +{ + if (kernel_config_has(k)) + return false; + test_skip("%s: %s", tst_name, tests_skip_reason[k]); + return true; +} + +union tcp_addr { + struct in_addr a4; + struct in6_addr a6; +}; + +typedef void *(*thread_fn)(void *); +extern void test_failed(void); +extern void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2); + +static inline void test_init2(unsigned int ntests, + thread_fn peer1, thread_fn peer2, + int family, unsigned int prefix, + const char *addr1, const char *addr2) +{ + union tcp_addr taddr1, taddr2; + + if (inet_pton(family, addr1, &taddr1) != 1) + test_error("Can't convert ip address %s", addr1); + if (inet_pton(family, addr2, &taddr2) != 1) + test_error("Can't convert ip address %s", addr2); + + __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); +} +extern void test_add_destructor(void (*d)(void)); + +/* To adjust optmem socket limit, approximately estimate a number, + * that is bigger than sizeof(struct tcp_ao_key). + */ +#define KERNEL_TCP_AO_KEY_SZ_ROUND_UP 300 + +extern void test_set_optmem(size_t value); +extern size_t test_get_optmem(void); + +extern const struct sockaddr_in6 addr_any6; +extern const struct sockaddr_in addr_any4; + +#ifdef IPV6_TEST +# define __TEST_CLIENT_IP(n) ("2001:db8:" __stringify(n) "::1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "2001:db8:253::1" +# define TEST_SERVER_IP "2001:db8:254::1" +# define TEST_NETWORK "2001::" +# define TEST_PREFIX 128 +# define TEST_FAMILY AF_INET6 +# define SOCKADDR_ANY addr_any6 +# define sockaddr_af struct sockaddr_in6 +#else +# define __TEST_CLIENT_IP(n) ("10.0." 
__stringify(n) ".1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "10.0.253.1" +# define TEST_SERVER_IP "10.0.254.1" +# define TEST_NETWORK "10.0.0.0" +# define TEST_PREFIX 32 +# define TEST_FAMILY AF_INET +# define SOCKADDR_ANY addr_any4 +# define sockaddr_af struct sockaddr_in +#endif + +static inline union tcp_addr gen_tcp_addr(union tcp_addr net, size_t n) +{ + union tcp_addr ret = net; + +#ifdef IPV6_TEST + ret.a6.s6_addr32[3] = htonl(n & (BIT(32) - 1)); + ret.a6.s6_addr32[2] = htonl((n >> 32) & (BIT(32) - 1)); +#else + ret.a4.s_addr = htonl(ntohl(net.a4.s_addr) + n); +#endif + + return ret; +} + +static inline void tcp_addr_to_sockaddr_in(void *dest, + const union tcp_addr *src, + unsigned int port) +{ + sockaddr_af *out = dest; + + memset(out, 0, sizeof(*out)); +#ifdef IPV6_TEST + out->sin6_family = AF_INET6; + out->sin6_port = port; + out->sin6_addr = src->a6; +#else + out->sin_family = AF_INET; + out->sin_port = port; + out->sin_addr = src->a4; +#endif +} + +static inline void test_init(unsigned int ntests, + thread_fn peer1, thread_fn peer2) +{ + test_init2(ntests, peer1, peer2, TEST_FAMILY, TEST_PREFIX, + TEST_SERVER_IP, TEST_CLIENT_IP); +} +extern void synchronize_threads(void); +extern void switch_ns(int fd); + +extern __thread union tcp_addr this_ip_addr; +extern __thread union tcp_addr this_ip_dest; +extern int test_family; + +extern void randomize_buffer(void *buf, size_t buflen); +extern int open_netns(void); +extern int unshare_open_netns(void); +extern const char veth_name[]; +extern int add_veth(const char *name, int nsfda, int nsfdb); +extern int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd); +extern int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix); +extern int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst); +extern int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, + uint8_t vrf); +extern int link_set_up(const char *intf); + +extern const unsigned int test_server_port; +extern int test_wait_fd(int sk, time_t sec, bool write); +extern int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout); +extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); + +static inline int test_listen_socket(const union tcp_addr taddr, + unsigned int port, int backlog) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_listen_socket(backlog, (void *)&addr, sizeof(addr)); +} + +/* + * In order for selftests to work under CONFIG_CRYPTO_FIPS=y, + * the password should be loger than 14 bytes, see hmac_setkey() + */ +#define TEST_TCP_AO_MINKEYLEN 14 +#define DEFAULT_TEST_PASSWORD "In this hour, I do not believe that any darkness will endure." + +#ifndef DEFAULT_TEST_ALGO +#define DEFAULT_TEST_ALGO "cmac(aes128)" +#endif + +#ifdef IPV6_TEST +#define DEFAULT_TEST_PREFIX 128 +#else +#define DEFAULT_TEST_PREFIX 32 +#endif + +/* + * Timeout on syscalls where failure is not expected. + * You may want to rise it if the test machine is very busy. + */ +#ifndef TEST_TIMEOUT_SEC +#define TEST_TIMEOUT_SEC 5 +#endif + +/* + * Timeout on connect() where a failure is expected. + * If set to 0 - kernel will try to retransmit SYN number of times, set in + * /proc/sys/net/ipv4/tcp_syn_retries + * By default set to 1 to make tests pass faster on non-busy machine. 
+ */ +#ifndef TEST_RETRANSMIT_SEC +#define TEST_RETRANSMIT_SEC 1 +#endif + +static inline int _test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port, time_t timeout) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_connect_socket(sk, veth_name, + (void *)&addr, sizeof(addr), timeout); +} + +static inline int test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port) +{ + return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); +} + +extern int __test_set_md5(int sk, void *addr, size_t addr_sz, + uint8_t prefix, int vrf, const char *password); +static inline int test_set_md5(int sk, const union tcp_addr in_addr, + uint8_t prefix, int vrf, const char *password) +{ + sockaddr_af addr; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + tcp_addr_to_sockaddr_in(&addr, &in_addr, 0); + return __test_set_md5(sk, (void *)&addr, sizeof(addr), + prefix, vrf, password); +} + +extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key); + +static inline int test_prepare_key(struct tcp_ao_add *ao, + const char *alg, union tcp_addr taddr, + bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, 0); + return test_prepare_key_sockaddr(ao, alg, (void *)&addr, sizeof(addr), + set_current, set_rnext, prefix, vrf, sndid, rcvid, + maclen, keyflags, keylen, key); +} + +static inline int test_prepare_def_key(struct tcp_ao_add *ao, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid) +{ + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + return test_prepare_key(ao, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, vrf, sndid, rcvid, 0, keyflags, + strlen(key), key); +} + +extern int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, + uint8_t prefix, uint8_t sndid, uint8_t rcvid); +extern int test_get_ao_info(int sk, struct tcp_ao_info_opt *out); +extern int test_set_ao_info(int sk, struct tcp_ao_info_opt *in); +extern int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b); +extern int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b); + +static inline int test_verify_socket_key(int sk, struct tcp_ao_add *key) +{ + struct tcp_ao_getsockopt key2 = {}; + int err; + + err = test_get_one_ao(sk, &key2, &key->addr, sizeof(key->addr), + key->prefix, key->sndid, key->rcvid); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt(key, &key2); +} + +static inline int test_add_key_vrf(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t vrf, uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + vrf, sndid, rcvid); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static inline int test_add_key(int sk, const char *key, + union tcp_addr 
in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + return test_add_key_vrf(sk, key, 0, in_addr, prefix, 0, sndid, rcvid); +} + +static inline int test_verify_socket_ao(int sk, struct tcp_ao_info_opt *ao) +{ + struct tcp_ao_info_opt ao2 = {}; + int err; + + err = test_get_ao_info(sk, &ao2); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt_ao(ao, &ao2); +} + +static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) +{ + struct tcp_ao_info_opt ao = {}; + int err; + + err = test_get_ao_info(sk, &ao); + /* Maybe ao_info wasn't allocated yet */ + if (err && err != -ENOENT) + return err; + + ao.ao_required = !!ao_required; + ao.accept_icmps = !!accept_icmps; + err = test_set_ao_info(sk, &ao); + if (err) + return err; + + return test_verify_socket_ao(sk, &ao); +} + +extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); +extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec); + +struct tcp_ao_key_counters { + uint8_t sndid; + uint8_t rcvid; + uint64_t pkt_good; + uint64_t pkt_bad; +}; + +struct tcp_ao_counters { + /* per-netns */ + uint64_t netns_ao_good; + uint64_t netns_ao_bad; + uint64_t netns_ao_key_not_found; + uint64_t netns_ao_required; + uint64_t netns_ao_dropped_icmp; + /* per-socket */ + uint64_t ao_info_pkt_good; + uint64_t ao_info_pkt_bad; + uint64_t ao_info_pkt_key_not_found; + uint64_t ao_info_pkt_ao_required; + uint64_t ao_info_pkt_dropped_icmp; + /* per-key */ + size_t nr_keys; + struct tcp_ao_key_counters *key_cnts; +}; +extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +#define TEST_CNT_KEY_GOOD BIT(0) +#define TEST_CNT_KEY_BAD BIT(1) +#define TEST_CNT_SOCK_GOOD BIT(2) +#define TEST_CNT_SOCK_BAD BIT(3) +#define TEST_CNT_SOCK_KEY_NOT_FOUND BIT(4) +#define TEST_CNT_SOCK_AO_REQUIRED BIT(5) +#define TEST_CNT_SOCK_DROPPED_ICMP BIT(6) +#define TEST_CNT_NS_GOOD BIT(7) +#define TEST_CNT_NS_BAD BIT(8) +#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) +#define TEST_CNT_NS_AO_REQUIRED BIT(10) +#define TEST_CNT_NS_DROPPED_ICMP BIT(11) +typedef uint16_t test_cnt; + +#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) +#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) +#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ + TEST_CNT_NS_KEY_NOT_FOUND) +#define TEST_CNT_AO_REQUIRED (TEST_CNT_SOCK_AO_REQUIRED | \ + TEST_CNT_NS_AO_REQUIRED) +#define TEST_CNT_AO_DROPPED_ICMP (TEST_CNT_SOCK_DROPPED_ICMP | \ + TEST_CNT_NS_DROPPED_ICMP) +#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) +#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) + +extern int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected); +extern int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid); +extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +/* + * Frees buffers allocated in test_get_tcp_ao_counters(). + * The function doesn't expect new keys or keys removed between calls + * to test_get_tcp_ao_counters(). Check key counters manually if they + * may change. 
+ */ +static inline int test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ + int ret; + + ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + if (ret) + goto out; + ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, + expected, -1, -1); +out: + test_tcp_ao_counters_free(before); + test_tcp_ao_counters_free(after); + return ret; +} + +struct netstat; +extern struct netstat *netstat_read(void); +extern void netstat_free(struct netstat *ns); +extern void netstat_print_diff(struct netstat *nsa, struct netstat *nsb); +extern uint64_t netstat_get(struct netstat *ns, + const char *name, bool *not_found); + +static inline uint64_t netstat_get_one(const char *name, bool *not_found) +{ + struct netstat *ns = netstat_read(); + uint64_t ret; + + ret = netstat_get(ns, name, not_found); + + netstat_free(ns); + return ret; +} + +struct tcp_sock_queue { + uint32_t seq; + void *buf; +}; + +struct tcp_sock_state { + struct tcp_info info; + struct tcp_repair_window trw; + struct tcp_sock_queue out; + int outq_len; /* output queue size (not sent + not acked) */ + int outq_nsd_len; /* output queue size (not sent only) */ + struct tcp_sock_queue in; + int inq_len; + int mss; + int timestamp; +}; + +extern void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size); +static inline void test_sock_checkpoint(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr) +{ + __test_sock_checkpoint(sk, state, saddr, sizeof(*saddr)); +} +extern void test_ao_checkpoint(int sk, struct tcp_ao_repair *state); +extern void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size); +static inline void test_sock_restore(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr, + const union tcp_addr daddr, + unsigned int dport) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &daddr, htons(dport)); + __test_sock_restore(sk, veth_name, state, saddr, &addr, sizeof(addr)); +} +extern void test_ao_restore(int sk, struct tcp_ao_repair *state); +extern void test_sock_state_free(struct tcp_sock_state *state); +extern void test_enable_repair(int sk); +extern void test_disable_repair(int sk); +extern void test_kill_sk(int sk); +static inline int test_add_repaired_key(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + 0, sndid, rcvid); + if (err) + return err; + + tmp.set_current = 1; + tmp.set_rnext = 1; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +#endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c new file mode 100644 index 000000000000..f279ffc3843b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check what features does the kernel support (where the selftest is running). + * Somewhat inspired by CRIU kerndat/kdat kernel features detector. 
+ */ +#include +#include "aolib.h" + +struct kconfig_t { + int _errno; /* the returned error if not supported */ + int (*check_kconfig)(int *error); +}; + +static int has_net_ns(int *err) +{ + if (access("/proc/self/ns/net", F_OK) < 0) { + *err = errno; + if (errno == ENOENT) + return 0; + test_print("Unable to access /proc/self/ns/net: %m"); + return -errno; + } + return *err = errno = 0; +} + +static int has_veth(int *err) +{ + int orig_netns, ns_a, ns_b; + + orig_netns = open_netns(); + ns_a = unshare_open_netns(); + ns_b = unshare_open_netns(); + + *err = add_veth("check_veth", ns_a, ns_b); + + switch_ns(orig_netns); + close(orig_netns); + close(ns_a); + close(ns_b); + return 0; +} + +static int has_tcp_ao(int *err) +{ + struct sockaddr_in addr = { + .sin_family = test_family, + }; + struct tcp_ao_add tmp = {}; + const char *password = DEFAULT_TEST_PASSWORD; + int sk, ret = 0; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + tmp.sndid = 100; + tmp.rcvid = 100; + tmp.keylen = strlen(password); + memcpy(tmp.key, password, strlen(password)); + strcpy(tmp.alg_name, "hmac(sha1)"); + memcpy(&tmp.addr, &addr, sizeof(addr)); + *err = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { + *err = errno; + if (errno != ENOPROTOOPT) + ret = -errno; + } + close(sk); + return ret; +} + +static int has_tcp_md5(int *err) +{ + union tcp_addr addr_any = {}; + int sk, ret = 0; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + /* + * Under CONFIG_CRYPTO_FIPS=y it fails with ENOMEM, rather with + * anything more descriptive. Oh well. + */ + *err = 0; + if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { + *err = errno; + if (errno != ENOPROTOOPT && errno == ENOMEM) { + test_print("setsockopt(TCP_MD5SIG_EXT): %m"); + ret = -errno; + } + } + close(sk); + return ret; +} + +static int has_vrfs(int *err) +{ + int orig_netns, ns_test, ret = 0; + + orig_netns = open_netns(); + ns_test = unshare_open_netns(); + + *err = add_vrf("ksft-check", 55, 101, ns_test); + if (*err && *err != -EOPNOTSUPP) { + test_print("Failed to add a VRF: %d", *err); + ret = *err; + } + + switch_ns(orig_netns); + close(orig_netns); + close(ns_test); + return ret; +} + +static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; +static struct kconfig_t kconfig[__KCONFIG_LAST__] = { + { -1, has_net_ns }, + { -1, has_veth }, + { -1, has_tcp_ao }, + { -1, has_tcp_md5 }, + { -1, has_vrfs }, +}; + +const char *tests_skip_reason[__KCONFIG_LAST__] = { + "Tests require network namespaces support (CONFIG_NET_NS)", + "Tests require veth support (CONFIG_VETH)", + "Tests require TCP-AO support (CONFIG_TCP_AO)", + "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", + "VRFs are not supported (CONFIG_NET_VRF)", +}; + +bool kernel_config_has(enum test_needs_kconfig k) +{ + bool ret; + + pthread_mutex_lock(&kconfig_lock); + if (kconfig[k]._errno == -1) { + if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + test_error("Failed to initialize kconfig %u", k); + } + ret = kconfig[k]._errno == 0; + pthread_mutex_unlock(&kconfig_lock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c new file mode 100644 index 000000000000..b731f2c84083 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Original from 
tools/testing/selftests/net/ipsec.c */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "aolib.h" + +#define MAX_PAYLOAD 2048 + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock < 0) { + test_print("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static int netlink_check_answer(int sock, bool quite) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + test_print("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + test_print("expected NLMSG_ERROR, got %d", + (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + if (!quite) { + test_print("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + } + return answer.error; + } + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + test_print("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int __add_veth(int sock, uint32_t seq, const char *name, + int ns_a, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, 
name, strlen(name))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), name, ns_b)) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int add_veth(const char *name, int nsfda, int nsfdb) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_veth(route_sock, route_seq++, name, nsfda, nsfdb); + close(route_sock); + return ret; +} + +static int __ip_addr_add(int sock, uint32_t seq, const char *intf, + int family, union tcp_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = family; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + req.info.ifa_flags = IFA_F_NODAD; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, addr_len)) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, true); +} + +int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_addr_add(route_sock, route_seq++, intf, + family, addr, prefix); + + close(route_sock); + return ret; +} + +static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family, + union tcp_addr src, union tcp_addr dst, uint8_t vrf) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = family; + req.rt.rtm_dst_len = (family == AF_INET) ? 
32 : 128; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_UNIVERSE; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + + return netlink_check_answer(sock, true); +} + +int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, uint8_t vrf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_route_add(route_sock, route_seq++, intf, + family, src, dst, vrf); + if (ret == -EEXIST) /* ignoring */ + ret = 0; + + close(route_sock); + return ret; +} + +int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst) +{ + return ip_route_add_vrf(intf, family, src, dst, RT_TABLE_MAIN); +} + +static int __link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); + req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int link_set_up(const char *intf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __link_set_up(route_sock, route_seq++, intf); + + close(route_sock); + return ret; +} + +static int __add_vrf(int sock, uint32_t seq, const char *name, + uint32_t tabid, int ifindex, int nsfd) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char vrf_type[] = "vrf"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = ifindex; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name))) + return -1; + + if (nsfd >= 0) + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, + &nsfd, sizeof(nsfd))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, vrf_type, sizeof(vrf_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_VRF_TABLE, + &tabid, sizeof(tabid))) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } 
+ return netlink_check_answer(sock, true); +} + +int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_vrf(route_sock, route_seq++, name, tabid, ifindex, nsfd); + close(route_sock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c new file mode 100644 index 000000000000..2322f4d4676d --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "../../../../../include/linux/compiler.h" +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +struct netstat_counter { + uint64_t val; + char *name; +}; + +struct netstat { + char *header_name; + struct netstat *next; + size_t counters_nr; + struct netstat_counter *counters; +}; + +static struct netstat *lookup_type(struct netstat *ns, + const char *type, size_t len) +{ + while (ns != NULL) { + size_t cmp = max(len, strlen(ns->header_name)); + + if (!strncmp(ns->header_name, type, cmp)) + return ns; + ns = ns->next; + } + return NULL; +} + +static struct netstat *lookup_get(struct netstat *ns, + const char *type, const size_t len) +{ + struct netstat *ret; + + ret = lookup_type(ns, type, len); + if (ret != NULL) + return ret; + + ret = malloc(sizeof(struct netstat)); + if (!ret) + test_error("malloc()"); + + ret->header_name = strndup(type, len); + if (ret->header_name == NULL) + test_error("strndup()"); + ret->next = ns; + ret->counters_nr = 0; + ret->counters = NULL; + + return ret; +} + +static struct netstat *lookup_get_column(struct netstat *ns, const char *line) +{ + char *column; + + column = strchr(line, ':'); + if (!column) + test_error("can't parse netstat file"); + + return lookup_get(ns, line, column - line); +} + +static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line) +{ + struct netstat *type = lookup_get_column(*dest, line); + const char *pos = line; + size_t i, nr_elems = 0; + char tmp; + + while ((pos = strchr(pos, ' '))) { + nr_elems++; + pos++; + } + + *dest = type; + type->counters = reallocarray(type->counters, + type->counters_nr + nr_elems, + sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + + pos = strchr(line, ' ') + 1; + + if (fscanf(fnetstat, type->header_name) == EOF) + test_error("fscanf(%s)", type->header_name); + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':') + test_error("Unexpected netstat format (%c)", tmp); + + for (i = type->counters_nr; i < type->counters_nr + nr_elems; i++) { + struct netstat_counter *nc = &type->counters[i]; + const char *new_pos = strchr(pos, ' '); + const char *fmt = " %" PRIu64; + + if (new_pos == NULL) + new_pos = strchr(pos, '\n'); + + nc->name = strndup(pos, new_pos - pos); + if (nc->name == NULL) + test_error("strndup()"); + + if (unlikely(!strcmp(nc->name, "MaxConn"))) + fmt = " %" PRId64; /* MaxConn is signed, RFC 2012 */ + if (fscanf(fnetstat, fmt, &nc->val) != 1) + test_error("fscanf(%s)", nc->name); + pos = new_pos + 1; + } + type->counters_nr += nr_elems; + + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != '\n') + test_error("Unexpected netstat format"); +} + +static const char *snmp6_name = "Snmp6"; +static void snmp6_read(FILE *fnetstat, struct netstat **dest) +{ + struct netstat *type = lookup_get(*dest, snmp6_name, 
strlen(snmp6_name)); + char *counter_name; + size_t i; + + for (i = type->counters_nr;; i++) { + struct netstat_counter *nc; + uint64_t counter; + + if (fscanf(fnetstat, "%ms", &counter_name) == EOF) + break; + if (fscanf(fnetstat, "%" PRIu64, &counter) == EOF) + test_error("Unexpected snmp6 format"); + type->counters = reallocarray(type->counters, i + 1, + sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + nc = &type->counters[i]; + nc->name = counter_name; + nc->val = counter; + } + type->counters_nr = i; + *dest = type; +} + +struct netstat *netstat_read(void) +{ + struct netstat *ret = 0; + size_t line_sz = 0; + char *line = NULL; + FILE *fnetstat; + + /* + * Opening thread-self instead of /proc/net/... as the latter + * points to /proc/self/net/ which instantiates thread-leader's + * net-ns, see: + * commit 155134fef2b6 ("Revert "proc: Point /proc/{mounts,net} at..") + */ + errno = 0; + fnetstat = fopen("/proc/thread-self/net/netstat", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/netstat"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp6", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp6"); + + snmp6_read(fnetstat, &ret); + fclose(fnetstat); + + free(line); + return ret; +} + +void netstat_free(struct netstat *ns) +{ + while (ns != NULL) { + struct netstat *prev = ns; + size_t i; + + free(ns->header_name); + for (i = 0; i < ns->counters_nr; i++) + free(ns->counters[i].name); + free(ns->counters); + ns = ns->next; + free(prev); + } +} + +static inline void +__netstat_print_diff(uint64_t a, struct netstat *nsb, size_t i) +{ + if (unlikely(!strcmp(nsb->header_name, "MaxConn"))) { + test_print("%8s %25s: %" PRId64 " => %" PRId64, + nsb->header_name, nsb->counters[i].name, + a, nsb->counters[i].val); + return; + } + + test_print("%8s %25s: %" PRIu64 " => %" PRIu64, nsb->header_name, + nsb->counters[i].name, a, nsb->counters[i].val); +} + +void netstat_print_diff(struct netstat *nsa, struct netstat *nsb) +{ + size_t i, j; + + while (nsb != NULL) { + if (unlikely(strcmp(nsb->header_name, nsa->header_name))) { + for (i = 0; i < nsb->counters_nr; i++) + __netstat_print_diff(0, nsb, i); + nsb = nsb->next; + continue; + } + + if (nsb->counters_nr < nsa->counters_nr) + test_error("Unexpected: some counters dissapeared!"); + + for (j = 0, i = 0; i < nsb->counters_nr; i++) { + if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) { + __netstat_print_diff(0, nsb, i); + continue; + } + + if (nsa->counters[j].val == nsb->counters[i].val) { + j++; + continue; + } + + __netstat_print_diff(nsa->counters[j].val, nsb, i); + j++; + } + if (j != nsa->counters_nr) + test_error("Unexpected: some counters dissapeared!"); + + nsb = nsb->next; + nsa = nsa->next; + } +} + +uint64_t netstat_get(struct netstat *ns, const char *name, bool *not_found) +{ + if (not_found) + *not_found = false; + + while (ns != NULL) { + size_t i; + + for (i = 0; i < ns->counters_nr; i++) { + if (!strcmp(name, ns->counters[i].name)) + return ns->counters[i].val; + } + + ns = ns->next; + } + + if (not_found) + *not_found = true; + return 0; +} diff --git 
a/tools/testing/selftests/net/tcp_ao/lib/repair.c b/tools/testing/selftests/net/tcp_ao/lib/repair.c new file mode 100644 index 000000000000..9893b3ba69f5 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/repair.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. + * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include +#include +#include +#include "aolib.h" + +#ifndef TCPOPT_MAXSEG +# define TCPOPT_MAXSEG 2 +#endif +#ifndef TCPOPT_WINDOW +# define TCPOPT_WINDOW 3 +#endif +#ifndef TCPOPT_SACK_PERMITTED +# define TCPOPT_SACK_PERMITTED 4 +#endif +#ifndef TCPOPT_TIMESTAMP +# define TCPOPT_TIMESTAMP 8 +#endif + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + TCP_NEW_SYN_RECV, + + TCP_MAX_STATES /* Leave at the end! */ +}; + +static void test_sock_checkpoint_queue(int sk, int queue, int qlen, + struct tcp_sock_queue *q) +{ + socklen_t len; + int ret; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + len = sizeof(q->seq); + ret = getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &q->seq, &len); + if (ret || len != sizeof(q->seq)) + test_error("getsockopt(TCP_QUEUE_SEQ): %d", (int)len); + + if (!qlen) { + q->buf = NULL; + return; + } + + q->buf = malloc(qlen); + if (q->buf == NULL) + test_error("malloc()"); + ret = recv(sk, q->buf, qlen, MSG_PEEK | MSG_DONTWAIT); + if (ret != qlen) + test_error("recv(%d): %d", qlen, ret); +} + +void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size) +{ + socklen_t len = sizeof(state->info); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_INFO, &state->info, &len); + if (ret || len != sizeof(state->info)) + test_error("getsockopt(TCP_INFO): %d", (int)len); + + len = addr_size; + if (getsockname(sk, addr, &len) || len != addr_size) + test_error("getsockname(): %d", (int)len); + + len = sizeof(state->trw); + ret = getsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, &len); + if (ret || len != sizeof(state->trw)) + test_error("getsockopt(TCP_REPAIR_WINDOW): %d", (int)len); + + if (ioctl(sk, SIOCOUTQ, &state->outq_len)) + test_error("ioctl(SIOCOUTQ)"); + + if (ioctl(sk, SIOCOUTQNSD, &state->outq_nsd_len)) + test_error("ioctl(SIOCOUTQNSD)"); + test_sock_checkpoint_queue(sk, TCP_SEND_QUEUE, state->outq_len, &state->out); + + if (ioctl(sk, SIOCINQ, &state->inq_len)) + test_error("ioctl(SIOCINQ)"); + test_sock_checkpoint_queue(sk, TCP_RECV_QUEUE, state->inq_len, &state->in); + + if (state->info.tcpi_state == TCP_CLOSE) + state->outq_len = state->outq_nsd_len = 0; + + len = sizeof(state->mss); + ret = getsockopt(sk, SOL_TCP, TCP_MAXSEG, &state->mss, &len); + if (ret || len != sizeof(state->mss)) + test_error("getsockopt(TCP_MAXSEG): %d", (int)len); + + len = sizeof(state->timestamp); + ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &state->timestamp, &len); + if (ret || len != sizeof(state->timestamp)) + test_error("getsockopt(TCP_TIMESTAMP): %d", (int)len); +} + +void test_ao_checkpoint(int sk, struct tcp_ao_repair *state) +{ + socklen_t len = sizeof(*state); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, 
state, &len); + if (ret || len != sizeof(*state)) + test_error("getsockopt(TCP_AO_REPAIR): %d", (int)len); +} + +static void test_sock_restore_seq(int sk, int queue, uint32_t seq) +{ + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + if (setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq))) + test_error("setsockopt(TCP_QUEUE_SEQ)"); +} + +static void test_sock_restore_queue(int sk, int queue, void *buf, int len) +{ + int chunk = len; + size_t off = 0; + + if (len == 0) + return; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + do { + int ret; + + ret = send(sk, buf + off, chunk, 0); + if (ret <= 0) { + if (chunk > 1024) { + chunk >>= 1; + continue; + } + test_error("send()"); + } + off += ret; + len -= ret; + } while (len > 0); +} + +void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size) +{ + struct tcp_repair_opt opts[4]; + unsigned int opt_nr = 0; + long flags; + + if (bind(sk, saddr, addr_size)) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + test_sock_restore_seq(sk, TCP_RECV_QUEUE, state->in.seq - state->inq_len); + test_sock_restore_seq(sk, TCP_SEND_QUEUE, state->out.seq - state->outq_len); + + if (device != NULL && setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, + device, strlen(device) + 1)) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + + if (connect(sk, daddr, addr_size)) + test_error("connect()"); + + if (state->info.tcpi_options & TCPI_OPT_SACK) { + opts[opt_nr].opt_code = TCPOPT_SACK_PERMITTED; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_WSCALE) { + opts[opt_nr].opt_code = TCPOPT_WINDOW; + opts[opt_nr].opt_val = state->info.tcpi_snd_wscale + + (state->info.tcpi_rcv_wscale << 16); + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + opts[opt_nr].opt_code = TCPOPT_TIMESTAMP; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + opts[opt_nr].opt_code = TCPOPT_MAXSEG; + opts[opt_nr].opt_val = state->mss; + opt_nr++; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS, opts, opt_nr * sizeof(opts[0]))) + test_error("setsockopt(TCP_REPAIR_OPTIONS)"); + + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP, + &state->timestamp, opt_nr * sizeof(opts[0]))) + test_error("setsockopt(TCP_TIMESTAMP)"); + } + test_sock_restore_queue(sk, TCP_RECV_QUEUE, state->in.buf, state->inq_len); + test_sock_restore_queue(sk, TCP_SEND_QUEUE, state->out.buf, state->outq_len); + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, sizeof(state->trw))) + test_error("setsockopt(TCP_REPAIR_WINDOW)"); +} + +void test_ao_restore(int sk, struct tcp_ao_repair *state) +{ + if (setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, sizeof(*state))) + test_error("setsockopt(TCP_AO_REPAIR)"); +} + +void test_sock_state_free(struct tcp_sock_state *state) +{ + free(state->out.buf); + free(state->in.buf); +} + +void test_enable_repair(int sk) +{ + int val = TCP_REPAIR_ON; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_disable_repair(int sk) +{ + int val = TCP_REPAIR_OFF_NO_WP; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_kill_sk(int sk) +{ + 
test_enable_repair(sk); + close(sk); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c new file mode 100644 index 000000000000..374b27c26ebd --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "aolib.h" + +/* + * Can't be included in the header: it defines static variables which + * will be unique to every object. Let's include it only once here. + */ +#include "../../../kselftest.h" + +/* Prevent overriding of one thread's output by another */ +static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; + +void __test_msg(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_print_msg(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_ok(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_pass(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_fail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_fail(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_xfail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_xfail(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_error(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_error(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_skip(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_skip(buf); + pthread_mutex_unlock(&ksft_print_lock); +} + +static volatile int failed; +static volatile int skipped; + +void test_failed(void) +{ + failed = 1; +} + +static void test_exit(void) +{ + if (failed) { + ksft_exit_fail(); + } else if (skipped) { + /* ksft_exit_skip() is different from ksft_exit_*() */ + ksft_print_cnts(); + exit(KSFT_SKIP); + } else { + ksft_exit_pass(); + } +} + +struct dlist_t { + void (*destruct)(void); + struct dlist_t *next; +}; +static struct dlist_t *destructors_list; + +void test_add_destructor(void (*d)(void)) +{ + struct dlist_t *p; + + p = malloc(sizeof(struct dlist_t)); + if (p == NULL) + test_error("malloc() failed"); + + p->next = destructors_list; + p->destruct = d; + destructors_list = p; +} + +static void test_destructor(void) __attribute__((destructor)); +static void test_destructor(void) +{ + while (destructors_list) { + struct dlist_t *p = destructors_list->next; + + destructors_list->destruct(); + free(destructors_list); + destructors_list = p; + } + test_exit(); +} + +static void sig_int(int signo) +{ + test_error("Caught SIGINT - exiting"); +} + +int open_netns(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + fd = open(netns_path, O_RDONLY); + if (fd < 0) + test_error("open(%s)", netns_path); + return fd; +} + +int unshare_open_netns(void) +{ + if (unshare(CLONE_NEWNET) != 0) + test_error("unshare()"); + + return open_netns(); +} + +void switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); +} + +int switch_save_ns(int new_ns) +{ + int ret = open_netns(); + + switch_ns(new_ns); + return ret; +} + +static int nsfd_outside = -1; +static int nsfd_parent = -1; +static int nsfd_child = -1; +const char veth_name[] = "ktst-veth"; + +static void init_namespaces(void) +{ + nsfd_outside = open_netns(); + nsfd_parent = unshare_open_netns(); + nsfd_child = unshare_open_netns(); +} + +static void link_init(const char *veth, int family, uint8_t prefix, + union tcp_addr addr, union 
tcp_addr dest) +{ + if (link_set_up(veth)) + test_error("Failed to set link up"); + if (ip_addr_add(veth, family, addr, prefix)) + test_error("Failed to add ip address"); + if (ip_route_add(veth, family, addr, dest)) + test_error("Failed to add route"); +} + +static unsigned int nr_threads = 1; + +static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t sync_cond = PTHREAD_COND_INITIALIZER; +static volatile unsigned int stage_threads[2]; +static volatile unsigned int stage_nr; + +/* synchronize all threads in the same stage */ +void synchronize_threads(void) +{ + unsigned int q = stage_nr; + + pthread_mutex_lock(&sync_lock); + stage_threads[q]++; + if (stage_threads[q] == nr_threads) { + stage_nr ^= 1; + stage_threads[stage_nr] = 0; + pthread_cond_signal(&sync_cond); + } + while (stage_threads[q] < nr_threads) + pthread_cond_wait(&sync_cond, &sync_lock); + pthread_mutex_unlock(&sync_lock); +} + +__thread union tcp_addr this_ip_addr; +__thread union tcp_addr this_ip_dest; +int test_family; + +struct new_pthread_arg { + thread_fn func; + union tcp_addr my_ip; + union tcp_addr dest_ip; +}; +static void *new_pthread_entry(void *arg) +{ + struct new_pthread_arg *p = arg; + + this_ip_addr = p->my_ip; + this_ip_dest = p->dest_ip; + p->func(NULL); /* shouldn't return */ + exit(KSFT_FAIL); +} + +static void __test_skip_all(const char *msg) +{ + ksft_set_plan(1); + ksft_print_header(); + skipped = 1; + test_skip("%s", msg); + exit(KSFT_SKIP); +} + +void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2) +{ + struct sigaction sa = { + .sa_handler = sig_int, + .sa_flags = SA_RESTART, + }; + time_t seed = time(NULL); + + sigemptyset(&sa.sa_mask); + if (sigaction(SIGINT, &sa, NULL)) + test_error("Can't set SIGINT handler"); + + test_family = family; + if (!kernel_config_has(KCONFIG_NET_NS)) + __test_skip_all(tests_skip_reason[KCONFIG_NET_NS]); + if (!kernel_config_has(KCONFIG_VETH)) + __test_skip_all(tests_skip_reason[KCONFIG_VETH]); + if (!kernel_config_has(KCONFIG_TCP_AO)) + __test_skip_all(tests_skip_reason[KCONFIG_TCP_AO]); + + ksft_set_plan(ntests); + test_print("rand seed %u", (unsigned int)seed); + srand(seed); + + + ksft_print_header(); + init_namespaces(); + + if (add_veth(veth_name, nsfd_parent, nsfd_child)) + test_error("Failed to add veth"); + + switch_ns(nsfd_child); + link_init(veth_name, family, prefix, addr2, addr1); + if (peer2) { + struct new_pthread_arg targ; + pthread_t t; + + targ.my_ip = addr2; + targ.dest_ip = addr1; + targ.func = peer2; + nr_threads++; + if (pthread_create(&t, NULL, new_pthread_entry, &targ)) + test_error("Failed to create pthread"); + } + switch_ns(nsfd_parent); + link_init(veth_name, family, prefix, addr1, addr2); + + this_ip_addr = addr1; + this_ip_dest = addr2; + peer1(NULL); + if (failed) + exit(KSFT_FAIL); + else + exit(KSFT_PASS); +} + +/* /proc/sys/net/core/optmem_max artificially limits the amount of memory + * that can be allocated with sock_kmalloc() on each socket in the system. + * It is not virtualized, so it has to be written outside test namespaces. + * To be nice, a test will revert optmem back to the old value. + * Keeping it simple without any file lock, which means the tests that + * need to set/increase optmem value shouldn't run in parallel. + * Also, not re-entrant. 
+ */ +static const char *optmem_file = "/proc/sys/net/core/optmem_max"; +static size_t saved_optmem; + +size_t test_get_optmem(void) +{ + FILE *foptmem; + int old_ns; + size_t ret; + + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "r"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fscanf(foptmem, "%zu", &ret) != 1) + test_error("can't read from %s", optmem_file); + fclose(foptmem); + switch_ns(old_ns); + return ret; +} + +static void __test_set_optmem(size_t new, size_t *old) +{ + FILE *foptmem; + int old_ns; + + if (old != NULL) + *old = test_get_optmem(); + + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "w"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fprintf(foptmem, "%zu", new) <= 0) + test_error("can't write %zu to %s", new, optmem_file); + fclose(foptmem); + switch_ns(old_ns); +} + +static void test_revert_optmem(void) +{ + if (saved_optmem == 0) + return; + + __test_set_optmem(saved_optmem, NULL); +} + +void test_set_optmem(size_t value) +{ + if (saved_optmem == 0) { + __test_set_optmem(value, &saved_optmem); + test_add_destructor(test_revert_optmem); + } else { + __test_set_optmem(value, NULL); + } +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c new file mode 100644 index 000000000000..7f3c31b7d997 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -0,0 +1,592 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "../../../../../include/linux/kernel.h" +#include "../../../../../include/linux/stringify.h" +#include "aolib.h" + +const unsigned int test_server_port = 7010; +int __test_listen_socket(int backlog, void *addr, size_t addr_sz) +{ + int err, sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + long flags; + + if (sk < 0) + test_error("socket()"); + + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, veth_name, + strlen(veth_name) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE)"); + + if (bind(sk, (struct sockaddr *)addr, addr_sz) < 0) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (listen(sk, backlog)) + test_error("listen()"); + + return sk; +} + +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set fds, efds; + int ret; + socklen_t slen = sizeof(ret); + + FD_ZERO(&fds); + FD_SET(sk, &fds); + FD_ZERO(&efds); + FD_SET(sk, &efds); + + if (sec) + ptv = &tv; + + errno = 0; + if (write) + ret = select(sk + 1, NULL, &fds, &efds, ptv); + else + ret = select(sk + 1, &fds, NULL, &efds, ptv); + if (ret < 0) + return -errno; + if (ret == 0) { + errno = ETIMEDOUT; + return -ETIMEDOUT; + } + + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret) + return -ret; + return 0; +} + +int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout) +{ + long flags; + int err; + + if (device != NULL) { + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, device, + strlen(device) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + } + + if (!timeout) { + err = connect(sk, addr, addr_sz); + if (err) { + err = -errno; + goto out; + } + return 0; + } + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (connect(sk, addr, addr_sz) < 0) 
{ + if (errno != EINPROGRESS) { + err = -errno; + goto out; + } + if (timeout < 0) + return sk; + err = test_wait_fd(sk, timeout, 1); + if (err) + goto out; + } + return sk; + +out: + close(sk); + return err; +} + +int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, + int vrf, const char *password) +{ + size_t pwd_len = strlen(password); + struct tcp_md5sig md5sig = {}; + + md5sig.tcpm_keylen = pwd_len; + memcpy(md5sig.tcpm_key, password, pwd_len); + md5sig.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX; + md5sig.tcpm_prefixlen = prefix; + if (vrf >= 0) { + md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; + md5sig.tcpm_ifindex = (uint8_t)vrf; + } + memcpy(&md5sig.tcpm_addr, addr, addr_sz); + + errno = 0; + return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT, + &md5sig, sizeof(md5sig)); +} + + +int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, uint8_t sndid, uint8_t rcvid, + uint8_t maclen, uint8_t keyflags, + uint8_t keylen, const char *key) +{ + memset(ao, 0, sizeof(struct tcp_ao_add)); + + ao->set_current = !!set_current; + ao->set_rnext = !!set_rnext; + ao->prefix = prefix; + ao->sndid = sndid; + ao->rcvid = rcvid; + ao->maclen = maclen; + ao->keyflags = keyflags; + ao->keylen = keylen; + ao->ifindex = vrf; + + memcpy(&ao->addr, addr, addr_sz); + + if (strlen(alg) > 64) + return -ENOBUFS; + strncpy(ao->alg_name, alg, 64); + + memcpy(ao->key, key, + (keylen > TCP_AO_MAXKEYLEN) ? TCP_AO_MAXKEYLEN : keylen); + return 0; +} + +static int test_get_ao_keys_nr(int sk) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + tmp.nkeys = 1; + tmp.get_all = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return -errno; + return (int)tmp.nkeys; +} + +int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + memcpy(&tmp.addr, addr, addr_sz); + tmp.prefix = prefix; + tmp.sndid = sndid; + tmp.rcvid = rcvid; + tmp.nkeys = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return ret; + if (tmp.nkeys != 1) + return -E2BIG; + *out = tmp; + return 0; +} + +int test_get_ao_info(int sk, struct tcp_ao_info_opt *out) +{ + socklen_t sz = sizeof(*out); + + out->reserved = 0; + out->reserved2 = 0; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, out, &sz)) + return -errno; + if (sz != sizeof(*out)) + return -EMSGSIZE; + return 0; +} + +int test_set_ao_info(int sk, struct tcp_ao_info_opt *in) +{ + socklen_t sz = sizeof(*in); + + in->reserved = 0; + in->reserved2 = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, in, sz)) + return -errno; + return 0; +} + +int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b) +{ + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + + if (!strcmp("cmac(aes128)", a->alg_name)) { + is_kdf_aes_128_cmac = (a->keylen != 16); + is_cmac_aes = true; + } + +#define __cmp_ao(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %u != %u", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + __cmp_ao(sndid); + __cmp_ao(rcvid); + __cmp_ao(prefix); + __cmp_ao(keyflags); + __cmp_ao(ifindex); + if (a->maclen) { + __cmp_ao(maclen); + } else if (b->maclen != 12) { + test_fail("getsockopt(): 
expected default maclen 12, but it's %u", + b->maclen); + return -1; + } + if (!is_kdf_aes_128_cmac) { + __cmp_ao(keylen); + } else if (b->keylen != 16) { + test_fail("getsockopt(): expected keylen 16 for cmac(aes128), but it's %u", + b->keylen); + return -1; + } +#undef __cmp_ao + if (!is_kdf_aes_128_cmac && memcmp(b->key, a->key, a->keylen)) { + test_fail("getsockopt(): returned key is different `%s' != `%s'", + b->key, a->key); + return -1; + } + if (memcmp(&b->addr, &a->addr, sizeof(b->addr))) { + test_fail("getsockopt(): returned address is different"); + return -1; + } + if (!is_cmac_aes && strcmp(b->alg_name, a->alg_name)) { + test_fail("getsockopt(): returned algorithm %s is different than %s", b->alg_name, a->alg_name); + return -1; + } + if (is_cmac_aes && strcmp(b->alg_name, "cmac(aes)")) { + test_fail("getsockopt(): returned algorithm %s is different than cmac(aes)", b->alg_name); + return -1; + } + /* For a established key rotation test don't add a key with + * set_current = 1, as it's likely to change by peer's request; + * rather use setsockopt(TCP_AO_INFO) + */ + if (a->set_current != b->is_current) { + test_fail("getsockopt(): returned key is not Current_key"); + return -1; + } + if (a->set_rnext != b->is_rnext) { + test_fail("getsockopt(): returned key is not RNext_key"); + return -1; + } + + return 0; +} + +int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b) +{ + /* No check for ::current_key, as it may change by the peer */ + if (a->ao_required != b->ao_required) { + test_fail("getsockopt(): returned ao doesn't have ao_required"); + return -1; + } + if (a->accept_icmps != b->accept_icmps) { + test_fail("getsockopt(): returned ao doesn't accept ICMPs"); + return -1; + } + if (a->set_rnext && a->rnext != b->rnext) { + test_fail("getsockopt(): RNext KeyID has changed"); + return -1; + } +#define __cmp_cnt(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %llu != %llu", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + if (a->set_counters) { + __cmp_cnt(pkt_good); + __cmp_cnt(pkt_bad); + __cmp_cnt(pkt_key_not_found); + __cmp_cnt(pkt_ao_required); + __cmp_cnt(pkt_dropped_icmp); + } +#undef __cmp_cnt + return 0; +} + +int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +{ + struct tcp_ao_getsockopt *key_dump; + socklen_t key_dump_sz = sizeof(*key_dump); + struct tcp_ao_info_opt info = {}; + bool c1, c2, c3, c4, c5; + struct netstat *ns; + int err, nr_keys; + + memset(out, 0, sizeof(*out)); + + /* per-netns */ + ns = netstat_read(); + out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + netstat_free(ns); + if (c1 || c2 || c3 || c4 || c5) + return -EOPNOTSUPP; + + err = test_get_ao_info(sk, &info); + if (err) + return err; + + /* per-socket */ + out->ao_info_pkt_good = info.pkt_good; + out->ao_info_pkt_bad = info.pkt_bad; + out->ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + + /* per-key */ + nr_keys = test_get_ao_keys_nr(sk); + if (nr_keys < 0) + return nr_keys; + if (nr_keys == 0) + test_error("test_get_ao_keys_nr() == 0"); + out->nr_keys = (size_t)nr_keys; 
+ key_dump = calloc(nr_keys, key_dump_sz); + if (!key_dump) + return -errno; + + key_dump[0].nkeys = nr_keys; + key_dump[0].get_all = 1; + key_dump[0].get_all = 1; + err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, + key_dump, &key_dump_sz); + if (err) { + free(key_dump); + return -errno; + } + + out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); + if (!out->key_cnts) { + free(key_dump); + return -errno; + } + + while (nr_keys--) { + out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + } + free(key_dump); + + return 0; +} + +int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ +#define __cmp_ao(cnt, expecting_inc) \ +do { \ + if (before->cnt > after->cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ + tst_name ?: "", before->cnt, after->cnt); \ + return -1; \ + } \ + if ((before->cnt != after->cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ + tst_name ?: "", (expecting_inc) ? "" : "not ", \ + before->cnt, after->cnt); \ + return -1; \ + } \ +} while(0) + + errno = 0; + /* per-netns */ + __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); + __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); + __cmp_ao(netns_ao_key_not_found, + !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); + __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); + __cmp_ao(netns_ao_dropped_icmp, + !!(expected & TEST_CNT_NS_DROPPED_ICMP)); + /* per-socket */ + __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); + __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); + __cmp_ao(ao_info_pkt_key_not_found, + !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); + __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); + __cmp_ao(ao_info_pkt_dropped_icmp, + !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + return 0; +#undef __cmp_ao +} + +int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, + int sndid, int rcvid) +{ + size_t i; +#define __cmp_ao(i, cnt, expecting_inc) \ +do { \ + if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ + tst_name ?: "", before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ + tst_name ?: "", (expecting_inc) ? 
"" : "not ",\ + before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ +} while(0) + + if (before->nr_keys != after->nr_keys) { + test_fail("%s: Keys changed on the socket %zu != %zu", + tst_name, before->nr_keys, after->nr_keys); + return -1; + } + + /* per-key */ + i = before->nr_keys; + while (i--) { + if (sndid >= 0 && before->key_cnts[i].sndid != sndid) + continue; + if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) + continue; + __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); + __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + } + return 0; +#undef __cmp_ao +} + +void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +{ + free(cnts->key_cnts); +} + +#define TEST_BUF_SIZE 4096 +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + ssize_t total = 0; + + do { + char buf[TEST_BUF_SIZE]; + ssize_t bytes, sent; + int ret; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + bytes = recv(sk, buf, sizeof(buf), 0); + + if (bytes < 0) + test_error("recv(): %zd", bytes); + if (bytes == 0) + break; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + total += bytes; + } while (!quota || total < quota); + + return total; +} + +ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec) +{ + char msg[msg_len]; + int nodelay = 1; + size_t i; + + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) + test_error("setsockopt(TCP_NODELAY)"); + + for (i = 0; i < buf_sz; i += min(msg_len, buf_sz - i)) { + size_t sent, bytes = min(msg_len, buf_sz - i); + int ret; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf + i, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + + bytes = 0; + do { + ssize_t got; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0); + if (got <= 0) + test_error("recv(): %zd", got); + bytes += got; + } while (bytes < sent); + if (bytes > sent) + test_error("recv(): %zd > %zd", bytes, sent); + if (memcmp(buf + i, msg, bytes) != 0) { + test_fail("received message differs"); + return -1; + } + } + return i; +} + +int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec) +{ + size_t buf_sz = msg_len * nr; + char *buf = alloca(buf_sz); + + randomize_buffer(buf, buf_sz); + if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz) + return -1; + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c new file mode 100644 index 000000000000..372daca525f5 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aolib.h" +#include + +void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } +} + +const struct sockaddr_in6 addr_any6 = { + .sin6_family = AF_INET6, +}; + +const struct sockaddr_in addr_any4 = { + .sin_family = AF_INET, +}; -- cgit v1.2.3-70-g09d2 From 
a8fcf8ca14d7b374519e5637d7b49b03ebaf580d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:16 +0000 Subject: selftests/net: Verify that TCP-AO complies with ignoring ICMPs Hand-crafted ICMP packets are sent to the server, the server checks for hard/soft errors and fails if any. Expected output for ipv4 version: > # ./icmps-discard_ipv4 > 1..3 > # 3164[lib/setup.c:166] rand seed 1642623745 > TAP version 13 > # 3164[lib/proc.c:207] Snmp6 Ip6InReceives: 0 => 1 > # 3164[lib/proc.c:207] Snmp6 Ip6InNoRoutes: 0 => 1 > # 3164[lib/proc.c:207] Snmp6 Ip6InOctets: 0 => 76 > # 3164[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 0 => 1 > # 3164[lib/proc.c:207] Tcp InSegs: 2 => 203 > # 3164[lib/proc.c:207] Tcp OutSegs: 1 => 202 > # 3164[lib/proc.c:207] IcmpMsg InType3: 0 => 543 > # 3164[lib/proc.c:207] Icmp InMsgs: 0 => 543 > # 3164[lib/proc.c:207] Icmp InDestUnreachs: 0 => 543 > # 3164[lib/proc.c:207] Ip InReceives: 2 => 746 > # 3164[lib/proc.c:207] Ip InDelivers: 2 => 746 > # 3164[lib/proc.c:207] Ip OutRequests: 1 => 202 > # 3164[lib/proc.c:207] IpExt InOctets: 132 => 61684 > # 3164[lib/proc.c:207] IpExt OutOctets: 68 => 31324 > # 3164[lib/proc.c:207] IpExt InNoECTPkts: 2 => 744 > # 3164[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3164[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 200 > # 3164[lib/proc.c:207] TcpExt TCPDelivered: 0 => 199 > # 3164[lib/proc.c:207] TcpExt TCPAOGood: 2 => 203 > # 3164[lib/proc.c:207] TcpExt TCPAODroppedIcmps: 0 => 541 > ok 1 InDestUnreachs delivered 543 > ok 2 Server survived 20000 bytes of traffic > ok 3 ICMPs ignored 541 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Expected output for ipv6 version: > # ./icmps-discard_ipv6 > 1..3 > # 3186[lib/setup.c:166] rand seed 1642623803 > TAP version 13 > # 3186[lib/proc.c:207] Snmp6 Ip6InReceives: 4 => 568 > # 3186[lib/proc.c:207] Snmp6 Ip6InDelivers: 3 => 564 > # 3186[lib/proc.c:207] Snmp6 Ip6OutRequests: 2 => 204 > # 3186[lib/proc.c:207] Snmp6 Ip6InMcastPkts: 1 => 4 > # 3186[lib/proc.c:207] Snmp6 Ip6OutMcastPkts: 0 => 1 > # 3186[lib/proc.c:207] Snmp6 Ip6InOctets: 320 => 70420 > # 3186[lib/proc.c:207] Snmp6 Ip6OutOctets: 160 => 35512 > # 3186[lib/proc.c:207] Snmp6 Ip6InMcastOctets: 72 => 336 > # 3186[lib/proc.c:207] Snmp6 Ip6OutMcastOctets: 0 => 76 > # 3186[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 4 => 568 > # 3186[lib/proc.c:207] Snmp6 Icmp6InMsgs: 1 => 361 > # 3186[lib/proc.c:207] Snmp6 Icmp6OutMsgs: 1 => 2 > # 3186[lib/proc.c:207] Snmp6 Icmp6InDestUnreachs: 0 => 360 > # 3186[lib/proc.c:207] Snmp6 Icmp6OutMLDv2Reports: 0 => 1 > # 3186[lib/proc.c:207] Snmp6 Icmp6InType1: 0 => 360 > # 3186[lib/proc.c:207] Snmp6 Icmp6OutType143: 0 => 1 > # 3186[lib/proc.c:207] Tcp InSegs: 2 => 203 > # 3186[lib/proc.c:207] Tcp OutSegs: 1 => 202 > # 3186[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3186[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 200 > # 3186[lib/proc.c:207] TcpExt TCPDelivered: 0 => 199 > # 3186[lib/proc.c:207] TcpExt TCPAOGood: 2 => 203 > # 3186[lib/proc.c:207] TcpExt TCPAODroppedIcmps: 0 => 360 > ok 1 Icmp6InDestUnreachs delivered 360 > ok 2 Server survived 20000 bytes of traffic > ok 3 ICMPs ignored 360 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/icmps-discard.c | 438 +++++++++++++++++++++ 2 files changed, 439 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/icmps-discard.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 62425b9fb73c..0fc5db59be0c 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_BOTH_AF := connect +TEST_BOTH_AF += icmps-discard TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c new file mode 100644 index 000000000000..d77c791754de --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -0,0 +1,438 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest that verifies that incoming ICMPs are ignored, + * the TCP connection stays alive, no hard or soft errors get reported + * to the userspace and the counter for ignored ICMPs is updated. + * + * RFC5925, 7.8: + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol + * unreachable, port unreachable, and fragmentation needed -- 'hard + * errors'), and ICMPv6 Type 1 (destination unreachable), Code 1 + * (administratively prohibited) and Code 4 (port unreachable) intended + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs. + * + * Author: Dmitry Safonov + */ +#include +#include +#include +#include +#include +#include +#include +#include "aolib.h" +#include "../../../../include/linux/compiler.h" + +const size_t packets_nr = 20; +const size_t packet_size = 100; +const char *tcpao_icmps = "TCPAODroppedIcmps"; + +#ifdef IPV6_TEST +const char *dst_unreach = "Icmp6InDestUnreachs"; +const int sk_ip_level = SOL_IPV6; +const int sk_recverr = IPV6_RECVERR; +#else +const char *dst_unreach = "InDestUnreachs"; +const int sk_ip_level = SOL_IP; +const int sk_recverr = IP_RECVERR; +#endif + +#define test_icmps_fail test_fail +#define test_icmps_ok test_ok + +static void serve_interfered(int sk) +{ + ssize_t test_quota = packet_size * packets_nr * 10; + uint64_t dest_unreach_a, dest_unreach_b; + uint64_t icmp_ignored_a, icmp_ignored_b; + struct tcp_ao_counters ao_cnt1, ao_cnt2; + bool counter_not_found; + struct netstat *ns_after, *ns_before; + ssize_t bytes; + + ns_before = netstat_read(); + dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); + icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); + if (test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + bytes = test_server_run(sk, test_quota, 0); + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); + icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, + &counter_not_found); + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + netstat_free(ns_before); + netstat_free(ns_after); + + if (dest_unreach_a >= dest_unreach_b) { + test_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, + dst_unreach, dest_unreach_a, dest_unreach_b); + return; + } + test_ok("%s delivered %" PRIu64, + dst_unreach, dest_unreach_b - 
dest_unreach_a); + if (bytes < 0) + test_icmps_fail("Server failed with %zd: %s", bytes, strerrordesc_np(-bytes)); + else + test_icmps_ok("Server survived %zd bytes of traffic", test_quota); + if (counter_not_found) { + test_fail("Not found %s counter", tcpao_icmps); + return; + } + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); + if (icmp_ignored_a >= icmp_ignored_b) { + test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, + tcpao_icmps, icmp_ignored_a, icmp_ignored_b); + return; + } + test_icmps_ok("ICMPs ignored %" PRIu64, icmp_ignored_b - icmp_ignored_a); +} + +static void *server_fn(void *arg) +{ + int val, err, sk, lsk; + bool accept_icmps = false; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_set_ao_flags(lsk, false, accept_icmps)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + err = test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0); + if (!err) + test_error("timeouted for accept()"); + else if (err < 0) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + /* Fail on hard ip errors, such as dest unreachable (RFC1122) */ + val = 1; + if (setsockopt(sk, sk_ip_level, sk_recverr, &val, sizeof(val))) + test_error("setsockopt()"); + + synchronize_threads(); + + serve_interfered(sk); + return NULL; +} + +static size_t packets_sent; +static size_t icmps_sent; + +static uint32_t checksum4_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum4_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum4_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static void set_ip4hdr(struct iphdr *iph, size_t packet_len, int proto, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + iph->version = 4; + iph->ihl = 5; + iph->tos = 0; + iph->tot_len = htons(packet_len); + iph->ttl = 2; + iph->protocol = proto; + iph->saddr = src->sin_addr.s_addr; + iph->daddr = dst->sin_addr.s_addr; + iph->check = checksum4_fold((void *)iph, iph->ihl << 1, 0); +} + +static void icmp_interfere4(uint8_t type, uint8_t code, uint32_t rcv_nxt, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + int sk = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + struct { + struct iphdr iph; + struct icmphdr icmph; + struct iphdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + if (sk < 0) + test_error("socket(AF_INET, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet); + set_ip4hdr(&packet.iph, packet_len, IPPROTO_ICMP, src, dst); + + packet.icmph.type = type; + packet.icmph.code = code; + if (code == ICMP_FRAG_NEEDED) { + randomize_buffer(&packet.icmph.un.frag.mtu, + sizeof(packet.icmph.un.frag.mtu)); + } + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip4hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin_port; + packet.tcph.dport = src->sin_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + packet.icmph.checksum = checksum4_fold((void *)&packet.icmph, + packet_len, 0); + + bytes = sendto(sk, &packet, 
sizeof(packet), 0, + (struct sockaddr *)dst, sizeof(*dst)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static void set_ip6hdr(struct ipv6hdr *iph, size_t packet_len, int proto, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + iph->version = 6; + iph->payload_len = htons(packet_len); + iph->nexthdr = proto; + iph->hop_limit = 2; + iph->saddr = src->sin6_addr; + iph->daddr = dst->sin6_addr; +} + +static inline uint16_t csum_fold(uint32_t csum) +{ + uint32_t sum = csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (uint16_t)~sum; +} + +static inline uint32_t csum_add(uint32_t csum, uint32_t addend) +{ + uint32_t res = csum; + + res += addend; + return res + (res < addend); +} + +noinline uint32_t checksum6_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum = csum_add(sum, words[i]); + if (len & 1) + sum = csum_add(sum, ((char *)data)[len - 1]); + return sum; +} + +noinline uint16_t icmp6_checksum(struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + void *ptr, size_t len, uint8_t proto) +{ + struct { + struct in6_addr saddr; + struct in6_addr daddr; + uint32_t payload_len; + uint8_t zero[3]; + uint8_t nexthdr; + } pseudo_header = {}; + uint32_t sum; + + pseudo_header.saddr = src->sin6_addr; + pseudo_header.daddr = dst->sin6_addr; + pseudo_header.payload_len = htonl(len); + pseudo_header.nexthdr = proto; + + sum = checksum6_nofold(&pseudo_header, sizeof(pseudo_header), 0); + sum = checksum6_nofold(ptr, len, sum); + + return csum_fold(sum); +} + +static void icmp6_interfere(int type, int code, uint32_t rcv_nxt, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + int sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW); + struct sockaddr_in6 dst_raw = *dst; + struct { + struct ipv6hdr iph; + struct icmp6hdr icmph; + struct ipv6hdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + + if (sk < 0) + test_error("socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet) - sizeof(packet.iph); + set_ip6hdr(&packet.iph, packet_len, IPPROTO_ICMPV6, src, dst); + + packet.icmph.icmp6_type = type; + packet.icmph.icmp6_code = code; + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip6hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin6_port; + packet.tcph.dport = src->sin6_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + + packet.icmph.icmp6_cksum = icmp6_checksum(src, dst, + (void *)&packet.icmph, packet_len, IPPROTO_ICMPV6); + + dst_raw.sin6_port = htons(IPPROTO_RAW); + bytes = sendto(sk, &packet, sizeof(packet), 0, + (struct sockaddr *)&dst_raw, sizeof(dst_raw)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static uint32_t get_rcv_nxt(int sk) +{ + int val = TCP_REPAIR_ON; + uint32_t ret; + socklen_t sz = sizeof(ret); + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); + val = TCP_RECV_QUEUE; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &ret, &sz)) + test_error("getsockopt(TCP_QUEUE_SEQ)"); + val = TCP_REPAIR_OFF_NO_WP; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + 
test_error("setsockopt(TCP_REPAIR)"); + return ret; +} + +static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *dst) +{ + struct sockaddr_in *saddr4 = src; + struct sockaddr_in *daddr4 = dst; + struct sockaddr_in6 *saddr6 = src; + struct sockaddr_in6 *daddr6 = dst; + size_t i; + + if (saddr4->sin_family != daddr4->sin_family) + test_error("Different address families"); + + for (i = 0; i < nr; i++) { + if (saddr4->sin_family == AF_INET) { + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + rcv_nxt, saddr4, daddr4); + icmps_sent += 3; + } else if (saddr4->sin_family == AF_INET6) { + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_ADM_PROHIBITED, + rcv_nxt, saddr6, daddr6); + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_PORT_UNREACH, + rcv_nxt, saddr6, daddr6); + icmps_sent += 2; + } else { + test_error("Not ip address family"); + } + } +} + +static void send_interfered(int sk) +{ + const unsigned int timeout = TEST_TIMEOUT_SEC; + struct sockaddr_in6 src, dst; + socklen_t addr_sz; + + addr_sz = sizeof(src); + if (getsockname(sk, &src, &addr_sz)) + test_error("getsockname()"); + addr_sz = sizeof(dst); + if (getpeername(sk, &dst, &addr_sz)) + test_error("getpeername()"); + + while (1) { + uint32_t rcv_nxt; + + if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + test_fail("client: connection is broken"); + return; + } + packets_sent += packets_nr; + rcv_nxt = get_rcv_nxt(sk); + icmp_interfere(packets_nr, rcv_nxt, (void *)&src, (void *)&dst); + } +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + send_interfered(sk); + + /* Not expecting client to quit */ + test_fail("client disconnected"); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(3, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From d11301f65977244ca8bdcc6ac80431683b9b3b0a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:17 +0000 Subject: selftests/net: Add TCP-AO ICMPs accept test Reverse to icmps-discard test: the server accepts ICMPs, using TCP_AO_CMDF_ACCEPT_ICMP and it is expected to fail under ICMP flood from client. Test that the default pre-TCP-AO behaviour functions when TCP_AO_CMDF_ACCEPT_ICMP is set. 
Expected output for ipv4 version (in case it receives ICMP_PROT_UNREACH): > # ./icmps-accept_ipv4 > 1..3 > # 3209[lib/setup.c:166] rand seed 1642623870 > TAP version 13 > # 3209[lib/proc.c:207] Snmp6 Ip6InReceives: 0 => 1 > # 3209[lib/proc.c:207] Snmp6 Ip6InNoRoutes: 0 => 1 > # 3209[lib/proc.c:207] Snmp6 Ip6InOctets: 0 => 76 > # 3209[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 0 => 1 > # 3209[lib/proc.c:207] Tcp InSegs: 3 => 23 > # 3209[lib/proc.c:207] Tcp OutSegs: 2 => 22 > # 3209[lib/proc.c:207] IcmpMsg InType3: 0 => 4 > # 3209[lib/proc.c:207] Icmp InMsgs: 0 => 4 > # 3209[lib/proc.c:207] Icmp InDestUnreachs: 0 => 4 > # 3209[lib/proc.c:207] Ip InReceives: 3 => 27 > # 3209[lib/proc.c:207] Ip InDelivers: 3 => 27 > # 3209[lib/proc.c:207] Ip OutRequests: 2 => 22 > # 3209[lib/proc.c:207] IpExt InOctets: 288 => 3420 > # 3209[lib/proc.c:207] IpExt OutOctets: 124 => 3244 > # 3209[lib/proc.c:207] IpExt InNoECTPkts: 3 => 25 > # 3209[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3209[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 20 > # 3209[lib/proc.c:207] TcpExt TCPDelivered: 0 => 19 > # 3209[lib/proc.c:207] TcpExt TCPAOGood: 3 => 23 > ok 1 InDestUnreachs delivered 4 > ok 2 server failed with -92: Protocol not available > ok 3 TCPAODroppedIcmps counter didn't change: 0 >= 0 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 Expected output for ipv6 version (in case it receives ADM_PROHIBITED): > # ./icmps-accept_ipv6 > 1..3 > # 3277[lib/setup.c:166] rand seed 1642624035 > TAP version 13 > # 3277[lib/proc.c:207] Snmp6 Ip6InReceives: 6 => 31 > # 3277[lib/proc.c:207] Snmp6 Ip6InDelivers: 4 => 29 > # 3277[lib/proc.c:207] Snmp6 Ip6OutRequests: 4 => 24 > # 3277[lib/proc.c:207] Snmp6 Ip6InOctets: 592 => 4492 > # 3277[lib/proc.c:207] Snmp6 Ip6OutOctets: 332 => 3852 > # 3277[lib/proc.c:207] Snmp6 Ip6InNoECTPkts: 6 => 31 > # 3277[lib/proc.c:207] Snmp6 Icmp6InMsgs: 1 => 6 > # 3277[lib/proc.c:207] Snmp6 Icmp6InDestUnreachs: 0 => 5 > # 3277[lib/proc.c:207] Snmp6 Icmp6InType1: 0 => 5 > # 3277[lib/proc.c:207] Tcp InSegs: 3 => 23 > # 3277[lib/proc.c:207] Tcp OutSegs: 2 => 22 > # 3277[lib/proc.c:207] TcpExt TCPPureAcks: 1 => 2 > # 3277[lib/proc.c:207] TcpExt TCPOrigDataSent: 0 => 20 > # 3277[lib/proc.c:207] TcpExt TCPDelivered: 0 => 19 > # 3277[lib/proc.c:207] TcpExt TCPAOGood: 3 => 23 > ok 1 Icmp6InDestUnreachs delivered 5 > ok 2 server failed with -13: Permission denied > ok 3 TCPAODroppedIcmps counter didn't change: 0 >= 0 > # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 With some luck the server may fail with ECONNREFUSED (depending on what icmp packet was delivered firstly). For the kernel error handlers see: tab_unreach[] and icmp_err_convert[]. Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 4 +++- tools/testing/selftests/net/tcp_ao/icmps-accept.c | 1 + tools/testing/selftests/net/tcp_ao/icmps-discard.c | 25 ++++++++++++++++------ 3 files changed, 22 insertions(+), 8 deletions(-) create mode 120000 tools/testing/selftests/net/tcp_ao/icmps-accept.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 0fc5db59be0c..7bf61b167ec5 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_BOTH_AF := connect -TEST_BOTH_AF += icmps-discard +TEST_BOTH_AF += icmps-accept icmps-discard TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) @@ -44,3 +44,5 @@ $(OUTPUT)/%_ipv4: %.c $(OUTPUT)/%_ipv6: %.c $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@ +$(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT diff --git a/tools/testing/selftests/net/tcp_ao/icmps-accept.c b/tools/testing/selftests/net/tcp_ao/icmps-accept.c new file mode 120000 index 000000000000..0a5bb85eb260 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-accept.c @@ -0,0 +1 @@ +icmps-discard.c \ No newline at end of file diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index d77c791754de..d69bcba3c929 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -39,8 +39,14 @@ const int sk_ip_level = SOL_IP; const int sk_recverr = IP_RECVERR; #endif -#define test_icmps_fail test_fail -#define test_icmps_ok test_ok +/* Server is expected to fail with hard error if ::accept_icmp is set */ +#ifdef TEST_ICMPS_ACCEPT +# define test_icmps_fail test_ok +# define test_icmps_ok test_fail +#else +# define test_icmps_fail test_fail +# define test_icmps_ok test_ok +#endif static void serve_interfered(int sk) { @@ -84,7 +90,11 @@ static void serve_interfered(int sk) test_fail("Not found %s counter", tcpao_icmps); return; } +#ifdef TEST_ICMPS_ACCEPT + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); +#else test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); +#endif if (icmp_ignored_a >= icmp_ignored_b) { test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, tcpao_icmps, icmp_ignored_a, icmp_ignored_b); @@ -95,11 +105,15 @@ static void serve_interfered(int sk) static void *server_fn(void *arg) { - int val, err, sk, lsk; + int val, sk, lsk; bool accept_icmps = false; lsk = test_listen_socket(this_ip_addr, test_server_port, 1); +#ifdef TEST_ICMPS_ACCEPT + accept_icmps = true; +#endif + if (test_set_ao_flags(lsk, false, accept_icmps)) test_error("setsockopt(TCP_AO_INFO)"); @@ -107,10 +121,7 @@ static void *server_fn(void *arg) test_error("setsockopt(TCP_AO_ADD_KEY)"); synchronize_threads(); - err = test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0); - if (!err) - test_error("timeouted for accept()"); - else if (err < 0) + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) test_error("test_wait_fd()"); sk = accept(lsk, NULL, NULL); -- cgit v1.2.3-70-g09d2 From ed9d09b309b17cead3bbb910894399da6b74e898 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:18 +0000 Subject: selftests/net: Add a test for TCP-AO keys matching Add TCP-AO tests on connect()/accept() pair. 
SNMP counters exposed by kernel are very useful here to verify the expected behavior of TCP-AO. Expected output for ipv4 version: > # ./connect-deny_ipv4 > 1..19 > # 1702[lib/setup.c:254] rand seed 1680553689 > TAP version 13 > ok 1 Non-AO server + AO client > ok 2 Non-AO server + AO client: counter TCPAOKeyNotFound increased 0 => 1 > ok 3 AO server + Non-AO client > ok 4 AO server + Non-AO client: counter TCPAORequired increased 0 => 1 > ok 5 Wrong password > ok 6 Wrong password: counter TCPAOBad increased 0 => 1 > ok 7 Wrong rcv id > ok 8 Wrong rcv id: counter TCPAOKeyNotFound increased 1 => 2 > ok 9 Wrong snd id > ok 10 Wrong snd id: counter TCPAOGood increased 0 => 1 > ok 11 Server: Wrong addr: counter TCPAOKeyNotFound increased 2 => 3 > ok 12 Server: Wrong addr > ok 13 Client: Wrong addr: connect() was prevented > ok 14 rcv id != snd id: connected > ok 15 rcv id != snd id: counter TCPAOGood increased 1 => 3 > ok 16 Server: prefix match: connected > ok 17 Server: prefix match: counter TCPAOGood increased 4 => 6 > ok 18 Client: prefix match: connected > ok 19 Client: prefix match: counter TCPAOGood increased 7 => 9 > # Totals: pass:19 fail:0 xfail:0 xpass:0 skip:0 error:0 Expected output for ipv6 version: > # ./connect-deny_ipv6 > 1..19 > # 1725[lib/setup.c:254] rand seed 1680553711 > TAP version 13 > ok 1 Non-AO server + AO client > ok 2 Non-AO server + AO client: counter TCPAOKeyNotFound increased 0 => 1 > ok 3 AO server + Non-AO client: counter TCPAORequired increased 0 => 1 > ok 4 AO server + Non-AO client > ok 5 Wrong password: counter TCPAOBad increased 0 => 1 > ok 6 Wrong password > ok 7 Wrong rcv id: counter TCPAOKeyNotFound increased 1 => 2 > ok 8 Wrong rcv id > ok 9 Wrong snd id: counter TCPAOGood increased 0 => 1 > ok 10 Wrong snd id > ok 11 Server: Wrong addr > ok 12 Server: Wrong addr: counter TCPAOKeyNotFound increased 2 => 3 > ok 13 Client: Wrong addr: connect() was prevented > ok 14 rcv id != snd id: connected > ok 15 rcv id != snd id: counter TCPAOGood increased 1 => 3 > ok 16 Server: prefix match: connected > ok 17 Server: prefix match: counter TCPAOGood increased 5 => 7 > ok 18 Client: prefix match: connected > ok 19 Client: prefix match: counter TCPAOGood increased 8 => 10 > # Totals: pass:19 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/connect-deny.c | 264 ++++++++++++++++++++++ 2 files changed, 265 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/connect-deny.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 7bf61b167ec5..f3b1d7f42edb 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TEST_BOTH_AF := connect +TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c new file mode 100644 index 000000000000..1ca78040d8b7 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) + +static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, 0, sndid, rcvid, maclen, + 0, strlen(key), key); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void try_accept(const char *tst_name, unsigned int port, const char *pwd, + union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + const char *cnt_name, test_cnt cnt_expected, + fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (pwd && test_add_key_maclen(lsk, pwd, maclen, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timeouted for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + close(lsk); + if (pwd) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + + if (!cnt_name) + goto out; + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + +out: + synchronize_threads(); /* close() */ + if (sk > 0) + close(sk); +} + +static void *server_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_accept("Non-AO server + AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, + "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + + try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAORequired", TEST_CNT_AO_REQUIRED, FAULT_TIMEOUT); + + try_accept("Wrong password", port++, "something that is not DEFAULT_TEST_PASSWORD", + this_ip_dest, -1, 100, 100, 0, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 101, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 101, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, FAULT_TIMEOUT); + + try_accept("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 8, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Client: Wrong addr", port++, NULL, + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + + try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 200, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_accept("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + try_accept("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + /* client exits */ + synchronize_threads(); + return NULL; +} + +static void try_connect(const char *tst_name, unsigned int port, + const char *pwd, union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, + test_cnt cnt_expected, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + time_t timeout; + int sk, ret; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); 
+ + if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) { + test_ok("%s: connect() was prevented", tst_name); + } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { + test_ok("%s", tst_name); + } else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) { + test_ok("%s: refused to connect", tst_name); + } else { + test_error("%s: connect() returned %d", tst_name, ret); + } + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + if (pwd && ret > 0) { + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + } +out: + synchronize_threads(); /* close() */ + + if (ret > 0) + close(sk); +} + +static void *client_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("AO server + Non-AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Client: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, FAULT_KEYREJECT); + + try_connect("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 200, TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_connect("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, TEST_CNT_GOOD, 0); + + try_connect("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, TEST_CNT_GOOD, 0); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(21, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From b26660531cf66e1c8daab551535d3ad07b78fa54 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:19 +0000 Subject: selftests/net: Add test for TCP-AO add setsockopt() command Verify corner-cases for UAPI. 
Sample output: > # ./setsockopt-closed_ipv4 > 1..120 > # 1657[lib/setup.c:254] rand seed 1681938184 > TAP version 13 > ok 1 AO add: minimum size > ok 2 AO add: extended size > ok 3 AO add: null optval > ok 4 AO del: minimum size > ok 5 AO del: extended size > ok 6 AO del: null optval > ok 7 AO set info: minimum size > ok 8 AO set info: extended size > ok 9 AO info get: : extended size > ok 10 AO set info: null optval > ok 11 AO get info: minimum size > ok 12 AO get info: extended size > ok 13 AO get info: null optval > ok 14 AO get info: null optlen > ok 15 AO get keys: minimum size > ok 16 AO get keys: extended size > ok 17 AO get keys: null optval > ok 18 AO get keys: null optlen > ok 19 key add: too big keylen > ok 20 key add: using reserved padding > ok 21 key add: using reserved2 padding > ok 22 key add: wrong address family > ok 23 key add: port (unsupported) > ok 24 key add: no prefix, addr > ok 25 key add: no prefix, any addr > ok 26 key add: prefix, any addr > ok 27 key add: too big prefix > ok 28 key add: too short prefix > ok 29 key add: bad key flags > ok 30 key add: add current key on a listen socket > ok 31 key add: add rnext key on a listen socket > ok 32 key add: add current+rnext key on a listen socket > ok 33 key add: add key and set as current > ok 34 key add: add key and set as rnext > ok 35 key add: add key and set as current+rnext > ok 36 key add: ifindex without TCP_AO_KEYF_IFNINDEX > ok 37 key add: non-existent VRF > ok 38 optmem limit was hit on adding 69 key > ok 39 key add: maclen bigger than TCP hdr > ok 40 key add: bad algo > ok 41 key del: using reserved padding > ok 42 key del: using reserved2 padding > ok 43 key del: del and set current key on a listen socket > ok 44 key del: del and set rnext key on a listen socket > ok 45 key del: del and set current+rnext key on a listen socket > ok 46 key del: bad key flags > ok 47 key del: ifindex without TCP_AO_KEYF_IFNINDEX > ok 48 key del: non-existent VRF > ok 49 key del: set non-exising current key > ok 50 key del: set non-existing rnext key > ok 51 key del: set non-existing current+rnext key > ok 52 key del: set current key > ok 53 key del: set rnext key > ok 54 key del: set current+rnext key > ok 55 key del: set as current key to be removed > ok 56 key del: set as rnext key to be removed > ok 57 key del: set as current+rnext key to be removed > ok 58 key del: async on non-listen > ok 59 key del: non-existing sndid > ok 60 key del: non-existing rcvid > ok 61 key del: incorrect addr > ok 62 key del: correct key delete > ok 63 AO info set: set current key on a listen socket > ok 64 AO info set: set rnext key on a listen socket > ok 65 AO info set: set current+rnext key on a listen socket > ok 66 AO info set: using reserved padding > ok 67 AO info set: using reserved2 padding > ok 68 AO info set: accept_icmps > ok 69 AO info get: accept_icmps > ok 70 AO info set: ao required > ok 71 AO info get: ao required > ok 72 AO info set: ao required with MD5 key > ok 73 AO info set: set non-existing current key > ok 74 AO info set: set non-existing rnext key > ok 75 AO info set: set non-existing current+rnext key > ok 76 AO info set: set current key > ok 77 AO info get: set current key > ok 78 AO info set: set rnext key > ok 79 AO info get: set rnext key > ok 80 AO info set: set current+rnext key > ok 81 AO info get: set current+rnext key > ok 82 AO info set: set counters > ok 83 AO info get: set counters > ok 84 AO info set: no-op > ok 85 AO info get: no-op > ok 86 get keys: no ao_info > ok 87 get keys: proper 
tcp_ao_get_mkts() > ok 88 get keys: set out-only pkt_good counter > ok 89 get keys: set out-only pkt_bad counter > ok 90 get keys: bad keyflags > ok 91 get keys: ifindex without TCP_AO_KEYF_IFNINDEX > ok 92 get keys: using reserved field > ok 93 get keys: no prefix, addr > ok 94 get keys: no prefix, any addr > ok 95 get keys: prefix, any addr > ok 96 get keys: too big prefix > ok 97 get keys: too short prefix > ok 98 get keys: prefix + addr > ok 99 get keys: get_all + prefix > ok 100 get keys: get_all + addr > ok 101 get keys: get_all + sndid > ok 102 get keys: get_all + rcvid > ok 103 get keys: current + prefix > ok 104 get keys: current + addr > ok 105 get keys: current + sndid > ok 106 get keys: current + rcvid > ok 107 get keys: rnext + prefix > ok 108 get keys: rnext + addr > ok 109 get keys: rnext + sndid > ok 110 get keys: rnext + rcvid > ok 111 get keys: get_all + current > ok 112 get keys: get_all + rnext > ok 113 get keys: current + rnext > ok 114 key add: duplicate: full copy > ok 115 key add: duplicate: any addr key on the socket > ok 116 key add: duplicate: add any addr key > ok 117 key add: duplicate: add any addr for the same subnet > ok 118 key add: duplicate: full copy of a key > ok 119 key add: duplicate: RecvID differs > ok 120 key add: duplicate: SendID differs > # Totals: pass:120 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + .../selftests/net/tcp_ao/setsockopt-closed.c | 835 +++++++++++++++++++++ 2 files changed, 836 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/setsockopt-closed.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index f3b1d7f42edb..1efd98ca12db 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -2,6 +2,7 @@ TEST_BOTH_AF := connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += setsockopt-closed TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c new file mode 100644 index 000000000000..7e4601b3f6a3 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -0,0 +1,835 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +static union tcp_addr tcp_md5_client; + +static int test_port = 7788; +static void make_listen(int sk) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &this_ip_addr, htons(test_port++)); + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + if (listen(sk, 1)) + test_error("listen()"); +} + +static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, + const char *tst) +{ + struct tcp_ao_info_opt tmp; + socklen_t len = sizeof(tmp); + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len)) + test_error("getsockopt(TCP_AO_INFO) failed"); + +#define __cmp_ao(member) \ +do { \ + if (info->member != tmp.member) { \ + test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \ + tst, (size_t)info->member, (size_t)tmp.member); \ + return; \ + } \ +} while(0) + if (info->set_current) + __cmp_ao(current_key); + if (info->set_rnext) + __cmp_ao(rnext); + if (info->set_counters) { + __cmp_ao(pkt_good); + 
__cmp_ao(pkt_bad); + __cmp_ao(pkt_key_not_found); + __cmp_ao(pkt_ao_required); + __cmp_ao(pkt_dropped_icmp); + } + __cmp_ao(ao_required); + __cmp_ao(accept_icmps); + + test_ok("AO info get: %s", tst); +#undef __cmp_ao +} + +static void __setsockopt_checked(int sk, int optname, bool get, + void *optval, socklen_t *len, + int err, const char *tst, const char *tst2) +{ + int ret; + + if (!tst) + tst = ""; + if (!tst2) + tst2 = ""; + + errno = 0; + if (get) + ret = getsockopt(sk, IPPROTO_TCP, optname, optval, len); + else + ret = setsockopt(sk, IPPROTO_TCP, optname, optval, *len); + if (ret == -1) { + if (errno == err) + test_ok("%s%s", tst ?: "", tst2 ?: ""); + else + test_fail("%s%s: %setsockopt() failed", + tst, tst2, get ? "g" : "s"); + close(sk); + return; + } + + if (err) { + test_fail("%s%s: %setsockopt() was expected to fail with %d", + tst, tst2, get ? "g" : "s", err); + } else { + test_ok("%s%s", tst ?: "", tst2 ?: ""); + if (optname == TCP_AO_ADD_KEY) { + test_verify_socket_key(sk, optval); + } else if (optname == TCP_AO_INFO && !get) { + test_vefify_ao_info(sk, optval, tst2); + } else if (optname == TCP_AO_GET_KEYS) { + if (*len != sizeof(struct tcp_ao_getsockopt)) + test_fail("%s%s: get keys returned wrong tcp_ao_getsockopt size", + tst, tst2); + } + } + close(sk); +} + +static void setsockopt_checked(int sk, int optname, void *optval, + int err, const char *tst) +{ + const char *cmd = NULL; + socklen_t len; + + switch (optname) { + case TCP_AO_ADD_KEY: + cmd = "key add: "; + len = sizeof(struct tcp_ao_add); + break; + case TCP_AO_DEL_KEY: + cmd = "key del: "; + len = sizeof(struct tcp_ao_del); + break; + case TCP_AO_INFO: + cmd = "AO info set: "; + len = sizeof(struct tcp_ao_info_opt); + break; + default: + break; + }; + + __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst); +} + +static int prepare_defs(int cmd, void *optval) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + switch (cmd) { + case TCP_AO_ADD_KEY: { + struct tcp_ao_add *add = optval; + + if (test_prepare_def_key(add, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, + -1, 0, 100, 100)) + test_error("prepare default tcp_ao_add"); + break; + } + case TCP_AO_DEL_KEY: { + struct tcp_ao_del *del = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(del, 0, sizeof(struct tcp_ao_del)); + del->sndid = 100; + del->rcvid = 100; + del->prefix = DEFAULT_TEST_PREFIX; + tcp_addr_to_sockaddr_in(&del->addr, &this_ip_dest, 0); + break; + } + case TCP_AO_INFO: { + struct tcp_ao_info_opt *info = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(info, 0, sizeof(struct tcp_ao_info_opt)); + break; + } + case TCP_AO_GET_KEYS: { + struct tcp_ao_getsockopt *get = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(get, 0, sizeof(struct tcp_ao_getsockopt)); + get->nkeys = 1; + get->get_all = 1; + break; + } + default: + test_error("unknown cmd"); + } + + return sk; +} + +static void test_extend(int cmd, bool get, const char *tst, socklen_t under_size) +{ + struct { + union { + struct tcp_ao_add add; + struct tcp_ao_del del; + struct tcp_ao_getsockopt get; + struct tcp_ao_info_opt info; + }; + char *extend[100]; + } tmp_opt; + socklen_t extended_size = sizeof(tmp_opt); + int sk; + + 
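+ /*
+  * Four probes per optname: undersized optlen (expect EINVAL), oversized
+  * zero-padded optval (expect success), NULL optval (expect EFAULT) and,
+  * for getsockopt()s, NULL optlen (expect EFAULT).
+  */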
memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &under_size, + EINVAL, tst, ": minimum size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &extended_size, + 0, tst, ": extended size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, NULL, &extended_size, + EFAULT, tst, ": null optval"); + + if (get) { + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, NULL, + EFAULT, tst, ": null optlen"); + } +} + +static void extend_tests(void) +{ + test_extend(TCP_AO_ADD_KEY, false, "AO add", + offsetof(struct tcp_ao_add, key)); + test_extend(TCP_AO_DEL_KEY, false, "AO del", + offsetof(struct tcp_ao_del, keyflags)); + test_extend(TCP_AO_INFO, false, "AO set info", + offsetof(struct tcp_ao_info_opt, pkt_dropped_icmp)); + test_extend(TCP_AO_INFO, true, "AO get info", -1); + test_extend(TCP_AO_GET_KEYS, true, "AO get keys", -1); +} + +static void test_optmem_limit(void) +{ + size_t i, keys_limit, current_optmem = test_get_optmem(); + struct tcp_ao_add ao; + union tcp_addr net = {}; + int sk; + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + keys_limit = current_optmem / KERNEL_TCP_AO_KEY_SZ_ROUND_UP; + for (i = 0;; i++) { + union tcp_addr key_peer; + int err; + + key_peer = gen_tcp_addr(net, i + 1); + tcp_addr_to_sockaddr_in(&ao.addr, &key_peer, 0); + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &ao, sizeof(ao)); + if (!err) { + /* + * TCP_AO_ADD_KEY should be the same order as the real + * sizeof(struct tcp_ao_key) in kernel. 
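+ * KERNEL_TCP_AO_KEY_SZ_ROUND_UP is only an estimate of that size, so the
+ * check below tolerates up to a 10x overshoot of keys_limit before failing.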
+ */ + if (i <= keys_limit * 10) + continue; + test_fail("optmem limit test failed: added %zu key", i); + break; + } + if (i < keys_limit) { + test_fail("optmem limit test failed: couldn't add %zu key", i); + break; + } + test_ok("optmem limit was hit on adding %zu key", i); + break; + } + close(sk); +} + +static void test_einval_add_key(void) +{ + struct tcp_ao_add ao; + int sk; + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keylen = TCP_AO_MAXKEYLEN + 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big keylen"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved2 padding"); + + /* tcp_ao_verify_ipv{4,6}() checks */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.addr.ss_family = AF_UNIX; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "wrong address family"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + tcp_addr_to_sockaddr_in(&ao.addr, &this_ip_dest, 1234); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "port (unsupported)"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "no prefix, addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "no prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 32; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 129; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 2; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too short prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags = (uint8_t)(-1); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "bad key flags"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current+rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags |= TCP_AO_KEYF_IFINDEX; + ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "non-existent VRF"); + /* + 
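+ * Deeper l3index/VRF coverage lives in a separate selftest: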
* tcp_md5_do_lookup{,_any_l3index}() are checked in unsigned-md5 + * see client_vrf_tests(). + */ + + test_optmem_limit(); + + /* tcp_ao_parse_crypto() */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.maclen = 100; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EMSGSIZE, "maclen bigger than TCP hdr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + strcpy(ao.alg_name, "imaginary hash algo"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, ENOENT, "bad algo"); +} + +static void test_einval_del_key(void) +{ + struct tcp_ao_del del; + int sk; + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.reserved = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved2 padding"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.keyflags = (uint8_t)(-1); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "bad key flags"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.ifindex = 42; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.keyflags |= TCP_AO_KEYF_IFINDEX; + del.ifindex = 42; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existent VRF"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-exising current key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current+rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current+rnext key"); + + sk = 
prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.current_key = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_rnext = 1; + del.rnext = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as rnext key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.current_key = 100; + del.set_rnext = 1; + del.rnext = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current+rnext key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.del_async = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "async on non-listen"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.sndid = 101; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing sndid"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.rcvid = 101; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing rcvid"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + tcp_addr_to_sockaddr_in(&del.addr, &this_ip_addr, 0); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "incorrect addr"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "correct key delete"); +} + +static void test_einval_ao_info(void) +{ + struct tcp_ao_info_opt info; + int sk; + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_current = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_current = 1; + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.reserved = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved2 padding"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.accept_icmps = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "accept_icmps"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.ao_required = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "ao required"); + + if (!should_skip_test("ao required with MD5 key", KCONFIG_TCP_MD5)) { + sk = prepare_defs(TCP_AO_INFO, &info); + info.ao_required = 1; + if (test_set_md5(sk, tcp_md5_client, TEST_PREFIX, -1, + "long long secret")) { + test_error("setsockopt(TCP_MD5SIG_EXT)"); + close(sk); + } else { + setsockopt_checked(sk, TCP_AO_INFO, &info, EKEYREJECTED, + "ao required with MD5 key"); + } + } + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current+rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.current_key = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current key"); + + sk = 
prepare_defs(TCP_AO_INFO, &info); + info.set_rnext = 1; + info.rnext = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.set_rnext = 1; + info.current_key = 100; + info.rnext = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current+rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_counters = 1; + info.pkt_good = 321; + info.pkt_bad = 888; + info.pkt_key_not_found = 654; + info.pkt_ao_required = 987654; + info.pkt_dropped_icmp = 10000; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set counters"); + + sk = prepare_defs(TCP_AO_INFO, &info); + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "no-op"); +} + +static void getsockopt_checked(int sk, struct tcp_ao_getsockopt *optval, + int err, const char *tst) +{ + socklen_t len = sizeof(struct tcp_ao_getsockopt); + + __setsockopt_checked(sk, TCP_AO_GET_KEYS, true, optval, &len, err, + "get keys: ", tst); +} + +static void test_einval_get_keys(void) +{ + struct tcp_ao_getsockopt out; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + getsockopt_checked(sk, &out, ENOENT, "no ao_info"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + getsockopt_checked(sk, &out, 0, "proper tcp_ao_get_mkts()"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.pkt_good = 643; + getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_good counter"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.pkt_bad = 94; + getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_bad counter"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.keyflags = (uint8_t)(-1); + getsockopt_checked(sk, &out, EINVAL, "bad keyflags"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.ifindex = 42; + getsockopt_checked(sk, &out, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.reserved = 1; + getsockopt_checked(sk, &out, EINVAL, "using reserved field"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 0; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "no prefix, addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 0; + memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + getsockopt_checked(sk, &out, 0, "no prefix, any addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 32; + memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + getsockopt_checked(sk, &out, EINVAL, "prefix, any addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 129; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "too big prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 2; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "too short prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = DEFAULT_TEST_PREFIX; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, 0, "prefix + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "get_all + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, 
"get_all + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "current + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "current + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "current + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "current + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "rnext + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "rnext + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "rnext + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "rnext + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.is_current = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + current"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.is_rnext = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + rnext"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.is_rnext = 1; + getsockopt_checked(sk, &out, 0, "current + rnext"); +} + +static void einval_tests(void) +{ + test_einval_add_key(); + test_einval_del_key(); + test_einval_ao_info(); + test_einval_get_keys(); +} + +static void duplicate_tests(void) +{ + union tcp_addr network_dup; + struct tcp_ao_add ao, ao2; + int sk; + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao2 = ao; + memcpy(&ao2.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + ao2.prefix = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao2, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: any addr key on the socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + ao.prefix = 0; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr key"); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_dup) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + if 
(test_prepare_def_key(&ao, "password", 0, network_dup, + 16, 0, 100, 100)) + test_error("prepare default tcp_ao_add"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr for the same subnet"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy of a key"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + ao.rcvid = 101; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: RecvID differs"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + ao.sndid = 101; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs"); +} + +static void *client_fn(void *arg) +{ + if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1) + test_error("Can't convert ip address"); + extend_tests(); + einval_tests(); + duplicate_tests(); + /* + * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters + * returning proper nr & keys; + */ + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(120, client_fn, NULL); + return 0; +} -- cgit v1.2.3-70-g09d2 From 6f0c472a681586161e4b3988243754514eef8a0d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:20 +0000 Subject: selftests/net: Add TCP-AO + TCP-MD5 + no sign listen socket tests The test plan was (most of tests have all 3 client types): 1. TCP-AO listen (INADDR_ANY) 2. TCP-MD5 listen (INADDR_ANY) 3. non-signed listen (INADDR_ANY) 4. TCP-AO + TCP-MD5 listen (prefix) 5. TCP-AO subprefix add failure [checked in setsockopt-closed.c] 6. TCP-AO out of prefix connect [checked in connect-deny.c] 7. TCP-AO + TCP-MD5 on connect() 8. TCP-AO intersect with TCP-MD5 failure 9. Established TCP-AO: add TCP-MD5 key 10. Established TCP-MD5: add TCP-AO key 11. 
Established non-signed: add TCP-AO key Output produced: > # ./unsigned-md5_ipv6 > 1..72 > # 1592[lib/setup.c:239] rand seed 1697567046 > TAP version 13 > ok 1 AO server (INADDR_ANY): AO client: counter TCPAOGood increased 0 => 2 > ok 2 AO server (INADDR_ANY): AO client: connected > ok 3 AO server (INADDR_ANY): MD5 client > ok 4 AO server (INADDR_ANY): MD5 client: counter TCPMD5Unexpected increased 0 => 1 > ok 5 AO server (INADDR_ANY): no sign client: counter TCPAORequired increased 0 => 1 > ok 6 AO server (INADDR_ANY): unsigned client > ok 7 AO server (AO_REQUIRED): AO client: connected > ok 8 AO server (AO_REQUIRED): AO client: counter TCPAOGood increased 4 => 6 > ok 9 AO server (AO_REQUIRED): unsigned client > ok 10 AO server (AO_REQUIRED): unsigned client: counter TCPAORequired increased 1 => 2 > ok 11 MD5 server (INADDR_ANY): AO client: counter TCPAOKeyNotFound increased 0 => 1 > ok 12 MD5 server (INADDR_ANY): AO client > ok 13 MD5 server (INADDR_ANY): MD5 client: connected > ok 14 MD5 server (INADDR_ANY): MD5 client: no counter checks > ok 15 MD5 server (INADDR_ANY): no sign client > ok 16 MD5 server (INADDR_ANY): no sign client: counter TCPMD5NotFound increased 0 => 1 > ok 17 no sign server: AO client > ok 18 no sign server: AO client: counter TCPAOKeyNotFound increased 1 => 2 > ok 19 no sign server: MD5 client > ok 20 no sign server: MD5 client: counter TCPMD5Unexpected increased 1 => 2 > ok 21 no sign server: no sign client: connected > ok 22 no sign server: no sign client: counter CurrEstab increased 0 => 1 > ok 23 AO+MD5 server: AO client (matching): connected > ok 24 AO+MD5 server: AO client (matching): counter TCPAOGood increased 8 => 10 > ok 25 AO+MD5 server: AO client (misconfig, matching MD5) > ok 26 AO+MD5 server: AO client (misconfig, matching MD5): counter TCPAOKeyNotFound increased 2 => 3 > ok 27 AO+MD5 server: AO client (misconfig, non-matching): counter TCPAOKeyNotFound increased 3 => 4 > ok 28 AO+MD5 server: AO client (misconfig, non-matching) > ok 29 AO+MD5 server: MD5 client (matching): connected > ok 30 AO+MD5 server: MD5 client (matching): no counter checks > ok 31 AO+MD5 server: MD5 client (misconfig, matching AO) > ok 32 AO+MD5 server: MD5 client (misconfig, matching AO): counter TCPMD5Unexpected increased 2 => 3 > ok 33 AO+MD5 server: MD5 client (misconfig, non-matching) > ok 34 AO+MD5 server: MD5 client (misconfig, non-matching): counter TCPMD5Unexpected increased 3 => 4 > ok 35 AO+MD5 server: no sign client (unmatched): connected > ok 36 AO+MD5 server: no sign client (unmatched): counter CurrEstab increased 0 => 1 > ok 37 AO+MD5 server: no sign client (misconfig, matching AO) > ok 38 AO+MD5 server: no sign client (misconfig, matching AO): counter TCPAORequired increased 2 => 3 > ok 39 AO+MD5 server: no sign client (misconfig, matching MD5) > ok 40 AO+MD5 server: no sign client (misconfig, matching MD5): counter TCPMD5NotFound increased 1 => 2 > ok 41 AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys: connect() was prevented > ok 42 AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys: no counter checks > ok 43 AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys: connect() was prevented > ok 44 AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys: no counter checks > ok 45 TCP-AO established: add TCP-MD5 key: postfailed as expected > ok 46 TCP-AO established: add TCP-MD5 key: counter TCPAOGood increased 12 => 14 > ok 47 TCP-MD5 established: add TCP-AO key: postfailed as expected > ok 48 TCP-MD5 established: add TCP-AO key: no counter 
checks > ok 49 non-signed established: add TCP-AO key: postfailed as expected > ok 50 non-signed established: add TCP-AO key: counter CurrEstab increased 0 => 1 > ok 51 TCP-AO key intersects with existing TCP-MD5 key: prefailed as expected: Key was rejected by service > ok 52 TCP-MD5 key intersects with existing TCP-AO key: prefailed as expected: Key was rejected by service > ok 53 TCP-MD5 key + TCP-AO required: prefailed as expected: Key was rejected by service > ok 54 TCP-AO required on socket + TCP-MD5 key: prefailed as expected: Key was rejected by service > ok 55 VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index): prefailed as expected: Key was rejected by service > ok 56 VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index): prefailed as expected: Key was rejected by service > ok 57 VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0): prefailed as expected: Key was rejected by service > ok 58 VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index): prefailed as expected: Key was rejected by service > ok 59 VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N): prefailed as expected: Key was rejected by service > ok 60 VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index): prefailed as expected: Key was rejected by service > ok 61 VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index): prefailed as expected: Key was rejected by service > ok 62 VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0): prefailed as expected: Key was rejected by service > ok 63 VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0): prefailed as expected: Key was rejected by service > ok 64 VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0): prefailed as expected: Key was rejected by service > ok 65 VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N) > ok 66 VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0) > ok 67 VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index): prefailed as expected: Key was rejected by service > ok 68 VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N): prefailed as expected: Key was rejected by service > ok 69 VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0) > ok 70 VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N) > ok 71 VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N): prefailed as expected: Key was rejected by service > ok 72 VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N): prefailed as expected: Key was rejected by service > # Totals: pass:72 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/unsigned-md5.c | 742 ++++++++++++++++++++++ 2 files changed, 743 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/unsigned-md5.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 1efd98ca12db..ee2f1a17e805 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -3,6 +3,7 @@ TEST_BOTH_AF := connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += setsockopt-closed +TEST_BOTH_AF += unsigned-md5 TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c new file mode 100644 index 000000000000..7cffde02d2be --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -0,0 +1,742 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) +static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; +static const char *ao_password = DEFAULT_TEST_PASSWORD; + +static union tcp_addr client2; +static union tcp_addr client3; + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF"); +} + +static void try_accept(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + bool set_ao_required, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + const char *cnt_name, test_cnt cnt_expected, + int needs_tcp_md5, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (md5_addr && test_set_md5(lsk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(lsk, ao_password, + *ao_addr, ao_prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (set_ao_required && test_set_ao_flags(lsk, true, false)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timeouted for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + close(lsk); + + if (!cnt_name) { + test_ok("%s: no counter checks", tst_name); + goto out; + } + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + if (ao_addr) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + +out: + synchronize_threads(); /* close() */ + if (sk > 0) + close(sk); +} + +static void server_add_routes(void) +{ + int family = TEST_FAMILY; + + synchronize_threads(); /* client_add_ips() */ + + if (ip_route_add(veth_name, family, this_ip_addr, client2)) + test_error("Failed to add route"); + if (ip_route_add(veth_name, family, this_ip_addr, client3)) + test_error("Failed to add route"); +} + +static void server_add_fail_tests(unsigned int *port) +{ + union tcp_addr addr_any = {}; + + try_accept("TCP-AO established: add TCP-MD5 key", (*port)++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, + 1, 0); + try_accept("TCP-MD5 established: add TCP-AO key", (*port)++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("non-signed established: add TCP-AO key", (*port)++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); +} + +static void server_vrf_tests(unsigned int *port) +{ + setup_vrfs(); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + server_add_routes(); + + try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", + TEST_CNT_GOOD, 0, 0); + try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); + try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + + try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + 0, 1, FAULT_TIMEOUT); + try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", + 0, 1, FAULT_TIMEOUT); + + try_accept("no sign server: AO client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("no sign server: MD5 client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, 
"TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + try_accept("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); + + try_accept("AO+MD5 server: AO client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); + try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, 0); + try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (unmatched)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "CurrEstab", 0, 1, 0); + try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + + try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + + server_add_fail_tests(&port); + + server_vrf_tests(&port); + + /* client exits */ + synchronize_threads(); + return NULL; +} + +static int client_bind(int sk, union tcp_addr bind_addr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = bind_addr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = bind_addr.a4, + }; +#endif + return bind(sk, &addr, sizeof(addr)); +} + +static void try_connect(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (bind_addr && client_bind(sk, *bind_addr)) + test_error("bind()"); + + if (md5_addr && test_set_md5(sk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(sk, ao_password, *ao_addr, + ao_prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* preparations done */ + + timeout 
= fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) + test_ok("%s: connect() was prevented", tst_name); + else if (ret == -ETIMEDOUT && fault(TIMEOUT)) + test_ok("%s", tst_name); + else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) + test_ok("%s: refused to connect", tst_name); + else + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + +out: + synchronize_threads(); /* close() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + close(sk); +} + +#define PREINSTALL_MD5_FIRST BIT(0) +#define PREINSTALL_AO BIT(1) +#define POSTINSTALL_AO BIT(2) +#define PREINSTALL_MD5 BIT(3) +#define POSTINSTALL_MD5 BIT(4) + +static int try_add_key_vrf(int sk, union tcp_addr in_addr, uint8_t prefix, + int vrf, uint8_t sndid, uint8_t rcvid, + bool set_ao_required) +{ + uint8_t keyflags = 0; + + if (vrf >= 0) + keyflags |= TCP_AO_KEYF_IFINDEX; + else + vrf = 0; + if (set_ao_required) { + int err = test_set_ao_flags(sk, true, 0); + + if (err) + return err; + } + return test_add_key_vrf(sk, ao_password, keyflags, in_addr, prefix, + (uint8_t)vrf, sndid, rcvid); +} + +static bool test_continue(const char *tst_name, int err, + fault_t inj, bool added_ao) +{ + bool expected_to_fail; + + expected_to_fail = fault(PREINSTALL_AO) && added_ao; + expected_to_fail |= fault(PREINSTALL_MD5) && !added_ao; + + if (!err) { + if (!expected_to_fail) + return true; + test_fail("%s: setsockopt()s were expected to fail", tst_name); + return false; + } + if (err != -EKEYREJECTED || !expected_to_fail) { + test_error("%s: setsockopt(%s) = %d", tst_name, + added_ao ? 
"TCP_AO_ADD_KEY" : "TCP_MD5SIG_EXT", err); + return false; + } + test_ok("%s: prefailed as expected: %m", tst_name); + return false; +} + +static int open_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + fault_t inj) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (client_bind(sk, this_ip_addr)) + test_error("bind()"); + + if (strategy & PREINSTALL_MD5_FIRST) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + + if (strategy & PREINSTALL_AO) { + int err = try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, set_ao_required); + + if (!test_continue(tst_name, err, inj, true)) { + close(sk); + return -1; + } + } + + if (strategy & PREINSTALL_MD5) { + errno = 0; + test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password); + if (!test_continue(tst_name, -errno, inj, false)) { + close(sk); + return -1; + } + } + + return sk; +} + +static void try_to_preadd(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, int needs_vrf, fault_t inj) +{ + int sk; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + if (needs_vrf && should_skip_test(tst_name, KCONFIG_NET_VRF)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, set_ao_required, + sndid, rcvid, inj); + if (sk < 0) + return; + + test_ok("%s", tst_name); + close(sk); +} + +static void try_to_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, fault_t inj) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, 0, sndid, rcvid, inj); + if (sk < 0) + return; + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret <= 0) { + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (strategy & POSTINSTALL_MD5) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + + if (strategy & POSTINSTALL_AO) { + if (try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, 0)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + +out: + synchronize_threads(); /* close() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + close(sk); +} + +static void client_add_ip(union tcp_addr *client, const char *ip) +{ + int family = TEST_FAMILY; + + if (inet_pton(family, ip, client) != 1) + test_error("Can't convert ip address %s", ip); + + if (ip_addr_add(veth_name, family, *client, TEST_PREFIX)) + test_error("Failed to add ip address"); + if (ip_route_add(veth_name, family, *client, this_ip_dest)) + test_error("Failed to add route"); +} + +static void client_add_ips(void) +{ + client_add_ip(&client2, __TEST_CLIENT_IP(2)); + client_add_ip(&client3, __TEST_CLIENT_IP(3)); + synchronize_threads(); /* server_add_routes() */ +} + +static void client_add_fail_tests(unsigned int *port) +{ + try_to_add("TCP-AO established: add TCP-MD5 key", + (*port)++, POSTINSTALL_MD5 | PREINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("TCP-MD5 established: add TCP-AO key", + (*port)++, PREINSTALL_MD5 | POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("non-signed established: add TCP-AO key", + (*port)++, POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 0, FAULT_POSTINSTALL); + + try_to_add("TCP-AO key intersects with existing TCP-MD5 key", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_AO); + try_to_add("TCP-MD5 key intersects with existing TCP-AO key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_MD5); + + try_to_preadd("TCP-MD5 key + TCP-AO required", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_AO); + try_to_preadd("TCP-AO required on socket + TCP-MD5 key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_MD5); +} + +static void client_vrf_tests(unsigned int *port) +{ + setup_vrfs(); + + /* The following restrictions for setsockopt()s are expected: + * + * |--------------|-----------------|-------------|-------------| + * | | MD5 key without | MD5 key | MD5 key | + * | | l3index | l3index=0 | l3index=N | + * |--------------|-----------------|-------------|-------------| + * | 
TCP-AO key | | | | + * | without | reject | reject | reject | + * | l3index | | | | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=0 | reject | reject | allow | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=N | reject | allow | reject | + * |--------------|-----------------|-------------|-------------| + */ + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N)", + (*port)++, 
PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + client_add_ips(); + + try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + + try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + + try_connect("no sign server: AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("no sign server: MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, 0, 0, &this_ip_addr); + + try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 1, &client2); + try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO+MD5 server: AO client (misconfig, non-matching)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &client3); + try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client3); + try_connect("AO+MD5 server: no sign client (unmatched)", + port++, NULL, 0, NULL, 
0, 100, 100, 0, 0, 1, &client3); + try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &this_ip_addr); + + try_connect("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &this_ip_addr); + try_connect("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &client2); + + client_add_fail_tests(&port); + client_vrf_tests(&port); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(72, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From d1066c9c58d48bdbda0236b4744dc03f8a903d49 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:21 +0000 Subject: selftests/net: Add test/benchmark for removing MKTs Sample output: > 1..36 > # 1106[lib/setup.c:207] rand seed 1660754406 > TAP version 13 > ok 1 Worst case connect 512 keys: min=0ms max=1ms mean=0.583329ms stddev=0.076376 > ok 2 Connect random-search 512 keys: min=0ms max=1ms mean=0.53412ms stddev=0.0516779 > ok 3 Worst case delete 512 keys: min=2ms max=11ms mean=6.04139ms stddev=0.245792 > ok 4 Add a new key 512 keys: min=0ms max=13ms mean=0.673415ms stddev=0.0820618 > ok 5 Remove random-search 512 keys: min=5ms max=9ms mean=6.65969ms stddev=0.258064 > ok 6 Remove async 512 keys: min=0ms max=0ms mean=0.041825ms stddev=0.0204512 > ok 7 Worst case connect 1024 keys: min=0ms max=2ms mean=0.520357ms stddev=0.0721358 > ok 8 Connect random-search 1024 keys: min=0ms max=2ms mean=0.535312ms stddev=0.0517355 > ok 9 Worst case delete 1024 keys: min=5ms max=9ms mean=8.27219ms stddev=0.287614 > ok 10 Add a new key 1024 keys: min=0ms max=1ms mean=0.688121ms stddev=0.0829531 > ok 11 Remove random-search 1024 keys: min=5ms max=9ms mean=8.37649ms stddev=0.289422 > ok 12 Remove async 1024 keys: min=0ms max=0ms mean=0.0457096ms stddev=0.0213798 > ok 13 Worst case connect 2048 keys: min=0ms max=2ms mean=0.748804ms stddev=0.0865335 > ok 14 Connect random-search 2048 keys: min=0ms max=2ms mean=0.782993ms stddev=0.0625697 > ok 15 Worst case delete 2048 keys: min=5ms max=10ms mean=8.23106ms stddev=0.286898 > ok 16 Add a new key 2048 keys: min=0ms max=1ms mean=0.812988ms stddev=0.0901658 > ok 17 Remove random-search 2048 keys: min=8ms max=9ms mean=8.84949ms stddev=0.297481 > ok 18 Remove async 2048 keys: min=0ms max=0ms mean=0.0297223ms stddev=0.0172402 > ok 19 Worst case connect 4096 keys: min=1ms max=5ms mean=1.53352ms stddev=0.123836 > ok 20 Connect random-search 4096 keys: min=1ms max=5ms mean=1.52226ms stddev=0.0872429 > ok 21 Worst case delete 4096 keys: min=5ms max=9ms mean=8.25874ms stddev=0.28738 > ok 22 Add a new key 4096 keys: min=0ms max=3ms mean=1.67382ms stddev=0.129376 > ok 23 Remove random-search 4096 keys: min=5ms max=10ms mean=8.26178ms stddev=0.287433 > ok 24 Remove async 4096 keys: min=0ms max=0ms mean=0.0340009ms stddev=0.0184393 > ok 25 Worst case connect 8192 keys: min=2ms max=4ms mean=2.86208ms stddev=0.169177 > ok 26 Connect random-search 8192 keys: min=2ms max=4ms mean=2.87592ms stddev=0.119915 > ok 27 Worst case delete 8192 keys: min=6ms max=11ms mean=7.55291ms stddev=0.274826 > ok 28 Add a new key 8192 keys: min=1ms max=5ms mean=2.56797ms stddev=0.160249 > ok 29 
Remove random-search 8192 keys: min=5ms max=10ms mean=7.14002ms stddev=0.267208 > ok 30 Remove async 8192 keys: min=0ms max=0ms mean=0.0320066ms stddev=0.0178904 > ok 31 Worst case connect 16384 keys: min=5ms max=6ms mean=5.55334ms stddev=0.235655 > ok 32 Connect random-search 16384 keys: min=5ms max=6ms mean=5.52614ms stddev=0.166225 > ok 33 Worst case delete 16384 keys: min=5ms max=11ms mean=7.39109ms stddev=0.271866 > ok 34 Add a new key 16384 keys: min=2ms max=4ms mean=3.35799ms stddev=0.183248 > ok 35 Remove random-search 16384 keys: min=5ms max=8ms mean=6.86078ms stddev=0.261931 > ok 36 Remove async 16384 keys: min=0ms max=0ms mean=0.0302384ms stddev=0.0173892 > # Totals: pass:36 fail:0 xfail:0 xpass:0 skip:0 error:0 >From the output it's visible that the current simplified approach with linked-list of MKTs scales quite fine even for thousands of keys. And that also means that the majority of the time for delete is eaten by synchronize_rcu() [which I can confirm separately by tracing]. Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 5 +- tools/testing/selftests/net/tcp_ao/bench-lookups.c | 358 +++++++++++++++++++++ 2 files changed, 362 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/tcp_ao/bench-lookups.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index ee2f1a17e805..f0b218b99506 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_BOTH_AF := connect +TEST_BOTH_AF := bench-lookups +TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += setsockopt-closed @@ -49,3 +50,5 @@ $(OUTPUT)/%_ipv6: %.c $(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT $(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/bench-lookups_ipv4: LDFLAGS+= -lm +$(OUTPUT)/bench-lookups_ipv6: LDFLAGS+= -lm diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c new file mode 100644 index 000000000000..7be8a7d9308c --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include +#include +#include +#include +#include + +#include "../../../../include/linux/bits.h" +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +#define BENCH_NR_ITERS 100 /* number of times to run gathering statistics */ + +static void gen_test_ips(union tcp_addr *ips, size_t ips_nr, bool use_rand) +{ + union tcp_addr net = {}; + size_t i, j; + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + if (!use_rand) { + for (i = 0; i < ips_nr; i++) + ips[i] = gen_tcp_addr(net, 2 * i + 1); + return; + } + for (i = 0; i < ips_nr; i++) { + size_t r = (size_t)random() | 0x1; + + ips[i] = gen_tcp_addr(net, r); + + for (j = i - 1; j > 0 && i > 0; j--) { + if (!memcmp(&ips[i], &ips[j], sizeof(union tcp_addr))) { + i--; /* collision */ + break; + } + } + } +} + +static void test_add_routes(union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + + if (ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p)) + test_error("Failed to add route"); + } +} + +static void 
server_apply_keys(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); + } +} + +static const size_t nr_keys[] = { 512, 1024, 2048, 4096, 8192 }; +static union tcp_addr *test_ips; + +struct bench_stats { + uint64_t min; + uint64_t max; + uint64_t nr; + double mean; + double s2; +}; + +static struct bench_tests { + struct bench_stats delete_last_key; + struct bench_stats add_key; + struct bench_stats delete_rand_key; + struct bench_stats connect_last_key; + struct bench_stats connect_rand_key; + struct bench_stats delete_async; +} bench_results[ARRAY_SIZE(nr_keys)]; + +#define NSEC_PER_SEC 1000000000ULL + +static void measure_call(struct bench_stats *st, + void (*f)(int, void *), int sk, void *arg) +{ + struct timespec start = {}, end = {}; + double delta; + uint64_t nsec; + + if (clock_gettime(CLOCK_MONOTONIC, &start)) + test_error("clock_gettime()"); + + f(sk, arg); + + if (clock_gettime(CLOCK_MONOTONIC, &end)) + test_error("clock_gettime()"); + + nsec = (end.tv_sec - start.tv_sec) * NSEC_PER_SEC; + if (end.tv_nsec >= start.tv_nsec) + nsec += end.tv_nsec - start.tv_nsec; + else + nsec -= start.tv_nsec - end.tv_nsec; + + if (st->nr == 0) { + st->min = st->max = nsec; + } else { + if (st->min > nsec) + st->min = nsec; + if (st->max < nsec) + st->max = nsec; + } + + /* Welford-Knuth algorithm */ + st->nr++; + delta = (double)nsec - st->mean; + st->mean += delta / st->nr; + st->s2 += delta * ((double)nsec - st->mean); +} + +static void delete_mkt(int sk, void *arg) +{ + struct tcp_ao_del *ao = arg; + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, ao, sizeof(*ao))) + test_error("setsockopt(TCP_AO_DEL_KEY)"); +} + +static void add_back_mkt(int sk, void *arg) +{ + union tcp_addr *p = arg; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); +} + +static void bench_delete(int lsk, struct bench_stats *add, + struct bench_stats *del, + union tcp_addr *ips, size_t ips_nr, + bool rand_order, bool async) +{ + struct tcp_ao_del ao_del = {}; + union tcp_addr *p; + size_t i; + + ao_del.sndid = 100; + ao_del.rcvid = 100; + ao_del.del_async = !!async; + ao_del.prefix = DEFAULT_TEST_PREFIX; + + /* Remove the first added */ + p = (union tcp_addr *)&ips[0]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + + for (i = 0; i < BENCH_NR_ITERS; i++) { + measure_call(del, delete_mkt, lsk, (void *)&ao_del); + + /* Restore it back */ + measure_call(add, add_back_mkt, lsk, (void *)p); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. 
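+ * (Since the re-added key lands at the head of the list, the key
+ * deleted next is always the one furthest from the head, i.e. every
+ * delete in this loop stays a full-list walk.)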
+ */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + } +} + +static void bench_connect_srv(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk; + + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + close(sk); + } +} + +static void test_print_stats(const char *desc, size_t nr, struct bench_stats *bs) +{ + test_ok("%-20s\t%zu keys: min=%" PRIu64 "ms max=%" PRIu64 "ms mean=%gms stddev=%g", + desc, nr, bs->min / 1000000, bs->max / 1000000, + bs->mean / 1000000, sqrt((bs->mean / 1000000) / bs->nr)); +} + +static void *server_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + int lsk; + + test_ips = malloc(nr_keys[i] * sizeof(union tcp_addr)); + if (!test_ips) + test_error("malloc()"); + + lsk = test_listen_socket(this_ip_addr, test_server_port + i, 1); + + gen_test_ips(test_ips, nr_keys[i], false); + test_add_routes(test_ips, nr_keys[i]); + test_set_optmem(KERNEL_TCP_AO_KEY_SZ_ROUND_UP * nr_keys[i]); + server_apply_keys(lsk, test_ips, nr_keys[i]); + + synchronize_threads(); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + + /* The worst case for FILO-list */ + bench_delete(lsk, &bt->add_key, &bt->delete_last_key, + test_ips, nr_keys[i], false, false); + test_print_stats("Add a new key", + nr_keys[i], &bt->add_key); + test_print_stats("Delete: worst case", + nr_keys[i], &bt->delete_last_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_rand_key, + test_ips, nr_keys[i], true, false); + test_print_stats("Delete: random-search", + nr_keys[i], &bt->delete_rand_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_async, + test_ips, nr_keys[i], false, true); + test_print_stats("Delete: async", nr_keys[i], &bt->delete_async); + + free(test_ips); + close(lsk); + } + + return NULL; +} + +static void connect_client(int sk, void *arg) +{ + size_t *p = arg; + + if (test_connect_socket(sk, this_ip_dest, test_server_port + *p) <= 0) + test_error("failed to connect()"); +} + +static void client_addr_setup(int sk, union tcp_addr taddr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = taddr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = taddr.a4, + }; +#endif + int ret; + + ret = ip_addr_add(veth_name, TEST_FAMILY, taddr, TEST_PREFIX); + if (ret && ret != -EEXIST) + test_error("Failed to add ip address"); + ret = ip_route_add(veth_name, TEST_FAMILY, taddr, this_ip_dest); + if (ret && ret != -EEXIST) + test_error("Failed to add route"); + + if (bind(sk, &addr, sizeof(addr))) + test_error("bind()"); +} + +static void bench_connect_client(size_t port_off, struct bench_tests *bt, + union tcp_addr *ips, size_t ips_nr, bool rand_order) +{ + struct bench_stats *con; + union tcp_addr *p; + size_t i; + + if (rand_order) + con = &bt->connect_rand_key; + else + con = &bt->connect_last_key; + + p = (union tcp_addr *)&ips[0]; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + client_addr_setup(sk, *p); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + -1, 100, 100)) + 
test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + + measure_call(con, connect_client, sk, (void *)&port_off); + + close(sk); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. + */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + } +} + +static void *client_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + + synchronize_threads(); + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: worst case", + nr_keys[i], &bt->connect_last_key); + + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: random-search", + nr_keys[i], &bt->connect_last_key); + } + synchronize_threads(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(30, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From 3715d32dc97698e6d2f59b1579577178a1361686 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:22 +0000 Subject: selftests/net: Add TCP_REPAIR TCP-AO tests The test plan is: 1. check that TCP-AO connection may be restored on another socket 2. check restore with wrong send/recv ISN (checking that they are part of MAC generation) 3. check restore with wrong SEQ number extension (checking that high bytes of it taken into MAC generation) Sample output expected: > # ./restore_ipv4 > 1..20 > # 1412[lib/setup.c:254] rand seed 1686610825 > TAP version 13 > ok 1 TCP-AO migrate to another socket: server alive > ok 2 TCP-AO migrate to another socket: post-migrate connection is alive > ok 3 TCP-AO migrate to another socket: counter TCPAOGood increased 23 => 44 > ok 4 TCP-AO migrate to another socket: counter TCPAOGood increased 22 => 42 > ok 5 TCP-AO with wrong send ISN: server couldn't serve > ok 6 TCP-AO with wrong send ISN: post-migrate connection is broken > ok 7 TCP-AO with wrong send ISN: counter TCPAOBad increased 0 => 4 > ok 8 TCP-AO with wrong send ISN: counter TCPAOBad increased 0 => 3 > ok 9 TCP-AO with wrong receive ISN: server couldn't serve > ok 10 TCP-AO with wrong receive ISN: post-migrate connection is broken > ok 11 TCP-AO with wrong receive ISN: counter TCPAOBad increased 4 => 8 > ok 12 TCP-AO with wrong receive ISN: counter TCPAOBad increased 5 => 10 > ok 13 TCP-AO with wrong send SEQ ext number: server couldn't serve > ok 14 TCP-AO with wrong send SEQ ext number: post-migrate connection is broken > ok 15 TCP-AO with wrong send SEQ ext number: counter TCPAOBad increased 9 => 10 > ok 16 TCP-AO with wrong send SEQ ext number: counter TCPAOBad increased 11 => 19 > ok 17 TCP-AO with wrong receive SEQ ext number: post-migrate connection is broken > ok 18 TCP-AO with wrong receive SEQ ext number: server couldn't serve > ok 19 TCP-AO with wrong receive SEQ ext number: counter TCPAOBad increased 10 => 18 > ok 20 TCP-AO with wrong receive SEQ ext number: counter TCPAOBad increased 20 => 23 > # Totals: pass:20 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/restore.c | 236 +++++++++++++++++++++++++++ 2 files changed, 237 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/restore.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index f0b218b99506..aa11a855c3e0 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -3,6 +3,7 @@ TEST_BOTH_AF := bench-lookups TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += restore TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c new file mode 100644 index 000000000000..8fdc808df325 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. + * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +#define fault(type) (inj == FAULT_ ## type) + +static void try_server_run(const char *tst_name, unsigned int port, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + int sk, lsk; + time_t timeout; + ssize_t bytes; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); /* 1: MKT added => connect() */ + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + goto out; + } + + before_cnt = netstat_get_one(cnt_name, NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + bytes = test_server_run(sk, quota, timeout); + if (fault(TIMEOUT)) { + if (bytes > 0) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server couldn't serve", tst_name); + } else { + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + + /* + * Before close() as that will send FIN and move the peer in TCP_CLOSE + * and that will prevent reading AO counters from the peer's socket. 
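+ * (The client collects its AO counters right before the same
+ * synchronization point, so wait for it here before sending FIN.)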
+ */ + synchronize_threads(); /* 3: verified => closed */ +out: + close(sk); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + try_server_run("TCP-AO migrate to another socket", port++, + 0, TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong send ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong send SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, + struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("pre-migrate verify failed"); + + test_enable_repair(sk); + test_sock_checkpoint(sk, img, saddr); + test_ao_checkpoint(sk, ao_img); + test_kill_sk(sk); +} + +static void test_sk_restore(const char *tst_name, unsigned int server_port, + sockaddr_af *saddr, struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + time_t timeout; + int sk; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + + before_cnt = netstat_get_one(cnt_name, NULL); + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, this_ip_dest, server_port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + + timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (fault(TIMEOUT)) + test_ok("%s: post-migrate connection is broken", tst_name); + else + test_fail("%s: post-migrate connection is working", tst_name); + } else { + if (fault(TIMEOUT)) + test_fail("%s: post-migrate connection still working", tst_name); + else + test_ok("%s: post-migrate connection is alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + synchronize_threads(); /* 3: verified => closed */ + close(sk); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + struct tcp_sock_state tcp_img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + test_sk_restore("TCP-AO migrate to another socket", port++, + &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snt_isn += 1; + test_sk_restore("TCP-AO with wrong send ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_isn += 1; + test_sk_restore("TCP-AO with wrong receive ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snd_sne += 1; + test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_sne += 1; + test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_GOOD | TEST_CNT_BAD); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(20, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From 0d16eae57456bbbd7eee45caee53f8d6c4d7ea17 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:23 +0000 Subject: selftests/net: Add SEQ number extension test Check that on SEQ number wraparound there is no disruption or TCPAOBad segments produced. Sample of expected output: > # ./seq-ext_ipv4 > 1..7 > # 1436[lib/setup.c:254] rand seed 1686611079 > TAP version 13 > ok 1 server alive > ok 2 post-migrate connection alive > ok 3 TCPAOGood counter increased 1002 => 3002 > ok 4 TCPAOGood counter increased 1003 => 3003 > ok 5 TCPAOBad counter didn't increase > ok 6 TCPAOBad counter didn't increase > ok 7 SEQ extension incremented: 1/1999, 1/998999 > # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/seq-ext.c | 245 +++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/seq-ext.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index aa11a855c3e0..5408c7233460 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -4,6 +4,7 @@ TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += restore +TEST_BOTH_AF += seq-ext TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c new file mode 100644 index 000000000000..ad4e77d6823e --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check that after SEQ number wrap-around: + * 1. SEQ-extension has upper bytes set + * 2. TCP conneciton is alive and no TCPAOBad segments + * In order to test (2), the test doesn't just adjust seq number for a queue + * on a connected socket, but migrates it to another sk+port number, so + * that there won't be any delayed packets that will fail to verify + * with the new SEQ numbers. + */ +#include +#include "aolib.h" + +const unsigned int nr_packets = 1000; +const unsigned int msg_len = 1000; +const unsigned int quota = nr_packets * msg_len; +unsigned int client_new_port; + +/* Move them closer to roll-over */ +static void test_adjust_seqs(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + bool server) +{ + uint32_t new_seq1, new_seq2; + + /* make them roll-over during quota, but on different segments */ + if (server) { + new_seq1 = ((uint32_t)-1) - msg_len; + new_seq2 = ((uint32_t)-1) - (quota - 2 * msg_len); + } else { + new_seq1 = ((uint32_t)-1) - (quota - 2 * msg_len); + new_seq2 = ((uint32_t)-1) - msg_len; + } + + img->in.seq = new_seq1; + img->trw.snd_wl1 = img->in.seq - msg_len; + img->out.seq = new_seq2; + img->trw.rcv_wup = img->in.seq; +} + +static int test_sk_restore(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, sockaddr_af *saddr, + const union tcp_addr daddr, unsigned int dport, + struct tcp_ao_counters *cnt) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, daddr, dport); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, daddr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, cnt)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + return sk; +} + +static void *server_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: 
accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + goto out; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, true); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + client_new_port, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + } else { + test_ok("server alive"); + } + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + test_enable_repair(sk); + test_ao_checkpoint(sk, &ao_img); + if (ao_img.snd_sne && ao_img.rcv_sne) { + test_ok("SEQ extension incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } else { + test_fail("SEQ extension was not incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } + + synchronize_threads(); /* 6: verified => closed */ +out: + close(sk); + return NULL; +} + +static void *client_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("pre-migrate verify failed"); + return NULL; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + client_new_port = ntohs(saddr.sin6_port) + 1; + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + client_new_port = ntohs(saddr.sin_port) + 1; + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, false); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + 
test_server_port + 1, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("post-migrate verify failed"); + else + test_ok("post-migrate connection alive"); + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + + synchronize_threads(); /* 6: verified => closed */ + close(sk); + + synchronize_threads(); /* don't race to exit: let server exit() */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(7, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From c6df7b2361d721f40610df5c832ea0fa73e918b1 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:24 +0000 Subject: selftests/net: Add TCP-AO RST test Check that both active and passive reset works and correctly sign segments with TCP-AO or don't send RSTs if not possible to sign. A listening socket with backlog = 0 gets one connection in accept queue, another in syn queue. Once the server/listener socket is forcibly closed, client sockets aren't connected to anything. In regular situation they would receive RST on any segment, but with TCP-AO as there's no listener, no AO-key and unknown ISNs, no RST should be sent. And "passive" reset, where RST is sent on reply for some segment (tcp_v{4,6}_send_reset()) - there use TCP_REPAIR to corrupt SEQ numbers, which later results in TCP-AO signed RST, which will be verified and client socket will get EPIPE. No TCPAORequired/TCPAOBad segments are expected during these tests. Sample of the output: > # ./rst_ipv4 > 1..15 > # 1462[lib/setup.c:254] rand seed 1686611171 > TAP version 13 > ok 1 servered 1000 bytes > ok 2 Verified established tcp connection > ok 3 sk[0] = 7, connection was reset > ok 4 sk[1] = 8, connection was reset > ok 5 sk[2] = 9 > ok 6 MKT counters are good on server > ok 7 Verified established tcp connection > ok 8 client connection broken post-seq-adjust > ok 9 client connection was reset > ok 10 No segments without AO sign (server) > ok 11 Signed AO segments (server): 0 => 30 > ok 12 No segments with bad AO sign (server) > ok 13 No segments without AO sign (client) > ok 14 Signed AO segments (client): 0 => 30 > ok 15 No segments with bad AO sign (client) > # Totals: pass:15 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/lib/sock.c | 2 +- tools/testing/selftests/net/tcp_ao/rst.c | 415 ++++++++++++++++++++++++++ 3 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/tcp_ao/rst.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 5408c7233460..1d4f7576d774 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -4,6 +4,7 @@ TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += restore +TEST_BOTH_AF += rst TEST_BOTH_AF += seq-ext TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 7f3c31b7d997..c75d82885a2e 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -566,7 +566,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0); if (got <= 0) - test_error("recv(): %zd", got); + return i; bytes += got; } while (bytes < sent); if (bytes > sent) diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c new file mode 100644 index 000000000000..ac06009a7f5f --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t quota = 1000; +/* + * Backlog == 0 means 1 connection in queue, see: + * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") + */ +const unsigned int backlog; + +static void netstats_check(struct netstat *before, struct netstat *after, + char *msg) +{ + uint64_t before_cnt, after_cnt; + + before_cnt = netstat_get(before, "TCPAORequired", NULL); + after_cnt = netstat_get(after, "TCPAORequired", NULL); + if (after_cnt > before_cnt) + test_fail("Segments without AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments without AO sign (%s)", msg); + + before_cnt = netstat_get(before, "TCPAOGood", NULL); + after_cnt = netstat_get(after, "TCPAOGood", NULL); + if (after_cnt <= before_cnt) + test_fail("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + + before_cnt = netstat_get(before, "TCPAOBad", NULL); + after_cnt = netstat_get(after, "TCPAOBad", NULL); + if (after_cnt > before_cnt) + test_fail("Segments with bad AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments with bad AO sign (%s)", msg); +} + +/* + * Another way to send RST, but not through tcp_v{4,6}_send_reset() + * is tcp_send_active_reset(), that is not in reply to inbound segment, + * but rather active send. It uses tcp_transmit_skb(), so that should + * work, but as it also sends RST - nice that it can be covered as well. 
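+ * Here that path is exercised by close()-ing a socket with SO_LINGER
+ * enabled and a zero linger timeout, see close_forced() below.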
+ */ +static void close_forced(int sk) +{ + struct linger sl; + + sl.l_onoff = 1; + sl.l_linger = 0; + if (setsockopt(sk, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl))) + test_error("setsockopt(SO_LINGER)"); + close(sk); +} + +static int test_wait_for_exception(int sk, time_t sec) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set efds; + int ret; + + FD_ZERO(&efds); + FD_SET(sk, &efds); + + if (sec) + ptv = &tv; + + errno = 0; + ret = select(sk + 1, NULL, NULL, &efds, ptv); + if (ret < 0) + return -errno; + return ret ? sk : 0; +} + +static void test_server_active_rst(unsigned int port) +{ + struct tcp_ao_counters cnt1, cnt2; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, port, backlog); + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_get_tcp_ao_counters(lsk, &cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 1: MKT added */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: connection accept()ed, another queued */ + if (test_get_tcp_ao_counters(lsk, &cnt2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: close listen socket */ + close(lsk); + bytes = test_server_run(sk, quota, 0); + if (bytes != quota) + test_error("servered only %zd bytes", bytes); + else + test_ok("servered %zd bytes", bytes); + + synchronize_threads(); /* 4: finishing up */ + close_forced(sk); + + synchronize_threads(); /* 5: closed active sk */ + + synchronize_threads(); /* 6: counters checks */ + if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + test_fail("MKT counters (server) have not only good packets"); + else + test_ok("MKT counters are good on server"); +} + +static void test_server_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned %zd", bytes); + } + + synchronize_threads(); /* 3: chekpoint/restore the connection */ + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 4: terminate server + send more on client */ + bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC); + close(sk); + test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); + + synchronize_threads(); /* 5: verified => closed */ + close(sk); +} + +static void *server_fn(void *arg) +{ + struct netstat *ns_before, *ns_after; + unsigned int port = test_server_port; + + ns_before = netstat_read(); + + test_server_active_rst(port++); + test_server_passive_rst(port++); + + ns_after = netstat_read(); + netstats_check(ns_before, ns_after, "server"); + 
netstat_free(ns_after); + netstat_free(ns_before); + synchronize_threads(); /* exit */ + + synchronize_threads(); /* don't race to exit() - client exits */ + return NULL; +} + +static int test_wait_fds(int sk[], size_t nr, bool is_writable[], + ssize_t wait_for, time_t sec) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set left; + size_t i; + int ret; + + FD_ZERO(&left); + for (i = 0; i < nr; i++) { + FD_SET(sk[i], &left); + if (is_writable) + is_writable[i] = false; + } + + if (sec) + ptv = &tv; + + do { + bool is_empty = true; + fd_set fds, efds; + int nfd = 0; + + FD_ZERO(&fds); + FD_ZERO(&efds); + for (i = 0; i < nr; i++) { + if (!FD_ISSET(sk[i], &left)) + continue; + + if (sk[i] > nfd) + nfd = sk[i]; + + FD_SET(sk[i], &fds); + FD_SET(sk[i], &efds); + is_empty = false; + } + if (is_empty) + return -ENOENT; + + errno = 0; + ret = select(nfd + 1, NULL, &fds, &efds, ptv); + if (ret < 0) + return -errno; + if (!ret) + return -ETIMEDOUT; + for (i = 0; i < nr; i++) { + if (FD_ISSET(sk[i], &fds)) { + if (is_writable) + is_writable[i] = true; + FD_CLR(sk[i], &left); + wait_for--; + continue; + } + if (FD_ISSET(sk[i], &efds)) { + FD_CLR(sk[i], &left); + wait_for--; + } + } + } while (wait_for > 0); + + return 0; +} + +static void test_client_active_rst(unsigned int port) +{ + /* one in queue, another accept()ed */ + unsigned int wait_for = backlog + 2; + int i, sk[3], err; + bool is_writable[ARRAY_SIZE(sk)] = {false}; + unsigned int last = ARRAY_SIZE(sk) - 1; + + for (i = 0; i < ARRAY_SIZE(sk); i++) { + sk[i] = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk[i] < 0) + test_error("socket()"); + if (test_add_key(sk[i], DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + + synchronize_threads(); /* 1: MKT added */ + for (i = 0; i < last; i++) { + err = _test_connect_socket(sk[i], this_ip_dest, port, + (i == 0) ? TEST_TIMEOUT_SEC : -1); + + if (err < 0) + test_error("failed to connect()"); + } + + synchronize_threads(); /* 2: connection accept()ed, another queued */ + err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("test_wait_fds(): %d", err); + + synchronize_threads(); /* 3: close listen socket */ + if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 4: finishing up */ + err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + if (err < 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 5: closed active sk */ + err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL, + wait_for, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("select(): %d", err); + + for (i = 0; i < ARRAY_SIZE(sk); i++) { + socklen_t slen = sizeof(err); + + if (getsockopt(sk[i], SOL_SOCKET, SO_ERROR, &err, &slen)) + test_error("getsockopt()"); + if (is_writable[i] && err != ECONNRESET) { + test_fail("sk[%d] = %d, err = %d, connection wasn't reset", + i, sk[i], err); + } else { + test_ok("sk[%d] = %d%s", i, sk[i], + is_writable[i] ? 
", connection was reset" : ""); + } + } + synchronize_threads(); /* 6: counters checks */ +} + +static void test_client_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af saddr; + int sk, err; + socklen_t slen = sizeof(err); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 3: chekpoint/restore the connection */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); + + img.out.seq += quota; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, &img, &saddr, this_ip_dest, port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, &ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(&img); + + synchronize_threads(); /* 4: terminate server + send more on client */ + if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC)) + test_ok("client connection broken post-seq-adjust"); + else + test_fail("client connection still works post-seq-adjust"); + + test_wait_for_exception(sk, TEST_TIMEOUT_SEC); + + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen)) + test_error("getsockopt()"); + if (err != ECONNRESET && err != EPIPE) + test_fail("client connection was not reset: %d", err); + else + test_ok("client connection was reset"); + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 5: verified => closed */ + close(sk); + test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); +} + +static void *client_fn(void *arg) +{ + struct netstat *ns_before, *ns_after; + unsigned int port = test_server_port; + + ns_before = netstat_read(); + + test_client_active_rst(port++); + test_client_passive_rst(port++); + + ns_after = netstat_read(); + netstats_check(ns_before, ns_after, "client"); + netstat_free(ns_after); + netstat_free(ns_before); + + synchronize_threads(); /* exit */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(15, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From 8c4e8dd0c047db7c68be01e6c30ada320b8babbc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:25 +0000 Subject: selftests/net: Add TCP-AO selfconnect/simultaneous connect test Check that a rare functionality of TCP named self-connect works with TCP-AO. This "under the cover" also checks TCP simultaneous connect (TCP_SYN_RECV socket state), which would be harder to check other ways. In order to verify that it's indeed TCP simultaneous connect, check the counters TCPChallengeACK and TCPSYNChallenge. 
Sample of the output: > # ./self-connect_ipv6 > 1..4 > # 1738[lib/setup.c:254] rand seed 1696451931 > TAP version 13 > ok 1 self-connect(same keyids): connect TCPAOGood 0 => 24 > ok 2 self-connect(different keyids): connect TCPAOGood 26 => 50 > ok 3 self-connect(restore): connect TCPAOGood 52 => 97 > ok 4 self-connect(restore, different keyids): connect TCPAOGood 99 => 144 > # Totals: pass:4 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + tools/testing/selftests/net/tcp_ao/self-connect.c | 197 ++++++++++++++++++++++ 2 files changed, 198 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/self-connect.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 1d4f7576d774..9286f9b99c86 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -5,6 +5,7 @@ TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard TEST_BOTH_AF += restore TEST_BOTH_AF += rst +TEST_BOTH_AF += self-connect TEST_BOTH_AF += seq-ext TEST_BOTH_AF += setsockopt-closed TEST_BOTH_AF += unsigned-md5 diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c new file mode 100644 index 000000000000..e154d9e198a9 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "aolib.h" + +static union tcp_addr local_addr; + +static void __setup_lo_intf(const char *lo_intf, + const char *addr_str, uint8_t prefix) +{ + if (inet_pton(TEST_FAMILY, addr_str, &local_addr) != 1) + test_error("Can't convert local ip address"); + + if (ip_addr_add(lo_intf, TEST_FAMILY, local_addr, prefix)) + test_error("Failed to add %s ip address", lo_intf); + + if (link_set_up(lo_intf)) + test_error("Failed to bring %s up", lo_intf); +} + +static void setup_lo_intf(const char *lo_intf) +{ +#ifdef IPV6_TEST + __setup_lo_intf(lo_intf, "::1", 128); +#else + __setup_lo_intf(lo_intf, "127.0.0.1", 8); +#endif +} + +static void tcp_self_connect(const char *tst, unsigned int port, + bool different_keyids, bool check_restore) +{ + uint64_t before_challenge_ack, after_challenge_ack; + uint64_t before_syn_challenge, after_syn_challenge; + struct tcp_ao_counters before_ao, after_ao; + uint64_t before_aogood, after_aogood; + struct netstat *ns_before, *ns_after; + const size_t nr_packets = 20; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af addr; + int sk; + + tcp_addr_to_sockaddr_in(&addr, &local_addr, htons(port)); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (different_keyids) { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL); + before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL); + if 
(test_get_tcp_ao_counters(sk, &before_ao)) + test_error("test_get_tcp_ao_counters()"); + + if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, + sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + test_error("failed to connect()"); + } + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL); + after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL); + if (test_get_tcp_ao_counters(sk, &after_ao)) + test_error("test_get_tcp_ao_counters()"); + if (!check_restore) { + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + } + netstat_free(ns_after); + + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + close(sk); + return; + } + if (after_challenge_ack <= before_challenge_ack || + after_syn_challenge <= before_syn_challenge) { + /* + * It's also meant to test simultaneous open, so check + * these counters as well. + */ + test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu", + tst, after_challenge_ack, before_challenge_ack, + after_syn_challenge, before_syn_challenge); + close(sk); + return; + } + + if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + close(sk); + return; + } + + if (!check_restore) { + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); + close(sk); + return; + } + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &addr); +#ifdef IPV6_TEST + addr.sin6_port = htons(port + 1); +#else + addr.sin_port = htons(port + 1); +#endif + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + __test_sock_restore(sk, "lo", &img, &addr, &addr, sizeof(addr)); + if (different_keyids) { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + test_ao_restore(sk, &ao_img); + test_disable_repair(sk); + test_sock_state_free(&img); + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + netstat_free(ns_after); + close(sk); + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + return; + } + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_lo_intf("lo"); + + tcp_self_connect("self-connect(same keyids)", port++, false, false); + tcp_self_connect("self-connect(different keyids)", port++, true, false); + 
tcp_self_connect("self-connect(restore)", port, false, true); + port += 2; + tcp_self_connect("self-connect(restore, different keyids)", port, true, true); + port += 2; + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(4, client_fn, NULL); + return 0; +} -- cgit v1.2.3-70-g09d2 From 3c3ead55564825975cc40e59bfaf6c4834ea9745 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 15 Dec 2023 02:36:26 +0000 Subject: selftests/net: Add TCP-AO key-management test Check multiple keys on a socket: - rotation on closed socket - current/rnext operations shouldn't be possible on listen sockets - current/rnext key set should be the one, that's used on connect() - key rotations with pseudo-random generated keys - copying matching keys on connect() and on accept() At this moment there are 3 tests that are "expected" to fail: a kernel fix is needed to improve the situation, they are marked XFAIL. Sample output: > # ./key-management_ipv4 > 1..120 > # 1601[lib/setup.c:239] rand seed 1700526653 > TAP version 13 > ok 1 closed socket, delete a key: the key was deleted > ok 2 closed socket, delete all keys: the key was deleted > ok 3 closed socket, delete current key: key deletion was prevented > ok 4 closed socket, delete rnext key: key deletion was prevented > ok 5 closed socket, delete a key + set current/rnext: the key was deleted > ok 6 closed socket, force-delete current key: the key was deleted > ok 7 closed socket, force-delete rnext key: the key was deleted > ok 8 closed socket, delete current+rnext key: key deletion was prevented > ok 9 closed socket, add + change current key > ok 10 closed socket, add + change rnext key > ok 11 listen socket, delete a key: the key was deleted > ok 12 listen socket, delete all keys: the key was deleted > ok 13 listen socket, setting current key not allowed > ok 14 listen socket, setting rnext key not allowed > ok 15 # XFAIL listen() after current/rnext keys set: the socket has current/rnext keys: 100:200 > ok 16 # XFAIL listen socket, delete current key from before listen(): failed to delete the key 100:100 -16 > ok 17 # XFAIL listen socket, delete rnext key from before listen(): failed to delete the key 200:200 -16 > ok 18 listen socket, getsockopt(TCP_AO_REPAIR) is restricted > ok 19 listen socket, setsockopt(TCP_AO_REPAIR) is restricted > ok 20 listen socket, delete a key + set current/rnext: key deletion was prevented > ok 21 listen socket, force-delete current key: key deletion was prevented > ok 22 listen socket, force-delete rnext key: key deletion was prevented > ok 23 listen socket, delete a key: the key was deleted > ok 24 listen socket, add + change current key > ok 25 listen socket, add + change rnext key > ok 26 server: Check current/rnext keys unset before connect(): The socket keys are consistent with the expectations > ok 27 client: Check current/rnext keys unset before connect(): current key 19 as expected > ok 28 client: Check current/rnext keys unset before connect(): rnext key 146 as expected > ok 29 server: Check current/rnext keys unset before connect(): server alive > ok 30 server: Check current/rnext keys unset before connect(): passed counters checks > ok 31 client: Check current/rnext keys unset before connect(): The socket keys are consistent with the expectations > ok 32 server: Check current/rnext keys unset before connect(): The socket keys are consistent with the expectations > ok 33 server: Check current/rnext keys unset before connect(): passed counters checks > ok 34 client: Check current/rnext keys unset 
before connect(): passed counters checks > ok 35 server: Check current/rnext keys set before connect(): The socket keys are consistent with the expectations > ok 36 server: Check current/rnext keys set before connect(): server alive > ok 37 server: Check current/rnext keys set before connect(): passed counters checks > ok 38 client: Check current/rnext keys set before connect(): current key 10 as expected > ok 39 client: Check current/rnext keys set before connect(): rnext key 137 as expected > ok 40 server: Check current/rnext keys set before connect(): The socket keys are consistent with the expectations > ok 41 client: Check current/rnext keys set before connect(): The socket keys are consistent with the expectations > ok 42 client: Check current/rnext keys set before connect(): passed counters checks > ok 43 server: Check current/rnext keys set before connect(): passed counters checks > ok 44 server: Check current != rnext keys set before connect(): The socket keys are consistent with the expectations > ok 45 server: Check current != rnext keys set before connect(): server alive > ok 46 server: Check current != rnext keys set before connect(): passed counters checks > ok 47 client: Check current != rnext keys set before connect(): current key 10 as expected > ok 48 client: Check current != rnext keys set before connect(): rnext key 132 as expected > ok 49 server: Check current != rnext keys set before connect(): The socket keys are consistent with the expectations > ok 50 client: Check current != rnext keys set before connect(): The socket keys are consistent with the expectations > ok 51 client: Check current != rnext keys set before connect(): passed counters checks > ok 52 server: Check current != rnext keys set before connect(): passed counters checks > ok 53 server: Check current flapping back on peer's RnextKey request: The socket keys are consistent with the expectations > ok 54 server: Check current flapping back on peer's RnextKey request: server alive > ok 55 server: Check current flapping back on peer's RnextKey request: passed counters checks > ok 56 client: Check current flapping back on peer's RnextKey request: current key 10 as expected > ok 57 client: Check current flapping back on peer's RnextKey request: rnext key 132 as expected > ok 58 server: Check current flapping back on peer's RnextKey request: The socket keys are consistent with the expectations > ok 59 client: Check current flapping back on peer's RnextKey request: The socket keys are consistent with the expectations > ok 60 server: Check current flapping back on peer's RnextKey request: passed counters checks > ok 61 client: Check current flapping back on peer's RnextKey request: passed counters checks > ok 62 server: Rotate over all different keys: The socket keys are consistent with the expectations > ok 63 server: Rotate over all different keys: server alive > ok 64 server: Rotate over all different keys: passed counters checks > ok 65 server: Rotate over all different keys: current key 128 as expected > ok 66 client: Rotate over all different keys: rnext key 128 as expected > ok 67 server: Rotate over all different keys: current key 129 as expected > ok 68 client: Rotate over all different keys: rnext key 129 as expected > ok 69 server: Rotate over all different keys: current key 130 as expected > ok 70 client: Rotate over all different keys: rnext key 130 as expected > ok 71 server: Rotate over all different keys: current key 131 as expected > ok 72 client: Rotate over all different keys: rnext key 131 
as expected > ok 73 server: Rotate over all different keys: current key 132 as expected > ok 74 client: Rotate over all different keys: rnext key 132 as expected > ok 75 server: Rotate over all different keys: current key 133 as expected > ok 76 client: Rotate over all different keys: rnext key 133 as expected > ok 77 server: Rotate over all different keys: current key 134 as expected > ok 78 client: Rotate over all different keys: rnext key 134 as expected > ok 79 server: Rotate over all different keys: current key 135 as expected > ok 80 client: Rotate over all different keys: rnext key 135 as expected > ok 81 server: Rotate over all different keys: current key 136 as expected > ok 82 client: Rotate over all different keys: rnext key 136 as expected > ok 83 server: Rotate over all different keys: current key 137 as expected > ok 84 client: Rotate over all different keys: rnext key 137 as expected > ok 85 server: Rotate over all different keys: current key 138 as expected > ok 86 client: Rotate over all different keys: rnext key 138 as expected > ok 87 server: Rotate over all different keys: current key 139 as expected > ok 88 client: Rotate over all different keys: rnext key 139 as expected > ok 89 server: Rotate over all different keys: current key 140 as expected > ok 90 client: Rotate over all different keys: rnext key 140 as expected > ok 91 server: Rotate over all different keys: current key 141 as expected > ok 92 client: Rotate over all different keys: rnext key 141 as expected > ok 93 server: Rotate over all different keys: current key 142 as expected > ok 94 client: Rotate over all different keys: rnext key 142 as expected > ok 95 server: Rotate over all different keys: current key 143 as expected > ok 96 client: Rotate over all different keys: rnext key 143 as expected > ok 97 server: Rotate over all different keys: current key 144 as expected > ok 98 client: Rotate over all different keys: rnext key 144 as expected > ok 99 server: Rotate over all different keys: current key 145 as expected > ok 100 client: Rotate over all different keys: rnext key 145 as expected > ok 101 server: Rotate over all different keys: current key 146 as expected > ok 102 client: Rotate over all different keys: rnext key 146 as expected > ok 103 server: Rotate over all different keys: current key 127 as expected > ok 104 client: Rotate over all different keys: rnext key 127 as expected > ok 105 client: Rotate over all different keys: current key 0 as expected > ok 106 client: Rotate over all different keys: rnext key 127 as expected > ok 107 server: Rotate over all different keys: The socket keys are consistent with the expectations > ok 108 client: Rotate over all different keys: The socket keys are consistent with the expectations > ok 109 client: Rotate over all different keys: passed counters checks > ok 110 server: Rotate over all different keys: passed counters checks > ok 111 server: Check accept() => established key matching: The socket keys are consistent with the expectations > ok 112 Can't add a key with non-matching ip-address for established sk > ok 113 Can't add a key with non-matching VRF for established sk > ok 114 server: Check accept() => established key matching: server alive > ok 115 server: Check accept() => established key matching: passed counters checks > ok 116 client: Check connect() => established key matching: current key 0 as expected > ok 117 client: Check connect() => established key matching: rnext key 128 as expected > ok 118 client: Check connect() => established key 
matching: The socket keys are consistent with the expectations > ok 119 server: Check accept() => established key matching: The socket keys are consistent with the expectations > ok 120 server: Check accept() => established key matching: passed counters checks > # Totals: pass:120 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 1 + .../testing/selftests/net/tcp_ao/key-management.c | 1180 ++++++++++++++++++++ 2 files changed, 1181 insertions(+) create mode 100644 tools/testing/selftests/net/tcp_ao/key-management.c (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 9286f9b99c86..6343cfcf919b 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -3,6 +3,7 @@ TEST_BOTH_AF := bench-lookups TEST_BOTH_AF += connect TEST_BOTH_AF += connect-deny TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += key-management TEST_BOTH_AF += restore TEST_BOTH_AF += rst TEST_BOTH_AF += self-connect diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c new file mode 100644 index 000000000000..c48b4970ca17 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -0,0 +1,1180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov */ +#include +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +union tcp_addr wrong_addr; +#define SECOND_PASSWORD "at all times sincere friends of freedom have been rare" +#define fault(type) (inj == FAULT_ ## type) + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF"); +} + + +static int prepare_sk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("test_add_key()"); + + if (addr && test_add_key(sk, SECOND_PASSWORD, *addr, + DEFAULT_TEST_PREFIX, sndid, rcvid)) + test_error("test_add_key()"); + + return sk; +} + +static int prepare_lsk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = prepare_sk(addr, sndid, rcvid); + + if (listen(sk, 10)) + test_error("listen()"); + + return sk; +} + +static int test_del_key(int sk, uint8_t sndid, uint8_t rcvid, bool async, + int current_key, int rnext_key) +{ + struct tcp_ao_info_opt ao_info = {}; + struct tcp_ao_getsockopt key = {}; + struct tcp_ao_del del = {}; + sockaddr_af sockaddr; + int err; + + tcp_addr_to_sockaddr_in(&del.addr, &this_ip_dest, 0); + del.prefix = DEFAULT_TEST_PREFIX; + del.sndid = sndid; + del.rcvid = rcvid; + + if (current_key >= 0) { + del.set_current = 1; + del.current_key = (uint8_t)current_key; + } + if (rnext_key >= 0) { + del.set_rnext = 1; + del.rnext = 
(uint8_t)rnext_key; + } + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del)); + if (err < 0) + return -errno; + + if (async) + return 0; + + tcp_addr_to_sockaddr_in(&sockaddr, &this_ip_dest, 0); + err = test_get_one_ao(sk, &key, &sockaddr, sizeof(sockaddr), + DEFAULT_TEST_PREFIX, sndid, rcvid); + if (!err) + return -EEXIST; + if (err != -E2BIG) + test_error("getsockopt()"); + if (current_key < 0 && rnext_key < 0) + return 0; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_key >= 0 && ao_info.current_key != (uint8_t)current_key) + return -ENOTRECOVERABLE; + if (rnext_key >= 0 && ao_info.rnext != (uint8_t)rnext_key) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_delete_key(char *tst_name, int sk, uint8_t sndid, uint8_t rcvid, + bool async, int current_key, int rnext_key, + fault_t inj) +{ + int err; + + err = test_del_key(sk, sndid, rcvid, async, current_key, rnext_key); + if ((err == -EBUSY && fault(BUSY)) || (err == -EINVAL && fault(CURRNEXT))) { + test_ok("%s: key deletion was prevented", tst_name); + return; + } + if (err && fault(FIXME)) { + test_xfail("%s: failed to delete the key %u:%u %d", + tst_name, sndid, rcvid, err); + return; + } + if (!err) { + if (fault(BUSY) || fault(CURRNEXT)) { + test_fail("%s: the key was deleted %u:%u %d", tst_name, + sndid, rcvid, err); + } else { + test_ok("%s: the key was deleted", tst_name); + } + return; + } + test_fail("%s: can't delete the key %u:%u %d", tst_name, sndid, rcvid, err); +} + +static int test_set_key(int sk, int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + if (current_keyid >= 0) { + ao_info.set_current = 1; + ao_info.current_key = (uint8_t)current_keyid; + } + if (rnext_keyid >= 0) { + ao_info.set_rnext = 1; + ao_info.rnext = (uint8_t)rnext_keyid; + } + + err = test_set_ao_info(sk, &ao_info); + if (err) + return err; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_keyid >= 0 && ao_info.current_key != (uint8_t)current_keyid) + return -ENOTRECOVERABLE; + if (rnext_keyid >= 0 && ao_info.rnext != (uint8_t)rnext_keyid) + return -ENOTRECOVERABLE; + return 0; +} + +static int test_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, + set_current, set_rnext, + prefix, 0, sndid, rcvid, 0, keyflags, + strlen(key), key); + if (err) + return err; + + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static int __try_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + err = test_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (err) + return err; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (set_current && ao_info.current_key != sndid) + return -ENOTRECOVERABLE; + if (set_rnext && ao_info.rnext != rcvid) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_add_current_rnext_key(char *tst_name, int sk, const char *key, + uint8_t keyflags, + union tcp_addr 
in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid, fault_t inj) +{ + int err; + + err = __try_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (!err && !fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + if (err == -EINVAL && fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + test_fail("%s", tst_name); +} + +static void check_closed_socket(void) +{ + int sk; + + sk = prepare_sk(&this_ip_dest, 200, 200); + try_delete_key("closed socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("closed socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_delete_key("closed socket, delete current key", sk, 100, 100, 0, -1, -1, FAULT_BUSY); + try_delete_key("closed socket, delete rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("closed socket, delete a key + set current/rnext", sk, 100, 100, 0, 10, 13, 0); + try_delete_key("closed socket, force-delete current key", sk, 10, 11, 0, 200, -1, 0); + try_delete_key("closed socket, force-delete rnext key", sk, 12, 13, 0, -1, 200, 0); + try_delete_key("closed socket, delete current+rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_add_current_rnext_key("closed socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, 0); + try_add_current_rnext_key("closed socket, add + change rnext key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, 0); + close(sk); +} + +static void assert_no_current_rnext(const char *tst_msg, int sk) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (ao_info.set_current || ao_info.set_rnext) { + test_xfail("%s: the socket has current/rnext keys: %d:%d", + tst_msg, + (ao_info.set_current) ? ao_info.current_key : -1, + (ao_info.set_rnext) ? 
ao_info.rnext : -1); + } else { + test_ok("%s: the socket has no current/rnext keys", tst_msg); + } +} + +static void assert_no_tcp_repair(void) +{ + struct tcp_ao_repair ao_img = {}; + socklen_t len = sizeof(ao_img); + int sk, err; + + sk = prepare_sk(&this_ip_dest, 200, 200); + test_enable_repair(sk); + if (listen(sk, 10)) + test_error("listen()"); + errno = 0; + err = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, &len); + if (err && errno == EPERM) + test_ok("listen socket, getsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, getsockopt(TCP_AO_REPAIR) works"); + errno = 0; + err = setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, sizeof(ao_img)); + if (err && errno == EPERM) + test_ok("listen socket, setsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, setsockopt(TCP_AO_REPAIR) works"); + close(sk); +} + +static void check_listen_socket(void) +{ + int sk, err; + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_delete_key("listen socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("listen socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + err = test_set_key(sk, 100, -1); + if (err == -EINVAL) + test_ok("listen socket, setting current key not allowed"); + else + test_fail("listen socket, set current key"); + err = test_set_key(sk, -1, 200); + if (err == -EINVAL) + test_ok("listen socket, setting rnext key not allowed"); + else + test_fail("listen socket, set rnext key"); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + if (listen(sk, 10)) + test_error("listen()"); + assert_no_current_rnext("listen() after current/rnext keys set", sk); + try_delete_key("listen socket, delete current key from before listen()", sk, 100, 100, 0, -1, -1, FAULT_FIXME); + try_delete_key("listen socket, delete rnext key from before listen()", sk, 200, 200, 0, -1, -1, FAULT_FIXME); + close(sk); + + assert_no_tcp_repair(); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("listen socket, delete a key + set current/rnext", sk, + 100, 100, 0, 10, 13, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete current key", sk, + 10, 11, 0, 200, -1, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete rnext key", sk, + 12, 13, 0, -1, 200, FAULT_CURRNEXT); + try_delete_key("listen socket, delete a key", sk, + 200, 200, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_add_current_rnext_key("listen socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, FAULT_CURRNEXT); + try_add_current_rnext_key("listen socket, add + change rnext key", + sk, "Laaaa! 
Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, FAULT_CURRNEXT); + close(sk); +} + +static const char *fips_fpath = "/proc/sys/crypto/fips_enabled"; +static bool is_fips_enabled(void) +{ + static int fips_checked = -1; + FILE *fenabled; + int enabled; + + if (fips_checked >= 0) + return !!fips_checked; + if (access(fips_fpath, R_OK)) { + if (errno != ENOENT) + test_error("Can't open %s", fips_fpath); + fips_checked = 0; + return false; + } + fenabled = fopen(fips_fpath, "r"); + if (!fenabled) + test_error("Can't open %s", fips_fpath); + if (fscanf(fenabled, "%d", &enabled) != 1) + test_error("Can't read from %s", fips_fpath); + fclose(fenabled); + fips_checked = !!enabled; + return !!fips_checked; +} + +struct test_key { + char password[TCP_AO_MAXKEYLEN]; + const char *alg; + unsigned int len; + uint8_t client_keyid; + uint8_t server_keyid; + uint8_t maclen; + uint8_t matches_client : 1, + matches_server : 1, + matches_vrf : 1, + is_current : 1, + is_rnext : 1, + used_on_handshake : 1, + used_after_accept : 1, + used_on_client : 1; +}; + +struct key_collection { + unsigned int nr_keys; + struct test_key *keys; +}; + +static struct key_collection collection; + +#define TEST_MAX_MACLEN 16 +const char *test_algos[] = { + "cmac(aes128)", + "hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)", + "hmac(sha224)", "hmac(sha3-512)", + /* only if !CONFIG_FIPS */ +#define TEST_NON_FIPS_ALGOS 2 + "hmac(rmd160)", "hmac(md5)" +}; +const unsigned int test_maclens[] = { 1, 4, 12, 16 }; +#define MACLEN_SHIFT 2 +#define ALGOS_SHIFT 4 + +static unsigned int make_mask(unsigned int shift, unsigned int prev_shift) +{ + unsigned int ret = BIT(shift) - 1; + + return ret << prev_shift; +} + +static void init_key_in_collection(unsigned int index, bool randomized) +{ + struct test_key *key = &collection.keys[index]; + unsigned int algos_nr, algos_index; + + /* Same for randomized and non-randomized test flows */ + key->client_keyid = index; + key->server_keyid = 127 + index; + key->matches_client = 1; + key->matches_server = 1; + key->matches_vrf = 1; + /* not really even random, but good enough for a test */ + key->len = rand() % (TCP_AO_MAXKEYLEN - TEST_TCP_AO_MINKEYLEN); + key->len += TEST_TCP_AO_MINKEYLEN; + randomize_buffer(key->password, key->len); + + if (randomized) { + key->maclen = (rand() % TEST_MAX_MACLEN) + 1; + algos_index = rand(); + } else { + unsigned int shift = MACLEN_SHIFT; + + key->maclen = test_maclens[index & make_mask(shift, 0)]; + algos_index = index & make_mask(ALGOS_SHIFT, shift); + } + algos_nr = ARRAY_SIZE(test_algos); + if (is_fips_enabled()) + algos_nr -= TEST_NON_FIPS_ALGOS; + key->alg = test_algos[algos_index % algos_nr]; +} + +static int init_default_key_collection(unsigned int nr_keys, bool randomized) +{ + size_t key_sz = sizeof(collection.keys[0]); + + if (!nr_keys) { + free(collection.keys); + collection.keys = NULL; + return 0; + } + + /* + * All keys have uniq sndid/rcvid and sndid != rcvid in order to + * check for any bugs/issues for different keyids, visible to both + * peers. Keyid == 254 is unused. 
+ */ + if (nr_keys > 127) + test_error("Test requires too many keys, correct the source"); + + collection.keys = reallocarray(collection.keys, nr_keys, key_sz); + if (!collection.keys) + return -ENOMEM; + + memset(collection.keys, 0, nr_keys * key_sz); + collection.nr_keys = nr_keys; + while (nr_keys--) + init_key_in_collection(nr_keys, randomized); + + return 0; +} + +static void test_key_error(const char *msg, struct test_key *key) +{ + test_error("%s: key: { %s, %u:%u, %u, %u:%u:%u:%u:%u (%u)}", + msg, key->alg, key->client_keyid, key->server_keyid, + key->maclen, key->matches_client, key->matches_server, + key->matches_vrf, key->is_current, key->is_rnext, key->len); +} + +static int test_add_key_cr(int sk, const char *pwd, unsigned int pwd_len, + union tcp_addr addr, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, + uint8_t maclen, const char *alg, + bool set_current, bool set_rnext) +{ + struct tcp_ao_add tmp = {}; + uint8_t keyflags = 0; + int err; + + if (!alg) + alg = DEFAULT_TEST_ALGO; + + if (vrf) + keyflags |= TCP_AO_KEYF_IFINDEX; + err = test_prepare_key(&tmp, alg, addr, set_current, set_rnext, + DEFAULT_TEST_PREFIX, vrf, sndid, rcvid, maclen, + keyflags, pwd_len, pwd); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void verify_current_rnext(const char *tst, int sk, + int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (current_keyid >= 0) { + if (!ao_info.set_current) + test_fail("%s: the socket doesn't have current key", tst); + else if (ao_info.current_key != current_keyid) + test_fail("%s: current key is not the expected one %d != %u", + tst, current_keyid, ao_info.current_key); + else + test_ok("%s: current key %u as expected", + tst, ao_info.current_key); + } + if (rnext_keyid >= 0) { + if (!ao_info.set_rnext) + test_fail("%s: the socket doesn't have rnext key", tst); + else if (ao_info.rnext != rnext_keyid) + test_fail("%s: rnext key is not the expected one %d != %u", + tst, rnext_keyid, ao_info.rnext); + else + test_ok("%s: rnext key %u as expected", tst, ao_info.rnext); + } +} + + +static int key_collection_socket(bool server, unsigned int port) +{ + unsigned int i; + int sk; + + if (server) + sk = test_listen_socket(this_ip_addr, port, 1); + else + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + union tcp_addr *addr = &wrong_addr; + uint8_t sndid, rcvid, vrf; + bool set_current = false, set_rnext = false; + + if (key->matches_vrf) + vrf = 0; + else + vrf = test_vrf_ifindex; + if (server) { + if (key->matches_client) + addr = &this_ip_dest; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + addr = &this_ip_dest; + sndid = key->client_keyid; + rcvid = key->server_keyid; + set_current = key->is_current; + set_rnext = key->is_rnext; + } + + if (test_add_key_cr(sk, key->password, key->len, + *addr, vrf, sndid, rcvid, key->maclen, + key->alg, set_current, set_rnext)) + test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); + if (set_current || set_rnext) + key->used_on_handshake = 1; +#ifdef DEBUG + test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", + server ? 
"server" : "client", i, collection.nr_keys, + key->alg, rcvid, sndid, key->maclen, + key->matches_client, key->matches_server, + key->is_current, key->is_rnext, key->len); +#endif + } + return sk; +} + +static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, + struct tcp_ao_counters *a, struct tcp_ao_counters *b) +{ + unsigned int i; + + __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + uint8_t sndid, rcvid; + bool was_used; + + if (server) { + sndid = key->server_keyid; + rcvid = key->client_keyid; + if (is_listen_sk) + was_used = key->used_on_handshake; + else + was_used = key->used_after_accept; + } else { + sndid = key->client_keyid; + rcvid = key->server_keyid; + was_used = key->used_on_client; + } + + test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used, + sndid, rcvid); + } + test_tcp_ao_counters_free(a); + test_tcp_ao_counters_free(b); + test_ok("%s: passed counters checks", tst_name); +} + +static struct tcp_ao_getsockopt *lookup_key(struct tcp_ao_getsockopt *buf, + size_t len, int sndid, int rcvid) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (sndid >= 0 && buf[i].sndid != sndid) + continue; + if (rcvid >= 0 && buf[i].rcvid != rcvid) + continue; + return &buf[i]; + } + return NULL; +} + +static void verify_keys(const char *tst_name, int sk, + bool is_listen_sk, bool server) +{ + socklen_t len = sizeof(struct tcp_ao_getsockopt); + struct tcp_ao_getsockopt *keys; + bool passed_test = true; + unsigned int i; + + keys = calloc(collection.nr_keys, len); + if (!keys) + test_error("calloc()"); + + keys->nkeys = collection.nr_keys; + keys->get_all = 1; + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len)) { + free(keys); + test_error("getsockopt(TCP_AO_GET_KEYS)"); + } + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + struct tcp_ao_getsockopt *dump_key; + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + uint8_t sndid, rcvid; + bool matches = false; + + if (server) { + if (key->matches_client) + matches = true; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + matches = true; + sndid = key->client_keyid; + rcvid = key->server_keyid; + } + if (!key->matches_vrf) + matches = false; + /* no keys get removed on the original listener socket */ + if (is_listen_sk) + matches = true; + + dump_key = lookup_key(keys, keys->nkeys, sndid, rcvid); + if (matches != !!dump_key) { + test_fail("%s: key %u:%u %s%s on the socket", + tst_name, sndid, rcvid, + key->matches_vrf ? "" : "[vrf] ", + matches ? 
"disappeared" : "yet present"); + passed_test = false; + goto out; + } + if (!dump_key) + continue; + + if (!strcmp("cmac(aes128)", key->alg)) { + is_kdf_aes_128_cmac = (key->len != 16); + is_cmac_aes = true; + } + + if (is_cmac_aes) { + if (strcmp(dump_key->alg_name, "cmac(aes)")) { + test_fail("%s: key %u:%u cmac(aes) has unexpected alg %s", + tst_name, sndid, rcvid, + dump_key->alg_name); + passed_test = false; + continue; + } + } else if (strcmp(dump_key->alg_name, key->alg)) { + test_fail("%s: key %u:%u has unexpected alg %s != %s", + tst_name, sndid, rcvid, + dump_key->alg_name, key->alg); + passed_test = false; + continue; + } + if (is_kdf_aes_128_cmac) { + if (dump_key->keylen != 16) { + test_fail("%s: key %u:%u cmac(aes128) has unexpected len %u", + tst_name, sndid, rcvid, + dump_key->keylen); + continue; + } + } else if (dump_key->keylen != key->len) { + test_fail("%s: key %u:%u changed password len %u != %u", + tst_name, sndid, rcvid, + dump_key->keylen, key->len); + passed_test = false; + continue; + } + if (!is_kdf_aes_128_cmac && + memcmp(dump_key->key, key->password, key->len)) { + test_fail("%s: key %u:%u has different password", + tst_name, sndid, rcvid); + passed_test = false; + continue; + } + if (dump_key->maclen != key->maclen) { + test_fail("%s: key %u:%u changed maclen %u != %u", + tst_name, sndid, rcvid, + dump_key->maclen, key->maclen); + passed_test = false; + continue; + } + } + + if (passed_test) + test_ok("%s: The socket keys are consistent with the expectations", + tst_name); +out: + free(keys); +} + +static int start_server(const char *tst_name, unsigned int port, size_t quota, + struct tcp_ao_counters *begin, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters lsk_c1, lsk_c2; + ssize_t bytes; + int sk, lsk; + + synchronize_threads(); /* 1: key collection initialized */ + lsk = key_collection_socket(true, port); + if (test_get_tcp_ao_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + if (test_get_tcp_ao_counters(sk, begin)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: accepted => send data */ + if (test_get_tcp_ao_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, lsk, true, true); + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + + verify_counters(tst_name, true, true, &lsk_c1, &lsk_c2); + + return sk; +} + +static void end_server(const char *tst_name, int sk, + struct tcp_ao_counters *begin) +{ + struct tcp_ao_counters end; + + if (test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, true); + + synchronize_threads(); /* 4: verified => closed */ + close(sk); + + verify_counters(tst_name, true, false, begin, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_server_run(const char *tst_name, unsigned int port, size_t quota, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + end_server(tst_name, sk, &tmp); +} + +static void server_rotations(const char *tst_name, unsigned int 
port, + size_t quota, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + + for (i = current_index + 1; rotations > 0; i++, rotations--) { + ssize_t bytes; + + if (i >= collection.nr_keys) + i = 0; + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + return; + } + verify_current_rnext(tst_name, sk, + collection.keys[i].server_keyid, -1); + synchronize_threads(); /* verify current/rnext */ + } + end_server(tst_name, sk, &tmp); +} + +static int run_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + int sk; + + synchronize_threads(); /* 1: key collection initialized */ + sk = key_collection_socket(false, port); + + if (current_index >= 0 || rnext_index >= 0) { + int sndid = -1, rcvid = -1; + + if (current_index >= 0) + sndid = collection.keys[current_index].client_keyid; + if (rnext_index >= 0) + rcvid = collection.keys[rnext_index].server_keyid; + if (test_set_key(sk, sndid, rcvid)) + test_error("failed to set current/rnext keys"); + } + if (before && test_get_tcp_ao_counters(sk, before)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port++) <= 0) + test_error("failed to connect()"); + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = nr_keys - 1; + collection.keys[current_index].used_on_handshake = 1; + collection.keys[rnext_index].used_after_accept = 1; + collection.keys[rnext_index].used_on_client = 1; + + synchronize_threads(); /* 3: accepted => send data */ + if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + if (before) + test_tcp_ao_counters_free(before); + return -1; + } + + return sk; +} + +static int start_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, before, msg_sz, msg_nr); +} + +static void end_client(const char *tst_name, int sk, unsigned int nr_keys, + int current_index, int rnext_index, + struct tcp_ao_counters *start) +{ + struct tcp_ao_counters end; + + /* Some application may become dependent on this kernel choice */ + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = nr_keys - 1; + verify_current_rnext(tst_name, sk, + collection.keys[current_index].client_keyid, + collection.keys[rnext_index].server_keyid); + if (start && test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, false); + synchronize_threads(); /* 4: verify => closed */ + close(sk); + if (start) + verify_counters(tst_name, false, false, start, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_unmatched_keys(int sk, int *rnext_index) +{ + struct test_key *key; + unsigned int i = 0; + int err; + + do { + key = &collection.keys[i]; + if (!key->matches_server) + break; + } while (++i < 
collection.nr_keys); + if (key->matches_server) + test_error("all keys on client match the server"); + + err = test_add_key_cr(sk, key->password, key->len, wrong_addr, + 0, key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching ip-address for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching ip-address for established sk"); + else + test_error("Failed to add a key"); + + err = test_add_key_cr(sk, key->password, key->len, this_ip_dest, + test_vrf_ifindex, + key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching VRF for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching VRF for established sk"); + else + test_error("Failed to add a key"); + + for (i = 0; i < collection.nr_keys; i++) { + key = &collection.keys[i]; + if (!key->matches_client) + break; + } + if (key->matches_client) + test_error("all keys on server match the client"); + if (test_set_key(sk, -1, key->server_keyid)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + *rnext_index = i; +} + +static int client_non_matching(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index, + const size_t msg_sz, const size_t msg_nr) +{ + unsigned int i; + + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + for (i = 0; i < nr_keys; i++) { + /* key (0, 0) matches */ + collection.keys[i].matches_client = !!((i + 3) % 4); + collection.keys[i].matches_server = !!((i + 2) % 4); + if (kernel_config_has(KCONFIG_NET_VRF)) + collection.keys[i].matches_vrf = !!((i + 1) % 4); + } + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, NULL, msg_sz, msg_nr); +} + +static void check_current_back(const char *tst_name, unsigned int port, + unsigned int nr_keys, + unsigned int current_index, unsigned int rnext_index, + unsigned int rotate_to_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + collection.keys[rotate_to_index].used_after_accept = 1; + + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void roll_over_keys(const char *tst_name, unsigned int port, + unsigned int nr_keys, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + for (i = rnext_index + 1; rotations > 0; i++, rotations--) { + if (i >= collection.nr_keys) + i = 0; + if (test_set_key(sk, -1, collection.keys[i].server_keyid)) + test_error("Can't change the Rnext key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + test_tcp_ao_counters_free(&tmp); + return; + } + verify_current_rnext(tst_name, sk, -1, + collection.keys[i].server_keyid); + collection.keys[i].used_on_client = 1; + 
synchronize_threads(); /* verify current/rnext */ + } + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void try_client_run(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void try_client_match(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index) +{ + int sk; + + sk = client_non_matching(tst_name, port, nr_keys, current_index, + rnext_index, msg_len, nr_packets); + if (sk < 0) + return; + try_unmatched_keys(sk, &rnext_index); + end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_server_run("server: Check current/rnext keys unset before connect()", + port++, quota, 19, 19); + try_server_run("server: Check current/rnext keys set before connect()", + port++, quota, 10, 10); + try_server_run("server: Check current != rnext keys set before connect()", + port++, quota, 5, 10); + try_server_run("server: Check current flapping back on peer's RnextKey request", + port++, quota * 2, 5, 10); + server_rotations("server: Rotate over all different keys", port++, + quota, 20, 0, 0); + try_server_run("server: Check accept() => established key matching", + port++, quota * 2, 0, 0); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void check_established_socket(void) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_client_run("client: Check current/rnext keys unset before connect()", + port++, 20, -1, -1); + try_client_run("client: Check current/rnext keys set before connect()", + port++, 20, 10, 10); + try_client_run("client: Check current != rnext keys set before connect()", + port++, 20, 10, 5); + check_current_back("client: Check current flapping back on peer's RnextKey request", + port++, 20, 10, 5, 2); + roll_over_keys("client: Rotate over all different keys", port++, + 20, 20, 0, 0); + try_client_match("client: Check connect() => established key matching", + port++, 20, 0, 0); +} + +static void *client_fn(void *arg) +{ + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + check_closed_socket(); + check_listen_socket(); + check_established_socket(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(120, server_fn, client_fn); + return 0; +} -- cgit v1.2.3-70-g09d2 From 0c970ed2f87c058fe3ddeb4d7d8f64f72cf41d7a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 15 Dec 2023 16:45:49 -0800 Subject: s390/bpf: Fix indirect trampoline generation The func_addr used to be NULL for indirect trampolines used by struct_ops. Now func_addr is a valid function pointer. Hence use BPF_TRAMP_F_INDIRECT flag to detect such condition. 
Fixes: 2cd3e3772e41 ("x86/cfi,bpf: Fix bpf_struct_ops CFI") Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Reviewed-by: Ilya Leoshkevich Link: https://lore.kernel.org/bpf/20231216004549.78355-1-alexei.starovoitov@gmail.com --- arch/s390/net/bpf_jit_comp.c | 3 ++- tools/testing/selftests/bpf/DENYLIST.s390x | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index cc129617480a..7f0a7b97ef4c 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -2362,7 +2362,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, return -ENOTSUPP; /* Return to %r14, since func_addr and %r0 are not available. */ - if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) + if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) || + (flags & BPF_TRAMP_F_INDIRECT)) flags |= BPF_TRAMP_F_SKIP_FRAME; /* diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index d27aa42d11a4..1a63996c0304 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,7 +1,5 @@ # TEMPORARY # Alphabetical order -dummy_st_ops/dummy_init_ret_value -dummy_st_ops/dummy_init_ptr_arg exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -- cgit v1.2.3-70-g09d2 From e58aac1a9a179fa9dab3025ef955cdb548c439f2 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 16 Dec 2023 11:55:10 +0800 Subject: selftests/bpf: Test the release of map btf When there is a bpf_list_head or bpf_rb_root field in a map value, the free of the map btf and the free of the map value may run concurrently and there may be a use-after-free problem, so add two test cases to demonstrate it. The use-after-free problem can be easily reproduced by using the bpf_next tree and a KASAN-enabled kernel. The first test case tests the racing between the free of map btf and the free of array map. It constructs the racing by releasing the array map last, after the other ref-counters of the map btf have been released. To delay the free of the array map and make sure it is invoked after btf_free_rcu() is invoked, it stresses system_unbound_wq by closing multiple percpu array maps before it closes the array map. The second case tests the racing between the free of map btf and the free of inner map. Besides using a similar method to the first one, it uses bpf_map_delete_elem() to delete the inner map and to defer the release of the inner map until after one RCU grace period. The reason for using two skeletons is to prevent the release of the outer and inner maps in map_in_map_btf.c from interfering with the release of the bpf map in normal_map_btf.c.
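A minimal way to exercise the new cases, assuming the usual BPF selftests workflow (paths, privileges and kernel config may differ per tree; KASAN needs to be enabled to actually observe the use-after-free rather than just run the race):

    cd tools/testing/selftests/bpf
    make
    sudo ./test_progs -t map_btf                  # both subtests
    sudo ./test_progs -t map_btf/array_btf        # array map vs. map btf race
    sudo ./test_progs -t map_btf/inner_array_btf  # inner map vs. map btf race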
Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20231216035510.4030605-1-houtao@huaweicloud.com --- tools/testing/selftests/bpf/prog_tests/map_btf.c | 98 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/map_in_map_btf.c | 73 ++++++++++++++++ tools/testing/selftests/bpf/progs/normal_map_btf.c | 56 +++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_btf.c create mode 100644 tools/testing/selftests/bpf/progs/map_in_map_btf.c create mode 100644 tools/testing/selftests/bpf/progs/normal_map_btf.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_btf.c b/tools/testing/selftests/bpf/prog_tests/map_btf.c new file mode 100644 index 000000000000..2c4ef6037573 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_btf.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include + +#include "normal_map_btf.skel.h" +#include "map_in_map_btf.skel.h" + +static void do_test_normal_map_btf(void) +{ + struct normal_map_btf *skel; + int i, err, new_fd = -1; + int map_fd_arr[64]; + + skel = normal_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = normal_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Use percpu_array to slow bpf_map_free_deferred() down. + * The memory allocation may fail, so doesn't check the returned fd. + */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) + map_fd_arr[i] = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, 4, 4, 256, NULL); + + /* Close array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.array)); +out: + normal_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the release of the map + * btf which is held by other maps (e.g, bss). After that, array map + * holds the last reference of map btf. + */ + kern_sync_rcu(); + usleep(4000); + /* Spawn multiple kworkers to delay the invocation of + * bpf_map_free_deferred() for array map. + */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) { + if (map_fd_arr[i] < 0) + continue; + close(map_fd_arr[i]); + } + close(new_fd); +} + +static void do_test_map_in_map_btf(void) +{ + int err, zero = 0, new_fd = -1; + struct map_in_map_btf *skel; + + skel = map_in_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = map_in_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Close inner_array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.inner_array)); + /* Defer the free of inner_array */ + err = bpf_map__delete_elem(skel->maps.outer_array, &zero, sizeof(zero), 0); + ASSERT_OK(err, "delete inner map"); +out: + map_in_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the free of the outer + * map and the release of map btf. After that, inner map holds the last + * reference of map btf. 
+ */ + kern_sync_rcu(); + usleep(10000); + close(new_fd); +} + +void test_map_btf(void) +{ + if (test__start_subtest("array_btf")) + do_test_normal_map_btf(); + if (test__start_subtest("inner_array_btf")) + do_test_map_in_map_btf(); +} diff --git a/tools/testing/selftests/bpf/progs/map_in_map_btf.c b/tools/testing/selftests/bpf/progs/map_in_map_btf.c new file mode 100644 index 000000000000..7a1336d7b16a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_in_map_btf.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct inner_array_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} inner_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); + __array(values, struct inner_array_type); +} outer_array SEC(".maps") = { + .values = { + [0] = &inner_array, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_inner_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + struct bpf_map *map; + int zero = 0; + + if (done || (u32)bpf_get_current_pid_tgid() != pid) + return 0; + + map = bpf_map_lookup_elem(&outer_array, &zero); + if (!map) + return 0; + + value = bpf_map_lookup_elem(map, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c new file mode 100644 index 000000000000..66cde82aa86d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. 
Huawei Technologies Co., Ltd */ +#include +#include +#include + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + int zero = 0; + + if (done || (u32)bpf_get_current_pid_tgid() != pid) + return 0; + + value = bpf_map_lookup_elem(&array, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} -- cgit v1.2.3-70-g09d2 From f17d1a18a3dd6cc4b38a5226b0acbbad3f2063ae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 17 Dec 2023 22:55:38 +0100 Subject: selftests/bpf: Add more uprobe multi fail tests We fail to create uprobe if we pass negative offset. Add more tests validating kernel-side error checking code. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20231217215538.3361991-3-jolsa@kernel.org --- .../selftests/bpf/prog_tests/uprobe_multi_test.c | 149 ++++++++++++++++++++- 1 file changed, 146 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 07a009f95e85..8269cdee33ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -239,23 +239,166 @@ static void test_attach_api_fails(void) LIBBPF_OPTS(bpf_link_create_opts, opts); const char *path = "/proc/self/exe"; struct uprobe_multi *skel = NULL; + int prog_fd, link_fd = -1; unsigned long offset = 0; - int link_fd = -1; skel = uprobe_multi__open_and_load(); if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) goto cleanup; + prog_fd = bpf_program__fd(skel->progs.uprobe_extra); + /* abnormal cnt */ opts.uprobe_multi.path = path; opts.uprobe_multi.offsets = &offset; opts.uprobe_multi.cnt = INT_MAX; - link_fd = bpf_link_create(bpf_program__fd(skel->progs.uprobe), 0, - BPF_TRACE_UPROBE_MULTI, &opts); + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); if (!ASSERT_ERR(link_fd, "link_fd")) goto cleanup; if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt")) goto cleanup; + + /* cnt is 0 */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "cnt_is_zero")) + goto cleanup; + + /* negative offset */ + offset = -1; + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = (unsigned long *) &offset; + opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offset_is_negative")) + goto cleanup; + + /* offsets is NULL */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + 
.uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offsets_is_null")) + goto cleanup; + + /* wrong offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "offsets_is_wrong")) + goto cleanup; + + /* path is NULL */ + offset = 1; + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "path_is_null")) + goto cleanup; + + /* wrong path pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = (const char *) 1, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "path_is_wrong")) + goto cleanup; + + /* wrong path type */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = "/", + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EBADF, "path_is_wrong_type")) + goto cleanup; + + /* wrong cookies pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cookies = (__u64 *) 1ULL, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "cookies_is_wrong")) + goto cleanup; + + /* wrong ref_ctr_offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cookies = (__u64 *) &offset, + .uprobe_multi.ref_ctr_offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "ref_ctr_offsets_is_wrong")) + goto cleanup; + + /* wrong flags */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.flags = 1 << 31, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "wrong_flags")) + goto cleanup; + + /* wrong pid */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + .uprobe_multi.pid = -2, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong"); + cleanup: if (link_fd >= 0) close(link_fd); -- cgit v1.2.3-70-g09d2 From 62691b801daa497e36ad77636c3a6cd0f6dda440 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:08 +0000 Subject: tools/net/ynl: Use consistent array index expression formatting Use expression formatting that conforms to the python style 
guide. Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-2-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index c56dad9593c6..df7b1547de1f 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -98,12 +98,12 @@ class NlAttr: } def __init__(self, raw, offset): - self._len, self._type = struct.unpack("HH", raw[offset:offset + 4]) + self._len, self._type = struct.unpack("HH", raw[offset : offset + 4]) self.type = self._type & ~Netlink.NLA_TYPE_MASK self.is_nest = self._type & Netlink.NLA_F_NESTED self.payload_len = self._len self.full_len = (self.payload_len + 3) & ~3 - self.raw = raw[offset + 4:offset + self.payload_len] + self.raw = raw[offset + 4 : offset + self.payload_len] @classmethod def get_format(cls, attr_type, byte_order=None): @@ -154,7 +154,7 @@ class NlAttr: for m in members: # TODO: handle non-scalar members if m.type == 'binary': - decoded = self.raw[offset:offset+m['len']] + decoded = self.raw[offset : offset + m['len']] offset += m['len'] elif m.type in NlAttr.type_formats: format = self.get_format(m.type, m.byte_order) @@ -193,12 +193,12 @@ class NlAttrs: class NlMsg: def __init__(self, msg, offset, attr_space=None): - self.hdr = msg[offset:offset + 16] + self.hdr = msg[offset : offset + 16] self.nl_len, self.nl_type, self.nl_flags, self.nl_seq, self.nl_portid = \ struct.unpack("IHHII", self.hdr) - self.raw = msg[offset + 16:offset + self.nl_len] + self.raw = msg[offset + 16 : offset + self.nl_len] self.error = 0 self.done = 0 -- cgit v1.2.3-70-g09d2 From 1769e2be4baaa3273b56d1137bf67a6a747222ed Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:11 +0000 Subject: tools/net/ynl: Add 'sub-message' attribute decoding to ynl Implement the 'sub-message' attribute type in ynl. Encode support is not yet implemented. Support for sub-message selectors at a different nest level from the key attribute is not yet supported. 
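
Conceptually, resolution works as in the following minimal Python sketch. This is only an illustrative model of the new _resolve_selector() step; the spec names ('attr-opts', 'kind') and the gre/vxlan formats are made-up examples, not taken from any real family spec:

    # Minimal model of how a 'sub-message' attribute is resolved: the value of
    # the sibling attribute named by 'selector' picks one entry from the
    # sub-message's 'formats' table, which then supplies the fixed header
    # and/or attribute set used to decode the nested payload.
    sub_msgs = {
        'attr-opts': {                      # hypothetical sub-message spec
            'gre': {'attr-set': 'gre-attrs'},
            'vxlan': {'attr-set': 'vxlan-attrs', 'fixed-header': 'vxlan-hdr'},
        },
    }

    def resolve_selector(attr_spec, decoded_so_far):
        formats = sub_msgs[attr_spec['sub-message']]
        selector_value = decoded_so_far[attr_spec['selector']]
        return formats[selector_value]

    # An attribute spec {'sub-message': 'attr-opts', 'selector': 'kind'}
    # decoded next to {'kind': 'vxlan', ...} resolves to the 'vxlan' format.
    print(resolve_selector({'sub-message': 'attr-opts', 'selector': 'kind'},
                           {'kind': 'vxlan'}))
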
Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-5-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/nlspec.py | 55 +++++++++++++++++++++++++++++++++++++++++++++ tools/net/ynl/lib/ynl.py | 48 ++++++++++++++++++++++++++++++++------- 2 files changed, 95 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 92889298b197..44f13e383e8a 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -158,6 +158,9 @@ class SpecAttr(SpecElement): len integer, optional byte length of binary types display_hint string, hint to help choose format specifier when displaying the value + sub_message string, name of sub message type + selector string, name of attribute used to select + sub-message type is_auto_scalar bool, attr is a variable-size scalar """ @@ -173,6 +176,8 @@ class SpecAttr(SpecElement): self.byte_order = yaml.get('byte-order') self.len = yaml.get('len') self.display_hint = yaml.get('display-hint') + self.sub_message = yaml.get('sub-message') + self.selector = yaml.get('selector') self.is_auto_scalar = self.type == "sint" or self.type == "uint" @@ -278,6 +283,47 @@ class SpecStruct(SpecElement): return self.members.items() +class SpecSubMessage(SpecElement): + """ Netlink sub-message definition + + Represents a set of sub-message formats for polymorphic nlattrs + that contain type-specific sub messages. + + Attributes: + name string, name of sub-message definition + formats dict of sub-message formats indexed by match value + """ + def __init__(self, family, yaml): + super().__init__(family, yaml) + + self.formats = collections.OrderedDict() + for elem in self.yaml['formats']: + format = self.new_format(family, elem) + self.formats[format.value] = format + + def new_format(self, family, format): + return SpecSubMessageFormat(family, format) + + +class SpecSubMessageFormat(SpecElement): + """ Netlink sub-message definition + + Represents a set of sub-message formats for polymorphic nlattrs + that contain type-specific sub messages. 
+ + Attributes: + value attribute value to match against type selector + fixed_header string, name of fixed header, or None + attr_set string, name of attribute set, or None + """ + def __init__(self, family, yaml): + super().__init__(family, yaml) + + self.value = yaml.get('value') + self.fixed_header = yaml.get('fixed-header') + self.attr_set = yaml.get('attribute-set') + + class SpecOperation(SpecElement): """Netlink Operation @@ -365,6 +411,7 @@ class SpecFamily(SpecElement): attr_sets dict of attribute sets msgs dict of all messages (index by name) + sub_msgs dict of all sub messages (index by name) ops dict of all valid requests / responses ntfs dict of all async events consts dict of all constants/enums @@ -405,6 +452,7 @@ class SpecFamily(SpecElement): jsonschema.validate(self.yaml, schema) self.attr_sets = collections.OrderedDict() + self.sub_msgs = collections.OrderedDict() self.msgs = collections.OrderedDict() self.req_by_value = collections.OrderedDict() self.rsp_by_value = collections.OrderedDict() @@ -441,6 +489,9 @@ class SpecFamily(SpecElement): def new_struct(self, elem): return SpecStruct(self, elem) + def new_sub_message(self, elem): + return SpecSubMessage(self, elem); + def new_operation(self, elem, req_val, rsp_val): return SpecOperation(self, elem, req_val, rsp_val) @@ -529,6 +580,10 @@ class SpecFamily(SpecElement): attr_set = self.new_attr_set(elem) self.attr_sets[elem['name']] = attr_set + for elem in self.yaml.get('sub-messages', []): + sub_message = self.new_sub_message(elem) + self.sub_msgs[sub_message.name] = sub_message + if self.msg_id_model == 'unified': self._dictify_ops_unified() elif self.msg_id_model == 'directional': diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index df7b1547de1f..a8db71441634 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -170,10 +170,9 @@ class NlAttr: class NlAttrs: - def __init__(self, msg): + def __init__(self, msg, offset=0): self.attrs = [] - offset = 0 while offset < len(msg): attr = NlAttr(msg, offset) offset += attr.full_len @@ -371,8 +370,8 @@ class NetlinkProtocol: fixed_header_size = 0 if ynl: op = ynl.rsp_by_value[msg.cmd()] - fixed_header_size = ynl._fixed_header_size(op) - msg.raw_attrs = NlAttrs(msg.raw[fixed_header_size:]) + fixed_header_size = ynl._fixed_header_size(op.fixed_header) + msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg def get_mcast_id(self, mcast_name, mcast_groups): @@ -549,6 +548,37 @@ class YnlFamily(SpecFamily): else: rsp[name] = [decoded] + def _resolve_selector(self, attr_spec, vals): + sub_msg = attr_spec.sub_message + if sub_msg not in self.sub_msgs: + raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}") + sub_msg_spec = self.sub_msgs[sub_msg] + + selector = attr_spec.selector + if selector not in vals: + raise Exception(f"There is no value for {selector} to resolve '{attr_spec.name}'") + value = vals[selector] + if value not in sub_msg_spec.formats: + raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'") + + spec = sub_msg_spec.formats[value] + return spec + + def _decode_sub_msg(self, attr, attr_spec, rsp): + msg_format = self._resolve_selector(attr_spec, rsp) + decoded = {} + offset = 0 + if msg_format.fixed_header: + decoded.update(self._decode_fixed_header(attr, msg_format.fixed_header)); + offset = self._fixed_header_size(msg_format.fixed_header) + if msg_format.attr_set: + if msg_format.attr_set in self.attr_sets: + subdict = self._decode(NlAttrs(attr.raw, offset), 
msg_format.attr_set) + decoded.update(subdict) + else: + raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'") + return decoded + def _decode(self, attrs, space): if space: attr_space = self.attr_sets[space] @@ -586,6 +616,8 @@ class YnlFamily(SpecFamily): value = self._decode_enum(value, attr_spec) selector = self._decode_enum(selector, attr_spec) decoded = {"value": value, "selector": selector} + elif attr_spec["type"] == 'sub-message': + decoded = self._decode_sub_msg(attr, attr_spec, rsp) else: if not self.process_unknown: raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') @@ -626,16 +658,16 @@ class YnlFamily(SpecFamily): return msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set)) - offset = 20 + self._fixed_header_size(op) + offset = 20 + self._fixed_header_size(op.fixed_header) path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset, extack['bad-attr-offs']) if path: del extack['bad-attr-offs'] extack['bad-attr'] = path - def _fixed_header_size(self, op): - if op.fixed_header: - fixed_header_members = self.consts[op.fixed_header].members + def _fixed_header_size(self, name): + if name: + fixed_header_members = self.consts[name].members size = 0 for m in fixed_header_members: format = NlAttr.get_format(m.type, m.byte_order) -- cgit v1.2.3-70-g09d2 From 8b6811d96666b7ea0a53f56e65cc656d390feb19 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:12 +0000 Subject: tools/net/ynl: Add binary and pad support to structs for tc The tc netlink-raw family needs binary and pad types for several qopt C structs. Add support for them to ynl. Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-6-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index a8db71441634..1e10512b2117 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -670,8 +670,11 @@ class YnlFamily(SpecFamily): fixed_header_members = self.consts[name].members size = 0 for m in fixed_header_members: - format = NlAttr.get_format(m.type, m.byte_order) - size += format.size + if m.type in ['pad', 'binary']: + size += m.len + else: + format = NlAttr.get_format(m.type, m.byte_order) + size += format.size return size else: return 0 @@ -681,12 +684,20 @@ class YnlFamily(SpecFamily): fixed_header_attrs = dict() offset = 0 for m in fixed_header_members: - format = NlAttr.get_format(m.type, m.byte_order) - [ value ] = format.unpack_from(msg.raw, offset) - offset += format.size - if m.enum: - value = self._decode_enum(value, m) - fixed_header_attrs[m.name] = value + value = None + if m.type == 'pad': + offset += m.len + elif m.type == 'binary': + value = msg.raw[offset : offset + m.len] + offset += m.len + else: + format = NlAttr.get_format(m.type, m.byte_order) + [ value ] = format.unpack_from(msg.raw, offset) + offset += format.size + if value is not None: + if m.enum: + value = self._decode_enum(value, m) + fixed_header_attrs[m.name] = value return fixed_header_attrs def handle_ntf(self, decoded): @@ -753,8 +764,13 @@ class YnlFamily(SpecFamily): fixed_header_members = self.consts[op.fixed_header].members for m in fixed_header_members: value = vals.pop(m.name) if m.name in vals else 0 - format = NlAttr.get_format(m.type, m.byte_order) - msg += 
format.pack(value) + if m.type == 'pad': + msg += bytearray(m.len) + elif m.type == 'binary': + msg += bytes.fromhex(value) + else: + format = NlAttr.get_format(m.type, m.byte_order) + msg += format.pack(value) for name, value in vals.items(): msg += self._add_attr(op.attr_set.name, name, value) msg = _genl_msg_finalize(msg) -- cgit v1.2.3-70-g09d2 From 6235b3d8bc3f81e81561c151237503fcf7868a98 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:17 +0000 Subject: tools/net/ynl-gen-rst: Add sub-messages to generated docs Add a section for sub-messages to the generated .rst files. Reviewed-by: Breno Leitao Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-11-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 8c62e040df5d..6825db92c899 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -256,6 +256,24 @@ def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: return "\n".join(lines) +def parse_sub_messages(entries: List[Dict[str, Any]]) -> str: + """Parse sub-message definitions""" + lines = [] + + for entry in entries: + lines.append(rst_section(entry["name"])) + for fmt in entry["formats"]: + value = fmt["value"] + + lines.append(rst_bullet(bold(value))) + for attr in ['fixed-header', 'attribute-set']: + if attr in fmt: + lines.append(rst_fields(attr, fmt[attr], 2)) + lines.append("\n") + + return "\n".join(lines) + + def parse_yaml(obj: Dict[str, Any]) -> str: """Format the whole YAML into a RST string""" lines = [] @@ -292,6 +310,11 @@ def parse_yaml(obj: Dict[str, Any]) -> str: lines.append(rst_subtitle("Attribute sets")) lines.append(parse_attr_sets(obj["attribute-sets"])) + # Sub-messages + if "sub-messages" in obj: + lines.append(rst_subtitle("Sub-messages")) + lines.append(parse_sub_messages(obj["sub-messages"])) + return "\n".join(lines) -- cgit v1.2.3-70-g09d2 From e8c32339cf49cd9c2626e143c548f5897aa58b17 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:18 +0000 Subject: tools/net/ynl-gen-rst: Sort the index of generated netlink specs The index of netlink specs was being generated unsorted. Sort the output before generating the index entries. 
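
The intended behaviour is roughly the following sketch (list_spec_pages() is a hypothetical helper used only for illustration, not the actual ynl-gen-rst.py code; the real change simply wraps os.listdir() in sorted()):

    import os

    def list_spec_pages(index_dir):
        # Collect the per-family .rst pages in sorted order so the generated
        # index toctree is deterministic across runs, skipping the index
        # file itself.
        pages = []
        for filename in sorted(os.listdir(index_dir)):
            if not filename.endswith(".rst") or filename == "index.rst":
                continue
            pages.append(filename[:-len(".rst")])
        return pages
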
Reviewed-by: Jakub Kicinski Reviewed-by: Breno Leitao Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-12-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 6825db92c899..68f9a9cd57cb 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -383,7 +383,7 @@ def generate_main_index_rst(output: str) -> None: index_dir = os.path.dirname(output) logging.debug("Looking for .rst files in %s", index_dir) - for filename in os.listdir(index_dir): + for filename in sorted(os.listdir(index_dir)): if not filename.endswith(".rst") or filename == "index.rst": continue lines.append(f" {filename.replace('.rst', '')}\n") -- cgit v1.2.3-70-g09d2 From e9d7c59212e43f079dffaf65001b006da6a12580 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:19 +0000 Subject: tools/net/ynl-gen-rst: Remove bold from attribute-set headings The generated .rst for attribute-sets currently uses a sub-sub-heading for each attribute, with the attribute name in bold. This makes attributes stand out more than the attribute-set sub-headings they are part of. Remove the bold markup from attribute sub-sub-headings. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-13-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 68f9a9cd57cb..7ac5714f73a2 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -240,7 +240,7 @@ def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: lines.append(rst_section(entry["name"])) for attr in entry["attributes"]: type_ = attr.get("type") - attr_line = bold(attr["name"]) + attr_line = attr["name"] if type_: # Add the attribute type in the same line attr_line += f" ({inline(type_)})" -- cgit v1.2.3-70-g09d2 From 9b0aa2244d9d12cc39726ffabc0609a029fda95c Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:20 +0000 Subject: tools/net/ynl-gen-rst: Remove extra indentation from generated docs The output from ynl-gen-rst.py has extra indentation that causes extra
elements to be generated in the HTML output. Reduce the indentation so that sphinx doesn't generate unnecessary
elements. Reviewed-by: Breno Leitao Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-14-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-rst.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 7ac5714f73a2..262d88f88696 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -69,7 +69,7 @@ def rst_paragraph(paragraph: str, level: int = 0) -> str: def rst_bullet(item: str, level: int = 0) -> str: """Return a formatted a bullet""" - return headroom(level) + f" - {item}" + return headroom(level) + f"- {item}" def rst_subsection(title: str) -> str: @@ -250,7 +250,7 @@ def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: for k in attr.keys(): if k in preprocessed + ignored: continue - lines.append(rst_fields(k, sanitize(attr[k]), 2)) + lines.append(rst_fields(k, sanitize(attr[k]), 0)) lines.append("\n") return "\n".join(lines) @@ -268,7 +268,7 @@ def parse_sub_messages(entries: List[Dict[str, Any]]) -> str: lines.append(rst_bullet(bold(value))) for attr in ['fixed-header', 'attribute-set']: if attr in fmt: - lines.append(rst_fields(attr, fmt[attr], 2)) + lines.append(rst_fields(attr, fmt[attr], 1)) lines.append("\n") return "\n".join(lines) -- cgit v1.2.3-70-g09d2 From 8e432e6197cef6250dfd6fdffd41c06613c874ca Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 18 Dec 2023 09:36:01 -0800 Subject: bpf: Ensure precise is reset to false in __mark_reg_const_zero() It is safe to always start with imprecise SCALAR_VALUE register. Previously __mark_reg_const_zero() relied on caller to reset precise mark, but it's very error prone and we already missed it in a few places. So instead make __mark_reg_const_zero() reset precision always, as it's a safe default for SCALAR_VALUE. Explanation is basically the same as for why we are resetting (or rather not setting) precision in current state. If necessary, precision propagation will set it to precise correctly. As such, also remove a big comment about forward precision propagation in mark_reg_stack_read() and avoid unnecessarily setting precision to true after reading from STACK_ZERO stack. Again, precision propagation will correctly handle this, if that SCALAR_VALUE register will ever be needed to be precise. 
Reported-by: Maxim Mikityanskiy Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Maxim Mikityanskiy Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231218173601.53047-1-andrii@kernel.org --- kernel/bpf/verifier.c | 29 ++++++++-------------- .../selftests/bpf/progs/verifier_spill_fill.c | 10 ++++++-- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1863826a4ac3..9456ee0ad129 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1777,10 +1777,14 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg) __mark_reg_known(reg, 0); } -static void __mark_reg_const_zero(struct bpf_reg_state *reg) +static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) { __mark_reg_known(reg, 0); reg->type = SCALAR_VALUE; + /* all scalars are assumed imprecise initially (unless unprivileged, + * in which case everything is forced to be precise) + */ + reg->precise = !env->bpf_capable; } static void mark_reg_known_zero(struct bpf_verifier_env *env, @@ -4706,21 +4710,10 @@ static void mark_reg_stack_read(struct bpf_verifier_env *env, zeros++; } if (zeros == max_off - min_off) { - /* any access_size read into register is zero extended, - * so the whole register == const_zero - */ - __mark_reg_const_zero(&state->regs[dst_regno]); - /* backtracking doesn't support STACK_ZERO yet, - * so mark it precise here, so that later - * backtracking can stop here. - * Backtracking may not need this if this register - * doesn't participate in pointer adjustment. - * Forward propagation of precise flag is not - * necessary either. This mark is only to stop - * backtracking. Any register that contributed - * to const 0 was marked precise before spill. + /* Any access_size read into register is zero extended, + * so the whole register == const_zero. 
*/ - state->regs[dst_regno].precise = true; + __mark_reg_const_zero(env, &state->regs[dst_regno]); } else { /* have read misc data from the stack */ mark_reg_unknown(env, state->regs, dst_regno); @@ -4803,11 +4796,11 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (spill_cnt == size && tnum_is_const(reg->var_off) && reg->var_off.value == 0) { - __mark_reg_const_zero(&state->regs[dst_regno]); + __mark_reg_const_zero(env, &state->regs[dst_regno]); /* this IS register fill, so keep insn_flags */ } else if (zero_cnt == size) { /* similarly to mark_reg_stack_read(), preserve zeroes */ - __mark_reg_const_zero(&state->regs[dst_regno]); + __mark_reg_const_zero(env, &state->regs[dst_regno]); insn_flags = 0; /* not restoring original register state */ } else { mark_reg_unknown(env, state->regs, dst_regno); @@ -7963,7 +7956,7 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, /* switch to DRAINED state, but keep the depth unchanged */ /* mark current iter state as drained and assume returned NULL */ cur_iter->iter.state = BPF_ITER_STATE_DRAINED; - __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]); + __mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]); return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 508f5d6c7347..39fe3372e0e0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -499,8 +499,14 @@ __success __msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") /* but fp-16 is spilled IMPRECISE zero const reg */ __msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") -/* and now check that precision propagation works even for such tricky case */ -__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=P0 R10=fp0 fp-16_w=0") +/* validate that assigning R2 from STACK_ZERO doesn't mark register + * precise immediately; if necessary, it will be marked precise later + */ +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000") +/* similarly, when R2 is assigned from spilled register, it is initially + * imprecise, but will be marked precise later once it is used in precise context + */ +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0") __msg("11: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") -- cgit v1.2.3-70-g09d2 From d17aff807f845cf93926c28705216639c7279110 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 19 Dec 2023 07:37:35 -0800 Subject: Revert BPF token-related functionality This patch includes the following revert (one conflicting BPF FS patch and three token patch sets, represented by merge commits): - revert 0f5d5454c723 "Merge branch 'bpf-fs-mount-options-parsing-follow-ups'"; - revert 750e785796bb "bpf: Support uid and gid when mounting bpffs"; - revert 733763285acf "Merge branch 'bpf-token-support-in-libbpf-s-bpf-object'"; - revert c35919dcce28 "Merge branch 'bpf-token-and-bpf-fs-based-delegation'". 
Link: https://lore.kernel.org/bpf/CAHk-=wg7JuFYwGy=GOMbRCtOL+jwSQsdUaBsRWkDVYbxipbM5A@mail.gmail.com Signed-off-by: Andrii Nakryiko --- drivers/media/rc/bpf-lirc.c | 2 +- include/linux/bpf.h | 85 +- include/linux/filter.h | 2 +- include/linux/lsm_hook_defs.h | 15 +- include/linux/security.h | 43 +- include/uapi/linux/bpf.h | 42 - kernel/bpf/Makefile | 2 +- kernel/bpf/arraymap.c | 2 +- kernel/bpf/bpf_lsm.c | 15 +- kernel/bpf/cgroup.c | 6 +- kernel/bpf/core.c | 3 +- kernel/bpf/helpers.c | 6 +- kernel/bpf/inode.c | 326 +------ kernel/bpf/syscall.c | 215 ++-- kernel/bpf/token.c | 271 ----- kernel/bpf/verifier.c | 13 +- kernel/trace/bpf_trace.c | 2 +- net/core/filter.c | 36 +- net/ipv4/bpf_tcp_ca.c | 2 +- net/netfilter/nf_bpf_link.c | 2 +- security/security.c | 101 +- security/selinux/hooks.c | 47 +- tools/include/uapi/linux/bpf.h | 42 - tools/lib/bpf/Build | 2 +- tools/lib/bpf/bpf.c | 37 +- tools/lib/bpf/bpf.h | 35 +- tools/lib/bpf/btf.c | 7 +- tools/lib/bpf/elf.c | 2 + tools/lib/bpf/features.c | 478 --------- tools/lib/bpf/libbpf.c | 573 ++++++++--- tools/lib/bpf/libbpf.h | 37 +- tools/lib/bpf/libbpf.map | 1 - tools/lib/bpf/libbpf_internal.h | 36 +- tools/lib/bpf/libbpf_probes.c | 8 +- tools/lib/bpf/str_error.h | 3 - .../selftests/bpf/prog_tests/libbpf_probes.c | 4 - .../testing/selftests/bpf/prog_tests/libbpf_str.c | 6 - tools/testing/selftests/bpf/prog_tests/token.c | 1031 -------------------- tools/testing/selftests/bpf/progs/priv_map.c | 13 - tools/testing/selftests/bpf/progs/priv_prog.c | 13 - 40 files changed, 641 insertions(+), 2925 deletions(-) delete mode 100644 kernel/bpf/token.c delete mode 100644 tools/lib/bpf/features.c delete mode 100644 tools/testing/selftests/bpf/prog_tests/token.c delete mode 100644 tools/testing/selftests/bpf/progs/priv_map.c delete mode 100644 tools/testing/selftests/bpf/progs/priv_prog.c (limited to 'tools') diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 6d07693c6b9f..fe17c7f98e81 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_trace_printk: - if (bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (perfmon_capable()) return bpf_get_trace_printk_proto(); fallthrough; default: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2f54cc0436c4..7a8d4c81a39a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -52,10 +52,6 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; -struct bpf_token; -struct user_namespace; -struct super_block; -struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -1488,7 +1484,6 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif - struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1613,31 +1608,6 @@ struct bpf_link_primer { u32 id; }; -struct bpf_mount_opts { - kuid_t uid; - kgid_t gid; - umode_t mode; - - /* BPF token-related delegation options */ - u64 delegate_cmds; - u64 delegate_maps; - u64 delegate_progs; - u64 delegate_attachs; -}; - -struct bpf_token { - struct work_struct work; - atomic64_t refcnt; - struct user_namespace *userns; - u64 allowed_cmds; - u64 allowed_maps; - u64 allowed_progs; - u64 allowed_attachs; -#ifdef CONFIG_SECURITY - void *security; -#endif -}; - struct bpf_struct_ops_value; struct btf_member; @@ -2097,7 +2067,6 @@ static inline 
void bpf_enable_instrumentation(void) migrate_enable(); } -extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2232,26 +2201,24 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -bool bpf_token_capable(const struct bpf_token *token, int cap); - -static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) +static inline bool bpf_allow_ptr_leaks(void) { - return bpf_token_capable(token, CAP_PERFMON); + return perfmon_capable(); } -static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) +static inline bool bpf_allow_uninit_stack(void) { - return bpf_token_capable(token, CAP_PERFMON); + return perfmon_capable(); } -static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) +static inline bool bpf_bypass_spec_v1(void) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return cpu_mitigations_off() || perfmon_capable(); } -static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) +static inline bool bpf_bypass_spec_v4(void) { - return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); + return cpu_mitigations_off() || perfmon_capable(); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2268,21 +2235,8 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); -void bpf_token_inc(struct bpf_token *token); -void bpf_token_put(struct bpf_token *token); -int bpf_token_create(union bpf_attr *attr); -struct bpf_token *bpf_token_get_from_fd(u32 ufd); - -bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); -bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); -bool bpf_token_allow_prog_type(const struct bpf_token *token, - enum bpf_prog_type prog_type, - enum bpf_attach_type attach_type); - int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); -struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir, - umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) 
\ @@ -2526,8 +2480,7 @@ const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, - const struct bpf_prog *prog); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2646,24 +2599,6 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } -static inline bool bpf_token_capable(const struct bpf_token *token, int cap) -{ - return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); -} - -static inline void bpf_token_inc(struct bpf_token *token) -{ -} - -static inline void bpf_token_put(struct bpf_token *token) -{ -} - -static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline void __dev_flush(void) { } @@ -2787,7 +2722,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_base_func_proto(enum bpf_func_id func_id) { return NULL; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 12d907f17d36..68fb6c8142fe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1139,7 +1139,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) + if (bpf_jit_harden == 1 && bpf_capable()) return false; return true; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 3fdd00b452ac..ff217a5ce552 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -398,17 +398,10 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) -LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) -LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) -LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) +LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) +LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/security.h b/include/linux/security.h index 00809d2d5c38..1d1df326c881 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,7 +32,6 @@ #include #include #include -#include struct 
linux_binprm; struct cred; @@ -2021,22 +2020,15 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_token; +struct bpf_prog_aux; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token); +extern int security_bpf_map_alloc(struct bpf_map *map); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token); -extern void security_bpf_prog_free(struct bpf_prog *prog); -extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); -extern void security_bpf_token_free(struct bpf_token *token); -extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); -extern int security_bpf_token_capable(const struct bpf_token *token, int cap); +extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); +extern void security_bpf_prog_free(struct bpf_prog_aux *aux); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2054,8 +2046,7 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) +static inline int security_bpf_map_alloc(struct bpf_map *map) { return 0; } @@ -2063,33 +2054,13 @@ static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *a static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) +static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog *prog) +static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) { } - -static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - return 0; -} - -static inline void security_bpf_token_free(struct bpf_token *token) -{ } - -static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) -{ - return 0; -} - -static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) -{ - return 0; -} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42f4d3090efe..754e68ca8744 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,36 +847,6 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * - * BPF_TOKEN_CREATE - * Description - * Create BPF token with embedded information about what - * BPF-related functionality it allows: - * - a set of allowed bpf() syscall commands; - * - a set of allowed BPF map types to be created with - * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; - * - a set of allowed BPF program types and BPF program attach - * types to be loaded with BPF_PROG_LOAD command, if - * BPF_PROG_LOAD itself is allowed. - * - * BPF token is created (derived) from an instance of BPF FS, - * assuming it has necessary delegation mount options specified. 
- * This BPF token can be passed as an extra parameter to various - * bpf() syscall commands to grant BPF subsystem functionality to - * unprivileged processes. - * - * When created, BPF token is "associated" with the owning - * user namespace of BPF FS instance (super block) that it was - * derived from, and subsequent BPF operations performed with - * BPF token would be performing capabilities checks (i.e., - * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within - * that user namespace. Without BPF token, such capabilities - * have to be granted in init user namespace, making bpf() - * syscall incompatible with user namespace, for the most part. - * - * Return - * A new file descriptor (a nonnegative integer), or -1 if an - * error occurred (in which case, *errno* is set appropriately). - * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -931,8 +901,6 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, - BPF_TOKEN_CREATE, - __MAX_BPF_CMD, }; enum bpf_map_type { @@ -983,7 +951,6 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, - __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1028,7 +995,6 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, - __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1437,7 +1403,6 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; - __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1507,7 +1472,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; - __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1620,7 +1584,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). 
*/ __u32 btf_log_true_size; - __u32 btf_token_fd; }; struct { @@ -1751,11 +1714,6 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; - struct { /* struct used by BPF_TOKEN_CREATE command */ - __u32 flags; - __u32 bpffs_fd; - } token_create; - } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 4ce95acfcaa7..f526b7573e97 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 13358675ff2e..0bdbbbeab155 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -82,7 +82,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; - bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL); + bool bypass_spec_v1 = bpf_bypass_spec_v1(); u64 array_size, mask64; struct bpf_array *array; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 63b4dc495125..e8e910395bf6 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -260,15 +260,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) BTF_SET_START(sleepable_lsm_hooks) BTF_ID(func, bpf_lsm_bpf) BTF_ID(func, bpf_lsm_bpf_map) -BTF_ID(func, bpf_lsm_bpf_map_create) -BTF_ID(func, bpf_lsm_bpf_map_free) +BTF_ID(func, bpf_lsm_bpf_map_alloc_security) +BTF_ID(func, bpf_lsm_bpf_map_free_security) BTF_ID(func, bpf_lsm_bpf_prog) -BTF_ID(func, bpf_lsm_bpf_prog_load) -BTF_ID(func, bpf_lsm_bpf_prog_free) -BTF_ID(func, bpf_lsm_bpf_token_create) -BTF_ID(func, bpf_lsm_bpf_token_free) -BTF_ID(func, bpf_lsm_bpf_token_cmd) -BTF_ID(func, bpf_lsm_bpf_token_capable) BTF_ID(func, bpf_lsm_bprm_check_security) BTF_ID(func, bpf_lsm_bprm_committed_creds) BTF_ID(func, bpf_lsm_bprm_committing_creds) @@ -363,8 +357,9 @@ BTF_ID(func, bpf_lsm_userns_create) BTF_SET_END(sleepable_lsm_hooks) BTF_SET_START(untrusted_lsm_hooks) -BTF_ID(func, bpf_lsm_bpf_map_free) -BTF_ID(func, bpf_lsm_bpf_prog_free) +BTF_ID(func, bpf_lsm_bpf_map_free_security) +BTF_ID(func, bpf_lsm_bpf_prog_alloc_security) +BTF_ID(func, bpf_lsm_bpf_prog_free_security) BTF_ID(func, bpf_lsm_file_alloc_security) BTF_ID(func, bpf_lsm_file_free_security) #ifdef CONFIG_SECURITY_NETWORK diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 98e0e3835b28..491d20038cbe 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1630,7 +1630,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -2191,7 +2191,7 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return 
&bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -2348,7 +2348,7 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_event_output_data_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 14ace23d517b..ea6843be2616 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -682,7 +682,7 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || - !bpf_token_capable(fp->aux->token, CAP_BPF)) + !bpf_capable()) return; bpf_prog_ksym_set_addr(fp); @@ -2779,7 +2779,6 @@ void bpf_prog_free(struct bpf_prog *fp) if (aux->dst_prog) bpf_prog_put(aux->dst_prog); - bpf_token_put(aux->token); INIT_WORK(&aux->work, bpf_prog_free_deferred); schedule_work(&aux->work); } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 07fd4b5704f3..be72824f32b2 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1679,7 +1679,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_task_pt_regs_proto __weak; const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_base_func_proto(enum bpf_func_id func_id) { switch (func_id) { case BPF_FUNC_map_lookup_elem: @@ -1730,7 +1730,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) break; } - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return NULL; switch (func_id) { @@ -1788,7 +1788,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) break; } - if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (!perfmon_capable()) return NULL; switch (func_id) { diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 4383b3d13a55..1aafb2ff2e95 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -20,7 +20,6 @@ #include #include #include -#include #include "preload/bpf_preload.h" enum bpf_type { @@ -99,9 +98,9 @@ static const struct inode_operations bpf_prog_iops = { }; static const struct inode_operations bpf_map_iops = { }; static const struct inode_operations bpf_link_iops = { }; -struct inode *bpf_get_inode(struct super_block *sb, - const struct inode *dir, - umode_t mode) +static struct inode *bpf_get_inode(struct super_block *sb, + const struct inode *dir, + umode_t mode) { struct inode *inode; @@ -595,183 +594,15 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ } EXPORT_SYMBOL(bpf_prog_get_type_path); -struct bpffs_btf_enums { - const struct btf *btf; - const struct btf_type *cmd_t; - const struct btf_type *map_t; - const struct btf_type *prog_t; - const struct btf_type *attach_t; -}; - -static int find_bpffs_btf_enums(struct bpffs_btf_enums *info) -{ - const struct btf *btf; - const struct btf_type *t; - const char *name; - int i, n; - - memset(info, 0, sizeof(*info)); - - btf = bpf_get_btf_vmlinux(); - if (IS_ERR(btf)) - return PTR_ERR(btf); - if (!btf) - return -ENOENT; - - info->btf = btf; - - for (i = 1, n = btf_nr_types(btf); i < n; i++) { - t = btf_type_by_id(btf, i); - if (!btf_type_is_enum(t)) - continue; - - name = btf_name_by_offset(btf, t->name_off); - if (!name) - continue; - - if (strcmp(name, "bpf_cmd") == 0) - info->cmd_t = t; - else if (strcmp(name, 
"bpf_map_type") == 0) - info->map_t = t; - else if (strcmp(name, "bpf_prog_type") == 0) - info->prog_t = t; - else if (strcmp(name, "bpf_attach_type") == 0) - info->attach_t = t; - else - continue; - - if (info->cmd_t && info->map_t && info->prog_t && info->attach_t) - return 0; - } - - return -ESRCH; -} - -static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t, - const char *prefix, const char *str, int *value) -{ - const struct btf_enum *e; - const char *name; - int i, n, pfx_len = strlen(prefix); - - *value = 0; - - if (!btf || !enum_t) - return false; - - for (i = 0, n = btf_vlen(enum_t); i < n; i++) { - e = &btf_enum(enum_t)[i]; - - name = btf_name_by_offset(btf, e->name_off); - if (!name || strncasecmp(name, prefix, pfx_len) != 0) - continue; - - /* match symbolic name case insensitive and ignoring prefix */ - if (strcasecmp(name + pfx_len, str) == 0) { - *value = e->val; - return true; - } - } - - return false; -} - -static void seq_print_delegate_opts(struct seq_file *m, - const char *opt_name, - const struct btf *btf, - const struct btf_type *enum_t, - const char *prefix, - u64 delegate_msk, u64 any_msk) -{ - const struct btf_enum *e; - bool first = true; - const char *name; - u64 msk; - int i, n, pfx_len = strlen(prefix); - - delegate_msk &= any_msk; /* clear unknown bits */ - - if (delegate_msk == 0) - return; - - seq_printf(m, ",%s", opt_name); - if (delegate_msk == any_msk) { - seq_printf(m, "=any"); - return; - } - - if (btf && enum_t) { - for (i = 0, n = btf_vlen(enum_t); i < n; i++) { - e = &btf_enum(enum_t)[i]; - name = btf_name_by_offset(btf, e->name_off); - if (!name || strncasecmp(name, prefix, pfx_len) != 0) - continue; - msk = 1ULL << e->val; - if (delegate_msk & msk) { - /* emit lower-case name without prefix */ - seq_printf(m, "%c", first ? '=' : ':'); - name += pfx_len; - while (*name) { - seq_printf(m, "%c", tolower(*name)); - name++; - } - - delegate_msk &= ~msk; - first = false; - } - } - } - if (delegate_msk) - seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk); -} - /* * Display the mount options in /proc/mounts. 
*/ static int bpf_show_options(struct seq_file *m, struct dentry *root) { - struct bpf_mount_opts *opts = root->d_sb->s_fs_info; - struct inode *inode = d_inode(root); - umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX; - u64 mask; - - if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) - seq_printf(m, ",uid=%u", - from_kuid_munged(&init_user_ns, inode->i_uid)); - if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) - seq_printf(m, ",gid=%u", - from_kgid_munged(&init_user_ns, inode->i_gid)); + umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; + if (mode != S_IRWXUGO) seq_printf(m, ",mode=%o", mode); - - if (opts->delegate_cmds || opts->delegate_maps || - opts->delegate_progs || opts->delegate_attachs) { - struct bpffs_btf_enums info; - - /* ignore errors, fallback to hex */ - (void)find_bpffs_btf_enums(&info); - - mask = (1ULL << __MAX_BPF_CMD) - 1; - seq_print_delegate_opts(m, "delegate_cmds", - info.btf, info.cmd_t, "BPF_", - opts->delegate_cmds, mask); - - mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_maps", - info.btf, info.map_t, "BPF_MAP_TYPE_", - opts->delegate_maps, mask); - - mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_progs", - info.btf, info.prog_t, "BPF_PROG_TYPE_", - opts->delegate_progs, mask); - - mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; - seq_print_delegate_opts(m, "delegate_attachs", - info.btf, info.attach_t, "BPF_", - opts->delegate_attachs, mask); - } - return 0; } @@ -786,7 +617,7 @@ static void bpf_free_inode(struct inode *inode) free_inode_nonrcu(inode); } -const struct super_operations bpf_super_ops = { +static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, @@ -794,33 +625,23 @@ const struct super_operations bpf_super_ops = { }; enum { - OPT_UID, - OPT_GID, OPT_MODE, - OPT_DELEGATE_CMDS, - OPT_DELEGATE_MAPS, - OPT_DELEGATE_PROGS, - OPT_DELEGATE_ATTACHS, }; static const struct fs_parameter_spec bpf_fs_parameters[] = { - fsparam_u32 ("uid", OPT_UID), - fsparam_u32 ("gid", OPT_GID), fsparam_u32oct ("mode", OPT_MODE), - fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS), - fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS), - fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS), - fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS), {} }; +struct bpf_mount_opts { + umode_t mode; +}; + static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct bpf_mount_opts *opts = fc->s_fs_info; + struct bpf_mount_opts *opts = fc->fs_private; struct fs_parse_result result; - kuid_t uid; - kgid_t gid; - int opt, err; + int opt; opt = fs_parse(fc, bpf_fs_parameters, param, &result); if (opt < 0) { @@ -841,104 +662,12 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) } switch (opt) { - case OPT_UID: - uid = make_kuid(current_user_ns(), result.uint_32); - if (!uid_valid(uid)) - goto bad_value; - - /* - * The requested uid must be representable in the - * filesystem's idmapping. - */ - if (!kuid_has_mapping(fc->user_ns, uid)) - goto bad_value; - - opts->uid = uid; - break; - case OPT_GID: - gid = make_kgid(current_user_ns(), result.uint_32); - if (!gid_valid(gid)) - goto bad_value; - - /* - * The requested gid must be representable in the - * filesystem's idmapping. 
- */ - if (!kgid_has_mapping(fc->user_ns, gid)) - goto bad_value; - - opts->gid = gid; - break; case OPT_MODE: opts->mode = result.uint_32 & S_IALLUGO; break; - case OPT_DELEGATE_CMDS: - case OPT_DELEGATE_MAPS: - case OPT_DELEGATE_PROGS: - case OPT_DELEGATE_ATTACHS: { - struct bpffs_btf_enums info; - const struct btf_type *enum_t; - const char *enum_pfx; - u64 *delegate_msk, msk = 0; - char *p; - int val; - - /* ignore errors, fallback to hex */ - (void)find_bpffs_btf_enums(&info); - - switch (opt) { - case OPT_DELEGATE_CMDS: - delegate_msk = &opts->delegate_cmds; - enum_t = info.cmd_t; - enum_pfx = "BPF_"; - break; - case OPT_DELEGATE_MAPS: - delegate_msk = &opts->delegate_maps; - enum_t = info.map_t; - enum_pfx = "BPF_MAP_TYPE_"; - break; - case OPT_DELEGATE_PROGS: - delegate_msk = &opts->delegate_progs; - enum_t = info.prog_t; - enum_pfx = "BPF_PROG_TYPE_"; - break; - case OPT_DELEGATE_ATTACHS: - delegate_msk = &opts->delegate_attachs; - enum_t = info.attach_t; - enum_pfx = "BPF_"; - break; - default: - return -EINVAL; - } - - while ((p = strsep(¶m->string, ":"))) { - if (strcmp(p, "any") == 0) { - msk |= ~0ULL; - } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) { - msk |= 1ULL << val; - } else { - err = kstrtou64(p, 0, &msk); - if (err) - return err; - } - } - - /* Setting delegation mount options requires privileges */ - if (msk && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - *delegate_msk |= msk; - break; - } - default: - /* ignore unknown mount options */ - break; } return 0; - -bad_value: - return invalfc(fc, "Bad value for '%s'", param->key); } struct bpf_preload_ops *bpf_preload_ops; @@ -1010,14 +739,10 @@ out: static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; - struct bpf_mount_opts *opts = sb->s_fs_info; + struct bpf_mount_opts *opts = fc->fs_private; struct inode *inode; int ret; - /* Mounting an instance of BPF FS requires privileges */ - if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN)) - return -EPERM; - ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); if (ret) return ret; @@ -1025,8 +750,6 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_op = &bpf_super_ops; inode = sb->s_root->d_inode; - inode->i_uid = opts->uid; - inode->i_gid = opts->gid; inode->i_op = &bpf_dir_iops; inode->i_mode &= ~S_IALLUGO; populate_bpffs(sb->s_root); @@ -1041,7 +764,7 @@ static int bpf_get_tree(struct fs_context *fc) static void bpf_free_fc(struct fs_context *fc) { - kfree(fc->s_fs_info); + kfree(fc->fs_private); } static const struct fs_context_operations bpf_context_ops = { @@ -1062,35 +785,18 @@ static int bpf_init_fs_context(struct fs_context *fc) return -ENOMEM; opts->mode = S_IRWXUGO; - opts->uid = current_fsuid(); - opts->gid = current_fsgid(); - - /* start out with no BPF token delegation enabled */ - opts->delegate_cmds = 0; - opts->delegate_maps = 0; - opts->delegate_progs = 0; - opts->delegate_attachs = 0; - fc->s_fs_info = opts; + fc->fs_private = opts; fc->ops = &bpf_context_ops; return 0; } -static void bpf_kill_super(struct super_block *sb) -{ - struct bpf_mount_opts *opts = sb->s_fs_info; - - kill_litter_super(sb); - kfree(opts); -} - static struct file_system_type bpf_fs_type = { .owner = THIS_MODULE, .name = "bpf", .init_fs_context = bpf_init_fs_context, .parameters = bpf_fs_parameters, - .kill_sb = bpf_kill_super, - .fs_flags = FS_USERNS_MOUNT, + .kill_sb = kill_litter_super, }; static int __init bpf_init(void) diff --git 
a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8faa1a20edf8..1bf9805ee185 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1011,8 +1011,8 @@ int map_check_no_btf(const struct bpf_map *map, return -ENOTSUPP; } -static int map_check_btf(struct bpf_map *map, struct bpf_token *token, - const struct btf *btf, u32 btf_key_id, u32 btf_value_id) +static int map_check_btf(struct bpf_map *map, const struct btf *btf, + u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; u32 key_size, value_size; @@ -1040,7 +1040,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, if (!IS_ERR_OR_NULL(map->record)) { int i; - if (!bpf_token_capable(token, CAP_BPF)) { + if (!bpf_capable()) { ret = -EPERM; goto free_map_tab; } @@ -1123,17 +1123,11 @@ free_map_tab: return ret; } -static bool bpf_net_capable(void) -{ - return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); -} - -#define BPF_MAP_CREATE_LAST_FIELD map_token_fd +#define BPF_MAP_CREATE_LAST_FIELD map_extra /* called via syscall */ static int map_create(union bpf_attr *attr) { const struct bpf_map_ops *ops; - struct bpf_token *token = NULL; int numa_node = bpf_map_attr_numa_node(attr); u32 map_type = attr->map_type; struct bpf_map *map; @@ -1184,32 +1178,14 @@ static int map_create(union bpf_attr *attr) if (!ops->map_mem_usage) return -EINVAL; - if (attr->map_token_fd) { - token = bpf_token_get_from_fd(attr->map_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - - /* if current token doesn't grant map creation permissions, - * then we can't use this token, so ignore it and rely on - * system-wide capabilities checks - */ - if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) || - !bpf_token_allow_map_type(token, attr->map_type)) { - bpf_token_put(token); - token = NULL; - } - } - - err = -EPERM; - /* Intent here is for unprivileged_bpf_disabled to block BPF map * creation for unprivileged users; other actions depend * on fd availability and access to bpffs, so are dependent on * object creation success. Even with unprivileged BPF disabled, * capability checks are still carried out. 
*/ - if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF)) - goto put_token; + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; /* check privileged map type permissions */ switch (map_type) { @@ -1242,27 +1218,25 @@ static int map_create(union bpf_attr *attr) case BPF_MAP_TYPE_LRU_PERCPU_HASH: case BPF_MAP_TYPE_STRUCT_OPS: case BPF_MAP_TYPE_CPUMAP: - if (!bpf_token_capable(token, CAP_BPF)) - goto put_token; + if (!bpf_capable()) + return -EPERM; break; case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_XSKMAP: - if (!bpf_token_capable(token, CAP_NET_ADMIN)) - goto put_token; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; break; default: WARN(1, "unsupported map type %d", map_type); - goto put_token; + return -EPERM; } map = ops->map_alloc(attr); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto put_token; - } + if (IS_ERR(map)) + return PTR_ERR(map); map->ops = ops; map->map_type = map_type; @@ -1299,7 +1273,7 @@ static int map_create(union bpf_attr *attr) map->btf = btf; if (attr->btf_value_type_id) { - err = map_check_btf(map, token, btf, attr->btf_key_type_id, + err = map_check_btf(map, btf, attr->btf_key_type_id, attr->btf_value_type_id); if (err) goto free_map; @@ -1311,16 +1285,15 @@ static int map_create(union bpf_attr *attr) attr->btf_vmlinux_value_type_id; } - err = security_bpf_map_create(map, attr, token); + err = security_bpf_map_alloc(map); if (err) - goto free_map_sec; + goto free_map; err = bpf_map_alloc_id(map); if (err) goto free_map_sec; bpf_map_save_memcg(map); - bpf_token_put(token); err = bpf_map_new_fd(map, f_flags); if (err < 0) { @@ -1341,8 +1314,6 @@ free_map_sec: free_map: btf_put(map->btf); map->ops->map_free(map); -put_token: - bpf_token_put(token); return err; } @@ -2173,7 +2144,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) kvfree(aux->func_info); kfree(aux->func_info_aux); free_uid(aux->user); - security_bpf_prog_free(aux->prog); + security_bpf_prog_free(aux); bpf_prog_free(aux->prog); } @@ -2619,15 +2590,13 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd +#define BPF_PROG_LOAD_LAST_FIELD log_true_size static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; struct btf *attach_btf = NULL; - struct bpf_token *token = NULL; - bool bpf_cap; int err; char license[128]; @@ -2644,31 +2613,10 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; - bpf_prog_load_fixup_attach_type(attr); - - if (attr->prog_token_fd) { - token = bpf_token_get_from_fd(attr->prog_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - /* if current token doesn't grant prog loading permissions, - * then we can't use this token, so ignore it and rely on - * system-wide capabilities checks - */ - if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) || - !bpf_token_allow_prog_type(token, attr->prog_type, - attr->expected_attach_type)) { - bpf_token_put(token); - token = NULL; - } - } - - bpf_cap = bpf_token_capable(token, CAP_BPF); - err = -EPERM; - if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && - !bpf_cap) - goto put_token; + !bpf_capable()) + return -EPERM; /* Intent here is for 
unprivileged_bpf_disabled to block BPF program * creation for unprivileged users; other actions depend @@ -2677,23 +2625,21 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) * capability checks are still carried out for these * and other operations. */ - if (sysctl_unprivileged_bpf_disabled && !bpf_cap) - goto put_token; + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; if (attr->insn_cnt == 0 || - attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { - err = -E2BIG; - goto put_token; - } + attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) + return -E2BIG; if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && - !bpf_cap) - goto put_token; + !bpf_capable()) + return -EPERM; - if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN)) - goto put_token; - if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON)) - goto put_token; + if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) + return -EPERM; + if (is_perfmon_prog_type(type) && !perfmon_capable()) + return -EPERM; /* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog * or btf, we need to check which one it is @@ -2703,33 +2649,27 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (IS_ERR(dst_prog)) { dst_prog = NULL; attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd); - if (IS_ERR(attach_btf)) { - err = -EINVAL; - goto put_token; - } + if (IS_ERR(attach_btf)) + return -EINVAL; if (!btf_is_kernel(attach_btf)) { /* attaching through specifying bpf_prog's BTF * objects directly might be supported eventually */ btf_put(attach_btf); - err = -ENOTSUPP; - goto put_token; + return -ENOTSUPP; } } } else if (attr->attach_btf_id) { /* fall back to vmlinux BTF, if BTF type ID is specified */ attach_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(attach_btf)) { - err = PTR_ERR(attach_btf); - goto put_token; - } - if (!attach_btf) { - err = -EINVAL; - goto put_token; - } + if (IS_ERR(attach_btf)) + return PTR_ERR(attach_btf); + if (!attach_btf) + return -EINVAL; btf_get(attach_btf); } + bpf_prog_load_fixup_attach_type(attr); if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attach_btf, attr->attach_btf_id, dst_prog)) { @@ -2737,8 +2677,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - err = -EINVAL; - goto put_token; + return -EINVAL; } /* plain bpf_prog allocation */ @@ -2748,8 +2687,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) bpf_prog_put(dst_prog); if (attach_btf) btf_put(attach_btf); - err = -EINVAL; - goto put_token; + return -ENOMEM; } prog->expected_attach_type = attr->expected_attach_type; @@ -2760,9 +2698,9 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; - /* move token into prog->aux, reuse taken refcnt */ - prog->aux->token = token; - token = NULL; + err = security_bpf_prog_alloc(prog->aux); + if (err) + goto free_prog; prog->aux->user = get_current_user(); prog->len = attr->insn_cnt; @@ -2771,12 +2709,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (copy_from_bpfptr(prog->insns, make_bpfptr(attr->insns, uattr.is_kernel), bpf_prog_insn_size(prog)) != 
0) - goto free_prog; + goto free_prog_sec; /* copy eBPF program license from user space */ if (strncpy_from_bpfptr(license, make_bpfptr(attr->license, uattr.is_kernel), sizeof(license) - 1) < 0) - goto free_prog; + goto free_prog_sec; license[sizeof(license) - 1] = 0; /* eBPF programs must be GPL compatible to use GPL-ed functions */ @@ -2790,29 +2728,25 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_dev_bound_init(prog, attr); if (err) - goto free_prog; + goto free_prog_sec; } if (type == BPF_PROG_TYPE_EXT && dst_prog && bpf_prog_is_dev_bound(dst_prog->aux)) { err = bpf_prog_dev_bound_inherit(prog, dst_prog); if (err) - goto free_prog; + goto free_prog_sec; } /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); if (err < 0) - goto free_prog; + goto free_prog_sec; prog->aux->load_time = ktime_get_boottime_ns(); err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, sizeof(attr->prog_name)); if (err < 0) - goto free_prog; - - err = security_bpf_prog_load(prog, attr, token); - if (err) goto free_prog_sec; /* run eBPF verifier */ @@ -2858,16 +2792,13 @@ free_used_maps: */ __bpf_prog_put_noref(prog, prog->aux->real_func_cnt); return err; - free_prog_sec: - security_bpf_prog_free(prog); -free_prog: free_uid(prog->aux->user); + security_bpf_prog_free(prog->aux); +free_prog: if (prog->aux->attach_btf) btf_put(prog->aux->attach_btf); bpf_prog_free(prog); -put_token: - bpf_token_put(token); return err; } @@ -3857,7 +3788,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_SK_LOOKUP: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; case BPF_PROG_TYPE_CGROUP_SKB: - if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) /* cg-skb progs can be loaded by unpriv user. * check permissions at attach time. 
*/ @@ -4060,7 +3991,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { - if (!bpf_net_capable()) + if (!capable(CAP_NET_ADMIN)) return -EPERM; if (CHECK_ATTR(BPF_PROG_QUERY)) return -EINVAL; @@ -4828,31 +4759,15 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } -#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd +#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { - struct bpf_token *token = NULL; - if (CHECK_ATTR(BPF_BTF_LOAD)) return -EINVAL; - if (attr->btf_token_fd) { - token = bpf_token_get_from_fd(attr->btf_token_fd); - if (IS_ERR(token)) - return PTR_ERR(token); - if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) { - bpf_token_put(token); - token = NULL; - } - } - - if (!bpf_token_capable(token, CAP_BPF)) { - bpf_token_put(token); + if (!bpf_capable()) return -EPERM; - } - - bpf_token_put(token); return btf_new_fd(attr, uattr, uattr_size); } @@ -5470,20 +5385,6 @@ out_prog_put: return ret; } -#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd - -static int token_create(union bpf_attr *attr) -{ - if (CHECK_ATTR(BPF_TOKEN_CREATE)) - return -EINVAL; - - /* no flags are supported yet */ - if (attr->token_create.flags) - return -EINVAL; - - return bpf_token_create(attr); -} - static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; @@ -5617,9 +5518,6 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) case BPF_PROG_BIND_MAP: err = bpf_prog_bind_map(&attr); break; - case BPF_TOKEN_CREATE: - err = token_create(&attr); - break; default: err = -EINVAL; break; @@ -5726,7 +5624,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = { const struct bpf_func_proto * __weak tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } BPF_CALL_1(bpf_sys_close, u32, fd) @@ -5776,8 +5674,7 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_sys_bpf: - return !bpf_token_capable(prog->aux->token, CAP_PERFMON) - ? NULL : &bpf_sys_bpf_proto; + return !perfmon_capable() ? 
NULL : &bpf_sys_bpf_proto; case BPF_FUNC_btf_find_by_name_kind: return &bpf_btf_find_by_name_kind_proto; case BPF_FUNC_sys_close: diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c deleted file mode 100644 index a86fccd57e2d..000000000000 --- a/kernel/bpf/token.c +++ /dev/null @@ -1,271 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -bool bpf_token_capable(const struct bpf_token *token, int cap) -{ - /* BPF token allows ns_capable() level of capabilities, but only if - * token's userns is *exactly* the same as current user's userns - */ - if (token && current_user_ns() == token->userns) { - if (ns_capable(token->userns, cap) || - (cap != CAP_SYS_ADMIN && ns_capable(token->userns, CAP_SYS_ADMIN))) - return security_bpf_token_capable(token, cap) == 0; - } - /* otherwise fallback to capable() checks */ - return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); -} - -void bpf_token_inc(struct bpf_token *token) -{ - atomic64_inc(&token->refcnt); -} - -static void bpf_token_free(struct bpf_token *token) -{ - security_bpf_token_free(token); - put_user_ns(token->userns); - kvfree(token); -} - -static void bpf_token_put_deferred(struct work_struct *work) -{ - struct bpf_token *token = container_of(work, struct bpf_token, work); - - bpf_token_free(token); -} - -void bpf_token_put(struct bpf_token *token) -{ - if (!token) - return; - - if (!atomic64_dec_and_test(&token->refcnt)) - return; - - INIT_WORK(&token->work, bpf_token_put_deferred); - schedule_work(&token->work); -} - -static int bpf_token_release(struct inode *inode, struct file *filp) -{ - struct bpf_token *token = filp->private_data; - - bpf_token_put(token); - return 0; -} - -static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) -{ - struct bpf_token *token = filp->private_data; - u64 mask; - - BUILD_BUG_ON(__MAX_BPF_CMD >= 64); - mask = (1ULL << __MAX_BPF_CMD) - 1; - if ((token->allowed_cmds & mask) == mask) - seq_printf(m, "allowed_cmds:\tany\n"); - else - seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); - - BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); - mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1; - if ((token->allowed_maps & mask) == mask) - seq_printf(m, "allowed_maps:\tany\n"); - else - seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); - - BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); - mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1; - if ((token->allowed_progs & mask) == mask) - seq_printf(m, "allowed_progs:\tany\n"); - else - seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); - - BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); - mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1; - if ((token->allowed_attachs & mask) == mask) - seq_printf(m, "allowed_attachs:\tany\n"); - else - seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs); -} - -#define BPF_TOKEN_INODE_NAME "bpf-token" - -static const struct inode_operations bpf_token_iops = { }; - -static const struct file_operations bpf_token_fops = { - .release = bpf_token_release, - .show_fdinfo = bpf_token_show_fdinfo, -}; - -int bpf_token_create(union bpf_attr *attr) -{ - struct bpf_mount_opts *mnt_opts; - struct bpf_token *token = NULL; - struct user_namespace *userns; - struct inode *inode; - struct file *file; - struct path path; - struct fd f; - umode_t mode; - int err, fd; - - f = fdget(attr->token_create.bpffs_fd); - if (!f.file) - return -EBADF; - - path = f.file->f_path; - path_get(&path); - fdput(f); - - if (path.dentry != path.mnt->mnt_sb->s_root) { - 
err = -EINVAL; - goto out_path; - } - if (path.mnt->mnt_sb->s_op != &bpf_super_ops) { - err = -EINVAL; - goto out_path; - } - err = path_permission(&path, MAY_ACCESS); - if (err) - goto out_path; - - userns = path.dentry->d_sb->s_user_ns; - /* - * Enforce that creators of BPF tokens are in the same user - * namespace as the BPF FS instance. This makes reasoning about - * permissions a lot easier and we can always relax this later. - */ - if (current_user_ns() != userns) { - err = -EPERM; - goto out_path; - } - if (!ns_capable(userns, CAP_BPF)) { - err = -EPERM; - goto out_path; - } - - mnt_opts = path.dentry->d_sb->s_fs_info; - if (mnt_opts->delegate_cmds == 0 && - mnt_opts->delegate_maps == 0 && - mnt_opts->delegate_progs == 0 && - mnt_opts->delegate_attachs == 0) { - err = -ENOENT; /* no BPF token delegation is set up */ - goto out_path; - } - - mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); - inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out_path; - } - - inode->i_op = &bpf_token_iops; - inode->i_fop = &bpf_token_fops; - clear_nlink(inode); /* make sure it is unlinked */ - - file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops); - if (IS_ERR(file)) { - iput(inode); - err = PTR_ERR(file); - goto out_path; - } - - token = kvzalloc(sizeof(*token), GFP_USER); - if (!token) { - err = -ENOMEM; - goto out_file; - } - - atomic64_set(&token->refcnt, 1); - - /* remember bpffs owning userns for future ns_capable() checks */ - token->userns = get_user_ns(userns); - - token->allowed_cmds = mnt_opts->delegate_cmds; - token->allowed_maps = mnt_opts->delegate_maps; - token->allowed_progs = mnt_opts->delegate_progs; - token->allowed_attachs = mnt_opts->delegate_attachs; - - err = security_bpf_token_create(token, attr, &path); - if (err) - goto out_token; - - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - err = fd; - goto out_token; - } - - file->private_data = token; - fd_install(fd, file); - - path_put(&path); - return fd; - -out_token: - bpf_token_free(token); -out_file: - fput(file); -out_path: - path_put(&path); - return err; -} - -struct bpf_token *bpf_token_get_from_fd(u32 ufd) -{ - struct fd f = fdget(ufd); - struct bpf_token *token; - - if (!f.file) - return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_token_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - token = f.file->private_data; - bpf_token_inc(token); - fdput(f); - - return token; -} - -bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) -{ - /* BPF token can be used only within exactly the same userns in which - * it was created - */ - if (!token || current_user_ns() != token->userns) - return false; - if (!(token->allowed_cmds & (1ULL << cmd))) - return false; - return security_bpf_token_cmd(token, cmd) == 0; -} - -bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type) -{ - if (!token || type >= __MAX_BPF_MAP_TYPE) - return false; - - return token->allowed_maps & (1ULL << type); -} - -bool bpf_token_allow_prog_type(const struct bpf_token *token, - enum bpf_prog_type prog_type, - enum bpf_attach_type attach_type) -{ - if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) - return false; - - return (token->allowed_progs & (1ULL << prog_type)) && - (token->allowed_attachs & (1ULL << attach_type)); -} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9456ee0ad129..4ceec8c2a484 100644 --- a/kernel/bpf/verifier.c +++ 
b/kernel/bpf/verifier.c @@ -20594,12 +20594,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); - - env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); - env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); - env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token); - env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token); - env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF); + is_priv = bpf_capable(); bpf_get_btf_vmlinux(); @@ -20631,6 +20626,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; + env->allow_ptr_leaks = bpf_allow_ptr_leaks(); + env->allow_uninit_stack = bpf_allow_uninit_stack(); + env->bypass_spec_v1 = bpf_bypass_spec_v1(); + env->bypass_spec_v4 = bpf_bypass_spec_v4(); + env->bpf_capable = bpf_capable(); + if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 492d60e9c480..7ac6c52b25eb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1629,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_vprintk: return bpf_get_trace_vprintk_proto(); default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/net/core/filter.c b/net/core/filter.c index 3cc52b82bab8..24061f29c9dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,7 +87,7 @@ #include "dev.h" static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); +bpf_sk_base_func_proto(enum bpf_func_id func_id); int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len) { @@ -7862,7 +7862,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -7955,7 +7955,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -7974,7 +7974,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8161,7 +8161,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8220,7 +8220,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) @@ -8281,7 +8281,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ default: - return bpf_sk_base_func_proto(func_id, prog); + 
return bpf_sk_base_func_proto(func_id); } } @@ -8323,7 +8323,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_cgroup_classid_curr_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8367,7 +8367,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_lookup_tcp_proto; #endif default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8378,7 +8378,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_load_bytes: return &bpf_flow_dissector_load_bytes_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8405,7 +8405,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -8580,7 +8580,7 @@ static bool cg_skb_is_valid_access(int off, int size, return false; case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_end): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; } @@ -8592,7 +8592,7 @@ static bool cg_skb_is_valid_access(int off, int size, case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]): break; case bpf_ctx_range(struct __sk_buff, tstamp): - if (!bpf_token_capable(prog->aux->token, CAP_BPF)) + if (!bpf_capable()) return false; break; default: @@ -11236,7 +11236,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } @@ -11418,7 +11418,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_sk_release: return &bpf_sk_release_proto; default: - return bpf_sk_base_func_proto(func_id, prog); + return bpf_sk_base_func_proto(func_id); } } @@ -11752,7 +11752,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = { }; static const struct bpf_func_proto * -bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +bpf_sk_base_func_proto(enum bpf_func_id func_id) { const struct bpf_func_proto *func; @@ -11781,10 +11781,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } - if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) + if (!perfmon_capable()) return NULL; return func; diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 634cfafa583d..ae8b15e6896f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -191,7 +191,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } } diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c index 5257d5e7eb09..0e4beae421f8 100644 --- a/net/netfilter/nf_bpf_link.c +++ b/net/netfilter/nf_bpf_link.c @@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, static const struct bpf_func_proto * 
bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return bpf_base_func_proto(func_id, prog); + return bpf_base_func_proto(func_id); } const struct bpf_verifier_ops netfilter_verifier_ops = { diff --git a/security/security.c b/security/security.c index 088a79c35c26..dcb3e7014f9b 100644 --- a/security/security.c +++ b/security/security.c @@ -5167,87 +5167,29 @@ int security_bpf_prog(struct bpf_prog *prog) } /** - * security_bpf_map_create() - Check if BPF map creation is allowed - * @map: BPF map object - * @attr: BPF syscall attributes used to create BPF map - * @token: BPF token used to grant user access - * - * Do a check when the kernel creates a new BPF map. This is also the - * point where LSM blob is allocated for LSMs that need them. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) -{ - return call_int_hook(bpf_map_create, 0, map, attr, token); -} - -/** - * security_bpf_prog_load() - Check if loading of BPF program is allowed - * @prog: BPF program object - * @attr: BPF syscall attributes used to create BPF program - * @token: BPF token used to grant user access to BPF subsystem - * - * Perform an access control check when the kernel loads a BPF program and - * allocates associated BPF program object. This hook is also responsible for - * allocating any required LSM state for the BPF program. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) -{ - return call_int_hook(bpf_prog_load, 0, prog, attr, token); -} - -/** - * security_bpf_token_create() - Check if creating of BPF token is allowed - * @token: BPF token object - * @attr: BPF syscall attributes used to create BPF token - * @path: path pointing to BPF FS mount point from which BPF token is created - * - * Do a check when the kernel instantiates a new BPF token object from BPF FS - * instance. This is also the point where LSM blob can be allocated for LSMs. - * - * Return: Returns 0 on success, error on failure. - */ -int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - return call_int_hook(bpf_token_create, 0, token, attr, path); -} - -/** - * security_bpf_token_cmd() - Check if BPF token is allowed to delegate - * requested BPF syscall command - * @token: BPF token object - * @cmd: BPF syscall command requested to be delegated by BPF token + * security_bpf_map_alloc() - Allocate a bpf map LSM blob + * @map: bpf map * - * Do a check when the kernel decides whether provided BPF token should allow - * delegation of requested BPF syscall command. + * Initialize the security field inside bpf map. * * Return: Returns 0 on success, error on failure. */ -int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +int security_bpf_map_alloc(struct bpf_map *map) { - return call_int_hook(bpf_token_cmd, 0, token, cmd); + return call_int_hook(bpf_map_alloc_security, 0, map); } /** - * security_bpf_token_capable() - Check if BPF token is allowed to delegate - * requested BPF-related capability - * @token: BPF token object - * @cap: capabilities requested to be delegated by BPF token + * security_bpf_prog_alloc() - Allocate a bpf program LSM blob + * @aux: bpf program aux info struct * - * Do a check when the kernel decides whether provided BPF token should allow - * delegation of requested BPF-related capabilities. 
+ * Initialize the security field inside bpf program. * * Return: Returns 0 on success, error on failure. */ -int security_bpf_token_capable(const struct bpf_token *token, int cap) +int security_bpf_prog_alloc(struct bpf_prog_aux *aux) { - return call_int_hook(bpf_token_capable, 0, token, cap); + return call_int_hook(bpf_prog_alloc_security, 0, aux); } /** @@ -5258,29 +5200,18 @@ int security_bpf_token_capable(const struct bpf_token *token, int cap) */ void security_bpf_map_free(struct bpf_map *map) { - call_void_hook(bpf_map_free, map); -} - -/** - * security_bpf_prog_free() - Free a BPF program's LSM blob - * @prog: BPF program struct - * - * Clean up the security information stored inside BPF program. - */ -void security_bpf_prog_free(struct bpf_prog *prog) -{ - call_void_hook(bpf_prog_free, prog); + call_void_hook(bpf_map_free_security, map); } /** - * security_bpf_token_free() - Free a BPF token's LSM blob - * @token: BPF token struct + * security_bpf_prog_free() - Free a bpf program's LSM blob + * @aux: bpf program aux info struct * - * Clean up the security information stored inside BPF token. + * Clean up the security information stored inside bpf prog. */ -void security_bpf_token_free(struct bpf_token *token) +void security_bpf_prog_free(struct bpf_prog_aux *aux) { - call_void_hook(bpf_token_free, token); + call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1501e95366a1..feda711c6b7b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6783,8 +6783,7 @@ static int selinux_bpf_prog(struct bpf_prog *prog) BPF__PROG_RUN, NULL); } -static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) +static int selinux_bpf_map_alloc(struct bpf_map *map) { struct bpf_security_struct *bpfsec; @@ -6806,8 +6805,7 @@ static void selinux_bpf_map_free(struct bpf_map *map) kfree(bpfsec); } -static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) +static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux) { struct bpf_security_struct *bpfsec; @@ -6816,39 +6814,16 @@ static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, return -ENOMEM; bpfsec->sid = current_sid(); - prog->aux->security = bpfsec; + aux->security = bpfsec; return 0; } -static void selinux_bpf_prog_free(struct bpf_prog *prog) +static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) { - struct bpf_security_struct *bpfsec = prog->aux->security; + struct bpf_security_struct *bpfsec = aux->security; - prog->aux->security = NULL; - kfree(bpfsec); -} - -static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) -{ - struct bpf_security_struct *bpfsec; - - bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL); - if (!bpfsec) - return -ENOMEM; - - bpfsec->sid = current_sid(); - token->security = bpfsec; - - return 0; -} - -static void selinux_bpf_token_free(struct bpf_token *token) -{ - struct bpf_security_struct *bpfsec = token->security; - - token->security = NULL; + aux->security = NULL; kfree(bpfsec); } #endif @@ -7204,9 +7179,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(bpf, selinux_bpf), LSM_HOOK_INIT(bpf_map, selinux_bpf_map), LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog), - LSM_HOOK_INIT(bpf_map_free, selinux_bpf_map_free), - LSM_HOOK_INIT(bpf_prog_free, selinux_bpf_prog_free), - LSM_HOOK_INIT(bpf_token_free, 
selinux_bpf_token_free), + LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), + LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif #ifdef CONFIG_PERF_EVENTS @@ -7263,9 +7237,8 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(audit_rule_init, selinux_audit_rule_init), #endif #ifdef CONFIG_BPF_SYSCALL - LSM_HOOK_INIT(bpf_map_create, selinux_bpf_map_create), - LSM_HOOK_INIT(bpf_prog_load, selinux_bpf_prog_load), - LSM_HOOK_INIT(bpf_token_create, selinux_bpf_token_create), + LSM_HOOK_INIT(bpf_map_alloc_security, selinux_bpf_map_alloc), + LSM_HOOK_INIT(bpf_prog_alloc_security, selinux_bpf_prog_alloc), #endif #ifdef CONFIG_PERF_EVENTS LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e0545201b55f..7f24d898efbb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,36 +847,6 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * - * BPF_TOKEN_CREATE - * Description - * Create BPF token with embedded information about what - * BPF-related functionality it allows: - * - a set of allowed bpf() syscall commands; - * - a set of allowed BPF map types to be created with - * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; - * - a set of allowed BPF program types and BPF program attach - * types to be loaded with BPF_PROG_LOAD command, if - * BPF_PROG_LOAD itself is allowed. - * - * BPF token is created (derived) from an instance of BPF FS, - * assuming it has necessary delegation mount options specified. - * This BPF token can be passed as an extra parameter to various - * bpf() syscall commands to grant BPF subsystem functionality to - * unprivileged processes. - * - * When created, BPF token is "associated" with the owning - * user namespace of BPF FS instance (super block) that it was - * derived from, and subsequent BPF operations performed with - * BPF token would be performing capabilities checks (i.e., - * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within - * that user namespace. Without BPF token, such capabilities - * have to be granted in init user namespace, making bpf() - * syscall incompatible with user namespace, for the most part. - * - * Return - * A new file descriptor (a nonnegative integer), or -1 if an - * error occurred (in which case, *errno* is set appropriately). - * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -931,8 +901,6 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, - BPF_TOKEN_CREATE, - __MAX_BPF_CMD, }; enum bpf_map_type { @@ -983,7 +951,6 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, - __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1028,7 +995,6 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, - __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1437,7 +1403,6 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; - __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1507,7 +1472,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). 
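
For reference, a minimal userspace sketch of how the BPF_TOKEN_CREATE command documented above was issued before this revert. It only compiles against the pre-revert UAPI header, which still defines BPF_TOKEN_CREATE and the token_create fields of union bpf_attr; the wrapper name is illustrative and not part of any kernel or libbpf API.

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Illustrative wrapper: returns a BPF token FD on success,
 * -1 with errno set on failure.
 */
static int bpf_token_create_raw(int bpffs_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.token_create.bpffs_fd = bpffs_fd;
	attr.token_create.flags = 0;	/* no flags were defined */

	return syscall(__NR_bpf, BPF_TOKEN_CREATE, &attr, sizeof(attr));
}
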
*/ __u32 log_true_size; - __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1620,7 +1584,6 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; - __u32 btf_token_fd; }; struct { @@ -1751,11 +1714,6 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; - struct { /* struct used by BPF_TOKEN_CREATE command */ - __u32 flags; - __u32 bpffs_fd; - } token_create; - } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index b6619199a706..2d0c282c8588 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o features.o + usdt.o zip.o elf.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 0ad8e532b3cf..9dc9625651dc 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(int token_fd) +int probe_memcg_account(void) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,7 +120,6 @@ int probe_memcg_account(int token_fd) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); - attr.prog_token_fd = token_fd; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { @@ -147,7 +146,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -170,7 +169,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); union bpf_attr attr; int fd; @@ -182,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) + if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -199,8 +198,6 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); - attr.map_token_fd = OPTS_GET(opts, token_fd, 0); - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -235,7 +232,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -264,9 +261,8 @@ int bpf_prog_load(enum bpf_prog_type prog_type, 
attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); - attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) + if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); @@ -1186,7 +1182,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1211,8 +1207,6 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; - attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); - /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading @@ -1293,20 +1287,3 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } - -int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) -{ - const size_t attr_sz = offsetofend(union bpf_attr, token_create); - union bpf_attr attr; - int fd; - - if (!OPTS_VALID(opts, bpf_token_create_opts)) - return libbpf_err(-EINVAL); - - memset(&attr, 0, attr_sz); - attr.token_create.bpffs_fd = bpffs_fd; - attr.token_create.flags = OPTS_GET(opts, flags, 0); - - fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); - return libbpf_err_errno(fd); -} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 991b86bfe7e4..d0f53772bdc0 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,11 +51,8 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; - - __u32 token_fd; - size_t :0; }; -#define bpf_map_create_opts__last_field token_fd +#define bpf_map_create_opts__last_field map_ifindex LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, @@ -105,10 +102,9 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; - __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field token_fd +#define bpf_prog_load_opts__last_field log_true_size LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, @@ -134,10 +130,9 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; - __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field token_fd +#define bpf_btf_load_opts__last_field log_true_size LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); @@ -645,30 +640,6 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); -struct bpf_token_create_opts { - size_t sz; /* size of this struct for forward/backward compatibility */ - __u32 flags; - size_t :0; -}; -#define bpf_token_create_opts__last_field flags - -/** - * @brief **bpf_token_create()** creates a new instance of BPF token derived - * from specified BPF FS mount point. 
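
For context, a minimal sketch of how the libbpf pieces being removed here fit together: bpf_token_create() plus the token_fd field of bpf_map_create_opts. It assumes a pre-revert libbpf and a BPF FS instance at /sys/fs/bpf that was mounted with the delegate_* options handled in the inode.c hunk above; the function name and map parameters are illustrative.

#include <bpf/bpf.h>		/* pre-revert libbpf with token support */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int create_map_with_token(void)
{
	LIBBPF_OPTS(bpf_token_create_opts, tok_opts);
	LIBBPF_OPTS(bpf_map_create_opts, map_opts);
	int bpffs_fd, token_fd, map_fd;

	bpffs_fd = open("/sys/fs/bpf", O_RDONLY | O_DIRECTORY);
	if (bpffs_fd < 0)
		return -errno;

	/* derive a token from the bpffs instance */
	token_fd = bpf_token_create(bpffs_fd, &tok_opts);
	close(bpffs_fd);
	if (token_fd < 0)
		return token_fd;

	/* capability checks are then done against the bpffs owning userns */
	map_opts.token_fd = token_fd;
	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "token_map",
				sizeof(__u32), sizeof(__u64), 1, &map_opts);
	close(token_fd);
	return map_fd;
}
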
- * - * BPF token created with this API can be passed to bpf() syscall for - * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. - * - * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token - * instance. - * @param opts optional BPF token creation options, can be NULL - * - * @return BPF token FD > 0, on success; negative error code, otherwise (errno - * is also set to the error code) - */ -LIBBPF_API int bpf_token_create(int bpffs_fd, - struct bpf_token_create_opts *opts); - #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 63033c334320..ee95fd379d4d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,9 +1317,7 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, - char *log_buf, size_t log_sz, __u32 log_level, - int token_fd) +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1369,7 +1367,6 @@ retry_load: opts.log_level = log_level; } - opts.token_fd = token_fd; btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1397,7 +1394,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index c92e02394159..b02faec748a5 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,6 +11,8 @@ #include "libbpf_internal.h" #include "str_error.h" +#define STRERR_BUFSIZE 128 + /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c deleted file mode 100644 index ce98a334be21..000000000000 --- a/tools/lib/bpf/features.c +++ /dev/null @@ -1,478 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ -#include -#include -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_common.h" -#include "libbpf_internal.h" -#include "str_error.h" - -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64)(unsigned long)ptr; -} - -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(int token_fd) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - attr.prog_token_fd = token_fd; - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(int token_fd) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(int token_fd) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_func(int token_fd) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_func_global(int token_fd) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_datasec(int token_fd) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, 
- /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_float(int token_fd) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_decl_tag(int token_fd) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_btf_type_tag(int token_fd) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -static int probe_kern_array_mmap(int token_fd) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .map_flags = BPF_F_MMAPABLE, - .token_fd = token_fd, - ); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .token_fd = token_fd, - ); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_prog_bind_map(int token_fd) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(int token_fd) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(int token_fd) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), &opts); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(int token_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - .token_fd = token_fd, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(int token_fd) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(int token_fd) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), token_fd)); -} - -typedef int (*feature_probe_fn)(int /* token_fd */); - -static struct kern_feature_cache feature_cache; - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) -{ - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - /* assume global feature cache, unless custom one is provided */ - if (!cache) - cache = &feature_cache; - - if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { - ret = feat->probe(cache->token_fd); - if (ret > 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); 
- } - } - - return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; -} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4b5ff9508e18..ac54ebc0629f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,8 +59,6 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" - #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -695,10 +693,6 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct kern_feature_cache *feat_cache; - char *token_path; - int token_fd; - char path[]; }; @@ -2198,7 +2192,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = BPF_FS_DEFAULT_PATH; + path = "/sys/fs/bpf"; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3285,7 +3279,7 @@ skip_exception_cb: } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0, obj->token_fd); + obj->log_level ? 1 : 0); } if (sanitize) { if (!err) { @@ -4608,63 +4602,6 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } -static int bpf_object_prepare_token(struct bpf_object *obj) -{ - const char *bpffs_path; - int bpffs_fd = -1, token_fd, err; - bool mandatory; - enum libbpf_print_level level; - - /* token is already set up */ - if (obj->token_fd > 0) - return 0; - /* token is explicitly prevented */ - if (obj->token_fd < 0) { - pr_debug("object '%s': token is prevented, skipping...\n", obj->name); - /* reset to zero to avoid extra checks during map_create and prog_load steps */ - obj->token_fd = 0; - return 0; - } - - mandatory = obj->token_path != NULL; - level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; - - bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; - bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); - if (bpffs_fd < 0) { - err = -errno; - __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", - obj->name, err, bpffs_path, - mandatory ? "" : ", skipping optional step..."); - return mandatory ? err : 0; - } - - token_fd = bpf_token_create(bpffs_fd, 0); - close(bpffs_fd); - if (token_fd < 0) { - if (!mandatory && token_fd == -ENOENT) { - pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", - obj->name, bpffs_path); - return 0; - } - __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", - obj->name, token_fd, bpffs_path, - mandatory ? "" : ", skipping optional step..."); - return mandatory ? 
token_fd : 0; - } - - obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); - if (!obj->feat_cache) { - close(token_fd); - return -ENOMEM; - } - - obj->token_fd = token_fd; - obj->feat_cache->token_fd = token_fd; - - return 0; -} - static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4674,7 +4611,6 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); - LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); if (obj->gen_loader) return 0; @@ -4684,9 +4620,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4701,18 +4637,462 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_func_global(void) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_datasec(void) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_float(void) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_decl_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_btf_type_tag(void) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); + struct 
bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); +} + +static int probe_prog_bind_map(void) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(void) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. 
+ */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(void) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_syscall_wrapper(void); + +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; + enum kern_feature_result res; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + 
}, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ return true; - if (obj->token_fd) - return feat_supported(obj->feat_cache, feat_id); + if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { + ret = feat->probe(); + if (ret > 0) { + WRITE_ONCE(feat->res, FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(feat->res, FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(feat->res, FEAT_MISSING); + } + } - return feat_supported(NULL, feat_id); + return READ_ONCE(feat->res) == FEAT_SUPPORTED; } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -4831,7 +5211,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; - create_attr.token_fd = obj->token_fd; if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -6667,7 +7046,6 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.token_fd = obj->token_fd; /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(obj); @@ -7129,10 +7507,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path, *token_path; + const char *obj_name, *kconfig, *btf_tmp_path; struct bpf_object *obj; char tmp_name[64]; - int err, token_fd; + int err; char *log_buf; size_t log_size; __u32 log_level; @@ -7166,28 +7544,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); - token_path = OPTS_GET(opts, bpf_token_path, NULL); - token_fd = OPTS_GET(opts, bpf_token_fd, -1); - /* non-empty token path can't be combined with invalid token FD */ - if (token_path && token_path[0] != '\0' && token_fd < 0) - return ERR_PTR(-EINVAL); - /* empty token path can't be combined with valid token FD */ - if (token_path && token_path[0] == '\0' && token_fd > 0) - return ERR_PTR(-EINVAL); - /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, - * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as - * bpf_token_path option - */ - if (token_fd == 0 && !token_path) - token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); - /* empty token_path is equivalent 
to invalid token_fd */ - if (token_path && token_path[0] == '\0') { - token_path = NULL; - token_fd = -1; - } - if (token_path && strlen(token_path) >= PATH_MAX) - return ERR_PTR(-ENAMETOOLONG); - obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7196,19 +7552,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; - obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); - if (token_fd > 0 && obj->token_fd < 0) { - err = -errno; - goto out; - } - if (token_path) { - obj->token_path = strdup(token_path); - if (!obj->token_path) { - err = -ENOMEM; - goto out; - } - } - btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7719,8 +8062,7 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object_prepare_token(obj); - err = err ? : bpf_object__probe_loading(obj); + err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); @@ -8257,11 +8599,6 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); - zfree(&obj->feat_cache); - zfree(&obj->token_path); - if (obj->token_fd > 0) - close(obj->token_fd); - free(obj); } @@ -10275,7 +10612,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -int probe_kern_syscall_wrapper(int token_fd) +static int probe_kern_syscall_wrapper(void) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 916904bd2a7a..6cd9c501624f 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,45 +177,10 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; - /* FD of a BPF token instantiated by user through bpf_token_create() - * API. BPF object will keep dup()'ed FD internally, so passed token - * FD can be closed after BPF object/skeleton open step. - * - * Setting bpf_token_fd to negative value disables libbpf's automatic - * attempt to create BPF token from default BPF FS mount point - * (/sys/fs/bpf), in case this default behavior is undesirable. - * - * If bpf_token_path and bpf_token_fd are not specified, libbpf will - * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will - * be taken as a value of bpf_token_path option and will force libbpf - * to either create BPF token from provided custom BPF FS path, or - * will disable implicit BPF token creation, if envvar value is an - * empty string. - * - * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. Either of them overrides - * LIBBPF_BPF_TOKEN_PATH envvar. - */ - int bpf_token_fd; - /* Path to BPF FS mount point to derive BPF token from. - * - * Created BPF token will be used for all bpf() syscall operations - * that accept BPF token (e.g., map creation, BTF and program loads, - * etc) automatically within instantiated BPF object. - * - * Setting bpf_token_path option to empty string disables libbpf's - * automatic attempt to create BPF token from default BPF FS mount - * point (/sys/fs/bpf), in case this default behavior is undesirable. 
- * - * bpf_token_path and bpf_token_fd are mutually exclusive and only one - * of those options should be set. Either of them overrides - * LIBBPF_BPF_TOKEN_PATH envvar. - */ - const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field bpf_token_path +#define bpf_object_open_opts__last_field kernel_log_level /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index df7657b65c47..91c5aef7dae7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -401,7 +401,6 @@ LIBBPF_1.3.0 { bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; - bpf_token_create; ring__avail_data_size; ring__consume; ring__consumer_pos; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4cda32298c49..b5d334754e5d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,32 +360,15 @@ enum kern_feature_id { __FEAT_CNT, }; -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -struct kern_feature_cache { - enum kern_feature_result res[__FEAT_CNT]; - int token_fd; -}; - -bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); +int probe_memcg_account(void); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); - -int probe_kern_syscall_wrapper(int token_fd); -int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd); -int btf_load_into_kernel(struct btf *btf, - char *log_buf, size_t log_sz, __u32 log_level, - int token_fd); + const char *str_sec, size_t str_len); +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -549,17 +532,6 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } -/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. - * Original FD is not closed or altered in any other way. - * Preserves original FD value, if it's invalid (negative). - */ -static inline int dup_good_fd(int fd) -{ - if (fd < 0) - return fd; - return fcntl(fd, F_DUPFD_CLOEXEC, 3); -} - /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). 
@@ -571,7 +543,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = dup_good_fd(fd); + fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); saved_errno = errno; close(old_fd); errno = saved_errno; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 8e7437006639..9c4db90b92b6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,8 +219,7 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd) + const char *str_sec, size_t str_len) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -230,7 +229,6 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; - LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd); int btf_fd, btf_len; __u8 *raw_btf; @@ -243,7 +241,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); + btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); free(raw_btf); return btf_fd; @@ -273,7 +271,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs), 0); + strs, sizeof(strs)); } static int probe_map_create(enum bpf_map_type map_type) diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index 626d7ffb03d6..a139334d57b6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,8 +2,5 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H -#define STRERR_BUFSIZE 128 - char *libbpf_strerror_r(int err, char *dst, int len); - #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 4ed46ed58a7b..9f766ddd946a 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,8 +30,6 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; - if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) - continue; if (!test__start_subtest(prog_type_name)) continue; @@ -70,8 +68,6 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; - if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) - continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 62ea855ec4d0..eb34d612d6f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -132,9 +132,6 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; - if (map_type == __MAX_BPF_MAP_TYPE) - continue; - map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); @@ -189,9 +186,6 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; - if (prog_type == __MAX_BPF_PROG_TYPE) - continue; - prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); diff --git 
a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c deleted file mode 100644 index b5dce630e0e1..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ /dev/null @@ -1,1031 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ -#define _GNU_SOURCE -#include -#include -#include "cap_helpers.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "priv_map.skel.h" -#include "priv_prog.skel.h" -#include "dummy_st_ops_success.skel.h" - -static inline int sys_mount(const char *dev_name, const char *dir_name, - const char *type, unsigned long flags, - const void *data) -{ - return syscall(__NR_mount, dev_name, dir_name, type, flags, data); -} - -static inline int sys_fsopen(const char *fsname, unsigned flags) -{ - return syscall(__NR_fsopen, fsname, flags); -} - -static inline int sys_fspick(int dfd, const char *path, unsigned flags) -{ - return syscall(__NR_fspick, dfd, path, flags); -} - -static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) -{ - return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); -} - -static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) -{ - return syscall(__NR_fsmount, fs_fd, flags, ms_flags); -} - -static inline int sys_move_mount(int from_dfd, const char *from_path, - int to_dfd, const char *to_path, - unsigned flags) -{ - return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); -} - -static int drop_priv_caps(__u64 *old_caps) -{ - return cap_disable_effective((1ULL << CAP_BPF) | - (1ULL << CAP_PERFMON) | - (1ULL << CAP_NET_ADMIN) | - (1ULL << CAP_SYS_ADMIN), old_caps); -} - -static int restore_priv_caps(__u64 old_caps) -{ - return cap_enable_effective(old_caps, NULL); -} - -static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) -{ - char buf[32]; - int err; - - if (!mask_str) { - if (mask == ~0ULL) { - mask_str = "any"; - } else { - snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); - mask_str = buf; - } - } - - err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, - mask_str, 0); - if (err < 0) - err = -errno; - return err; -} - -#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) - -struct bpffs_opts { - __u64 cmds; - __u64 maps; - __u64 progs; - __u64 attachs; - const char *cmds_str; - const char *maps_str; - const char *progs_str; - const char *attachs_str; -}; - -static int create_bpffs_fd(void) -{ - int fs_fd; - - /* create VFS context */ - fs_fd = sys_fsopen("bpf", 0); - ASSERT_GE(fs_fd, 0, "fs_fd"); - - return fs_fd; -} - -static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) -{ - int mnt_fd, err; - - /* set up token delegation mount options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); - if (!ASSERT_OK(err, "fs_cfg_cmds")) - return err; - err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); - if (!ASSERT_OK(err, "fs_cfg_maps")) - return err; - err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); - if (!ASSERT_OK(err, "fs_cfg_progs")) - return err; - err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); - if (!ASSERT_OK(err, "fs_cfg_attachs")) - return err; - - /* instantiate FS object */ - err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); - if (err < 0) - return -errno; - - /* create 
O_PATH fd for detached mount */ - mnt_fd = sys_fsmount(fs_fd, 0, 0); - if (err < 0) - return -errno; - - return mnt_fd; -} - -/* send FD over Unix domain (AF_UNIX) socket */ -static int sendfd(int sockfd, int fd) -{ - struct msghdr msg = {}; - struct cmsghdr *cmsg; - int fds[1] = { fd }, err; - char iobuf[1]; - struct iovec io = { - .iov_base = iobuf, - .iov_len = sizeof(iobuf), - }; - union { - char buf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr align; - } u; - - msg.msg_iov = &io; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); - memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); - - err = sendmsg(sockfd, &msg, 0); - if (err < 0) - err = -errno; - if (!ASSERT_EQ(err, 1, "sendmsg")) - return -EINVAL; - - return 0; -} - -/* receive FD over Unix domain (AF_UNIX) socket */ -static int recvfd(int sockfd, int *fd) -{ - struct msghdr msg = {}; - struct cmsghdr *cmsg; - int fds[1], err; - char iobuf[1]; - struct iovec io = { - .iov_base = iobuf, - .iov_len = sizeof(iobuf), - }; - union { - char buf[CMSG_SPACE(sizeof(fds))]; - struct cmsghdr align; - } u; - - msg.msg_iov = &io; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - err = recvmsg(sockfd, &msg, 0); - if (err < 0) - err = -errno; - if (!ASSERT_EQ(err, 1, "recvmsg")) - return -EINVAL; - - cmsg = CMSG_FIRSTHDR(&msg); - if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || - !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || - !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || - !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) - return -EINVAL; - - memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); - *fd = fds[0]; - - return 0; -} - -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = write(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - -static int write_file(const char *path, const void *buf, size_t count) -{ - int fd; - ssize_t ret; - - fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); - if (fd < 0) - return -1; - - ret = write_nointr(fd, buf, count); - close(fd); - if (ret < 0 || (size_t)ret != count) - return -1; - - return 0; -} - -static int create_and_enter_userns(void) -{ - uid_t uid; - gid_t gid; - char map[100]; - - uid = getuid(); - gid = getgid(); - - if (unshare(CLONE_NEWUSER)) - return -1; - - if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && - errno != ENOENT) - return -1; - - snprintf(map, sizeof(map), "0 %d 1", uid); - if (write_file("/proc/self/uid_map", map, strlen(map))) - return -1; - - - snprintf(map, sizeof(map), "0 %d 1", gid); - if (write_file("/proc/self/gid_map", map, strlen(map))) - return -1; - - if (setgid(0)) - return -1; - - if (setuid(0)) - return -1; - - return 0; -} - -typedef int (*child_callback_fn)(int); - -static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) -{ - LIBBPF_OPTS(bpf_map_create_opts, map_opts); - int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; - - /* setup userns with root mappings */ - err = create_and_enter_userns(); - if (!ASSERT_OK(err, "create_and_enter_userns")) - goto cleanup; - - /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ - err = unshare(CLONE_NEWNS); - if (!ASSERT_OK(err, "create_mountns")) - goto cleanup; - - err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); - if (!ASSERT_OK(err, 
"remount_root")) - goto cleanup; - - fs_fd = create_bpffs_fd(); - if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { - err = -EINVAL; - goto cleanup; - } - - /* ensure unprivileged child cannot set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); - err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); - err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); - err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); - ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); - - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, fs_fd); - if (!ASSERT_OK(err, "send_fs_fd")) - goto cleanup; - zclose(fs_fd); - - /* avoid mucking around with mount namespaces and mounting at - * well-known path, just get detach-mounted BPF FS fd back from parent - */ - err = recvfd(sock_fd, &mnt_fd); - if (!ASSERT_OK(err, "recv_mnt_fd")) - goto cleanup; - - /* try to fspick() BPF FS and try to add some delegation options */ - fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); - if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { - err = -EINVAL; - goto cleanup; - } - - /* ensure unprivileged child cannot reconfigure to set delegation options */ - err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); - if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { - err = -EINVAL; - goto cleanup; - } - zclose(fs_fd); - - bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); - if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { - err = -EINVAL; - goto cleanup; - } - - /* do custom test logic with customly set up BPF FS instance */ - err = callback(bpffs_fd); - if (!ASSERT_OK(err, "test_callback")) - goto cleanup; - - err = 0; -cleanup: - zclose(sock_fd); - zclose(mnt_fd); - zclose(fs_fd); - zclose(bpffs_fd); - - exit(-err); -} - -static int wait_for_pid(pid_t pid) -{ - int status, ret; - -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == EINTR) - goto again; - - return -1; - } - - if (!WIFEXITED(status)) - return -1; - - return WEXITSTATUS(status); -} - -static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) -{ - int fs_fd = -1, mnt_fd = -1, err; - - err = recvfd(sock_fd, &fs_fd); - if (!ASSERT_OK(err, "recv_bpffs_fd")) - goto cleanup; - - mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); - if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { - err = -EINVAL; - goto cleanup; - } - zclose(fs_fd); - - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, mnt_fd); - if (!ASSERT_OK(err, "send_mnt_fd")) - goto cleanup; - zclose(mnt_fd); - - err = wait_for_pid(child_pid); - ASSERT_OK(err, "waitpid_child"); - -cleanup: - zclose(sock_fd); - zclose(fs_fd); - zclose(mnt_fd); - - if (child_pid > 0) - (void)kill(child_pid, SIGKILL); -} - -static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) -{ - int sock_fds[2] = { -1, -1 }; - int child_pid = 0, err; - - err = 
socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); - if (!ASSERT_OK(err, "socketpair")) - goto cleanup; - - child_pid = fork(); - if (!ASSERT_GE(child_pid, 0, "fork")) - goto cleanup; - - if (child_pid == 0) { - zclose(sock_fds[0]); - return child(sock_fds[1], bpffs_opts, cb); - - } else { - zclose(sock_fds[1]); - return parent(child_pid, bpffs_opts, sock_fds[0]); - } - -cleanup: - zclose(sock_fds[0]); - zclose(sock_fds[1]); - if (child_pid > 0) - (void)kill(child_pid, SIGKILL); -} - -static int userns_map_create(int mnt_fd) -{ - LIBBPF_OPTS(bpf_map_create_opts, map_opts); - int err, token_fd = -1, map_fd = -1; - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* while inside non-init userns, we need both a BPF token *and* - * CAP_BPF inside current userns to create privileged map; let's test - * that neither BPF token alone nor namespaced CAP_BPF is sufficient - */ - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* no token, no CAP_BPF -> fail */ - map_opts.token_fd = 0; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* token without CAP_BPF -> fail */ - map_opts.token_fd = token_fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ - err = restore_priv_caps(old_caps); - if (!ASSERT_OK(err, "restore_caps")) - goto cleanup; - - /* CAP_BPF without token -> fail */ - map_opts.token_fd = 0; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { - err = -EINVAL; - goto cleanup; - } - - /* finally, namespaced CAP_BPF + token -> success */ - map_opts.token_fd = token_fd; - map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); - if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { - err = -EINVAL; - goto cleanup; - } - -cleanup: - zclose(token_fd); - zclose(map_fd); - return err; -} - -static int userns_btf_load(int mnt_fd) -{ - LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); - int err, token_fd = -1, btf_fd = -1; - const void *raw_btf_data; - struct btf *btf = NULL; - __u32 raw_btf_size; - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* while inside non-init userns, we need both a BPF token *and* - * CAP_BPF inside current userns to create privileged map; let's test - * that neither BPF token alone nor namespaced CAP_BPF is sufficient - */ - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* setup a trivial BTF data to load to the kernel */ - btf = btf__new_empty(); - if (!ASSERT_OK_PTR(btf, "empty_btf")) - goto cleanup; - - ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); - - raw_btf_data = btf__raw_data(btf, &raw_btf_size); - if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) - goto cleanup; - - /* no token + no CAP_BPF -> failure */ - btf_opts.token_fd = 0; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, 
&btf_opts); - if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) - goto cleanup; - - /* token + no CAP_BPF -> failure */ - btf_opts.token_fd = token_fd; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) - goto cleanup; - - /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ - err = restore_priv_caps(old_caps); - if (!ASSERT_OK(err, "restore_caps")) - goto cleanup; - - /* token + CAP_BPF -> success */ - btf_opts.token_fd = token_fd; - btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); - if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) - goto cleanup; - - err = 0; -cleanup: - btf__free(btf); - zclose(btf_fd); - zclose(token_fd); - return err; -} - -static int userns_prog_load(int mnt_fd) -{ - LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); - int err, token_fd = -1, prog_fd = -1; - struct bpf_insn insns[] = { - /* bpf_jiffies64() requires CAP_BPF */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), - /* bpf_get_current_task() requires CAP_PERFMON */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), - /* r0 = 0; exit; */ - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - size_t insn_cnt = ARRAY_SIZE(insns); - __u64 old_caps = 0; - - /* create BPF token from BPF FS mount */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "token_create")) { - err = -EINVAL; - goto cleanup; - } - - /* validate we can successfully load BPF program with token; this - * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) - * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have - * BPF token wired properly in a bunch of places in the kernel - */ - prog_opts.token_fd = token_fd; - prog_opts.expected_attach_type = BPF_XDP; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { - err = -EPERM; - goto cleanup; - } - - /* no token + caps -> failure */ - prog_opts.token_fd = 0; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - err = drop_priv_caps(&old_caps); - if (!ASSERT_OK(err, "drop_caps")) - goto cleanup; - - /* no caps + token -> failure */ - prog_opts.token_fd = token_fd; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - /* no caps + no token -> definitely a failure */ - prog_opts.token_fd = 0; - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", - insns, insn_cnt, &prog_opts); - if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { - err = -EPERM; - goto cleanup; - } - - err = 0; -cleanup: - zclose(prog_fd); - zclose(token_fd); - return err; -} - -static int userns_obj_priv_map(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct priv_map *skel; - int err, token_fd; - - skel = priv_map__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - priv_map__destroy(skel); - return -EINVAL; - } - - /* use bpf_token_path to provide BPF FS path */ - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = priv_map__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = priv_map__load(skel); - priv_map__destroy(skel); - if 
(!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - - /* create token and pass it through bpf_token_fd */ - token_fd = bpf_token_create(mnt_fd, NULL); - if (!ASSERT_GT(token_fd, 0, "create_token")) - return -EINVAL; - - opts.bpf_token_path = NULL; - opts.bpf_token_fd = token_fd; - skel = priv_map__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) - return -EINVAL; - - /* we can close our token FD, bpf_object owns dup()'ed FD now */ - close(token_fd); - - err = priv_map__load(skel); - priv_map__destroy(skel); - if (!ASSERT_OK(err, "obj_token_fd_load")) - return -EINVAL; - - return 0; -} - -static int userns_obj_priv_prog(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct priv_prog *skel; - int err; - - skel = priv_prog__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - priv_prog__destroy(skel); - return -EINVAL; - } - - /* use bpf_token_path to provide BPF FS path */ - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = priv_prog__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = priv_prog__load(skel); - priv_prog__destroy(skel); - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - - return 0; -} - -/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, - * which should cause struct_ops application to fail, as BTF won't be uploaded - * into the kernel, even if STRUCT_OPS programs themselves are allowed - */ -static int validate_struct_ops_load(int mnt_fd, bool expect_success) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - char buf[256]; - struct dummy_st_ops_success *skel; - int err; - - snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); - opts.bpf_token_path = buf; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (expect_success) { - if (!ASSERT_OK(err, "obj_token_path_load")) - return -EINVAL; - } else /* expect failure */ { - if (!ASSERT_ERR(err, "obj_token_path_load")) - return -EINVAL; - } - - return 0; -} - -static int userns_obj_priv_btf_fail(int mnt_fd) -{ - return validate_struct_ops_load(mnt_fd, false /* should fail */); -} - -static int userns_obj_priv_btf_success(int mnt_fd) -{ - return validate_struct_ops_load(mnt_fd, true /* should succeed */); -} - -#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" -#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" - -static int userns_obj_priv_implicit_token(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct dummy_st_ops_success *skel; - int err; - - /* before we mount BPF FS with token delegation, struct_ops skeleton - * should fail to load - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - - /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF - * token automatically and implicitly - */ - err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); - if (!ASSERT_OK(err, "move_mount_bpffs")) - return -EINVAL; - - /* disable implicit BPF token creation by setting - * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail - */ - err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); - if (!ASSERT_OK(err, "setenv_token_path")) - return -EINVAL; - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, 
"obj_token_envvar_disabled_load")) { - unsetenv(TOKEN_ENVVAR); - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - unsetenv(TOKEN_ENVVAR); - - /* now the same struct_ops skeleton should succeed thanks to libppf - * creating BPF token from /sys/fs/bpf mount point - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) - return -EINVAL; - - dummy_st_ops_success__destroy(skel); - - /* now disable implicit token through empty bpf_token_path, should fail */ - opts.bpf_token_path = ""; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_empty_token_path_load")) - return -EINVAL; - - /* now disable implicit token through negative bpf_token_fd, should fail */ - opts.bpf_token_path = NULL; - opts.bpf_token_fd = -1; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) - return -EINVAL; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) - return -EINVAL; - - return 0; -} - -static int userns_obj_priv_implicit_token_envvar(int mnt_fd) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts); - struct dummy_st_ops_success *skel; - int err; - - /* before we mount BPF FS with token delegation, struct_ops skeleton - * should fail to load - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { - dummy_st_ops_success__destroy(skel); - return -EINVAL; - } - - /* mount custom BPF FS over custom location, so libbpf can't create - * BPF token implicitly, unless pointed to it through - * LIBBPF_BPF_TOKEN_PATH envvar - */ - rmdir(TOKEN_BPFFS_CUSTOM); - if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) - goto err_out; - err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); - if (!ASSERT_OK(err, "move_mount_bpffs")) - goto err_out; - - /* even though we have BPF FS with delegation, it's not at default - * /sys/fs/bpf location, so we still fail to load until envvar is set up - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { - dummy_st_ops_success__destroy(skel); - goto err_out; - } - - err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); - if (!ASSERT_OK(err, "setenv_token_path")) - goto err_out; - - /* now the same struct_ops skeleton should succeed thanks to libppf - * creating BPF token from custom mount point - */ - skel = dummy_st_ops_success__open_and_load(); - if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) - goto err_out; - - dummy_st_ops_success__destroy(skel); - - /* now disable implicit token through empty bpf_token_path, envvar - * will be ignored, should fail - */ - opts.bpf_token_path = ""; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) - goto err_out; - - err = dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_empty_token_path_load")) - goto err_out; - - /* now disable implicit token through negative bpf_token_fd, envvar - * will be ignored, should fail - */ - opts.bpf_token_path = NULL; - opts.bpf_token_fd = -1; - skel = dummy_st_ops_success__open_opts(&opts); - if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) - goto err_out; - - err = 
dummy_st_ops_success__load(skel); - dummy_st_ops_success__destroy(skel); - if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) - goto err_out; - - rmdir(TOKEN_BPFFS_CUSTOM); - unsetenv(TOKEN_ENVVAR); - return 0; -err_out: - rmdir(TOKEN_BPFFS_CUSTOM); - unsetenv(TOKEN_ENVVAR); - return -EINVAL; -} - -#define bit(n) (1ULL << (n)) - -void test_token(void) -{ - if (test__start_subtest("map_token")) { - struct bpffs_opts opts = { - .cmds_str = "map_create", - .maps_str = "stack", - }; - - subtest_userns(&opts, userns_map_create); - } - if (test__start_subtest("btf_token")) { - struct bpffs_opts opts = { - .cmds = 1ULL << BPF_BTF_LOAD, - }; - - subtest_userns(&opts, userns_btf_load); - } - if (test__start_subtest("prog_token")) { - struct bpffs_opts opts = { - .cmds_str = "PROG_LOAD", - .progs_str = "XDP", - .attachs_str = "xdp", - }; - - subtest_userns(&opts, userns_prog_load); - } - if (test__start_subtest("obj_priv_map")) { - struct bpffs_opts opts = { - .cmds = bit(BPF_MAP_CREATE), - .maps = bit(BPF_MAP_TYPE_QUEUE), - }; - - subtest_userns(&opts, userns_obj_priv_map); - } - if (test__start_subtest("obj_priv_prog")) { - struct bpffs_opts opts = { - .cmds = bit(BPF_PROG_LOAD), - .progs = bit(BPF_PROG_TYPE_KPROBE), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_prog); - } - if (test__start_subtest("obj_priv_btf_fail")) { - struct bpffs_opts opts = { - /* disallow BTF loading */ - .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_btf_fail); - } - if (test__start_subtest("obj_priv_btf_success")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_btf_success); - } - if (test__start_subtest("obj_priv_implicit_token")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_implicit_token); - } - if (test__start_subtest("obj_priv_implicit_token_envvar")) { - struct bpffs_opts opts = { - /* allow BTF loading */ - .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), - .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), - .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), - .attachs = ~0ULL, - }; - - subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); - } -} diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c deleted file mode 100644 index 9085be50f03b..000000000000 --- a/tools/testing/selftests/bpf/progs/priv_map.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ - -#include "vmlinux.h" -#include - -char _license[] SEC("license") = "GPL"; - -struct { - __uint(type, BPF_MAP_TYPE_QUEUE); - __uint(max_entries, 1); - __type(value, __u32); -} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c deleted file mode 100644 index 3c7b2b618c8a..000000000000 --- a/tools/testing/selftests/bpf/progs/priv_prog.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ - -#include "vmlinux.h" -#include - -char _license[] SEC("license") = "GPL"; - -SEC("kprobe") -int kprobe_prog(void *ctx) -{ - return 1; -} -- cgit v1.2.3-70-g09d2 From 1de584832375d0dc4234ee406185384a58fb96ac Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 19 Dec 2023 21:47:58 +0800 Subject: selftests/bpf: remove reduplicated s32 casting in "crafted_cases" The "S32_MIN" is already defined with s32 casting, so there is no need to do it again. Signed-off-by: Menglong Dong Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231219134800.1550388-3-menglong8.dong@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 0c9abd279e18..3bf4ddd720a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -2097,10 +2097,10 @@ static struct subtest_case crafted_cases[] = { {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, - {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, - {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, - {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, - {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U64, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}}, }; /* Go over crafted hard-coded cases. This is fast, so we do it as part of -- cgit v1.2.3-70-g09d2 From 31d9cc96b1e3b28daf74938cb1233231474bbcf6 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 19 Dec 2023 21:47:59 +0800 Subject: selftests/bpf: activate the OP_NE logic in range_cond() The edge range checking for the registers is supported by the verifier now, so we can activate the extended logic in tools/testing/selftests/bpf/prog_tests/reg_bounds.c/range_cond() to test such logic. Besides, I added some cases to the "crafted_cases" array for this logic. These cases are mainly used to test the edge of the src reg and dst reg. 
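To make the OP_NE narrowing being enabled here concrete, below is a minimal standalone sketch (illustrative only, with made-up names; the real logic lives in range_cond() in the diff that follows, where it is applied to both operands and separately per numeric domain): when one side of a != comparison is a known constant sitting exactly on an endpoint of the other side's range, that endpoint can be excluded.

  #include <linux/types.h>

  struct range64 { __u64 a, b; };	/* inclusive [a, b] */

  /* Narrow Y's range given the "X != Y" branch, where X is the
   * constant x_const.
   */
  static struct range64 narrow_ne(struct range64 y, __u64 x_const)
  {
  	if (x_const == y.a && y.a != y.b)
  		y.a += 1;	/* X sits on Y's left edge, exclude it */
  	else if (x_const == y.b && y.a != y.b)
  		y.b -= 1;	/* X sits on Y's right edge, exclude it */
  	/* a constant strictly inside [a, b] tells us nothing new */
  	return y;
  }

For example, narrow_ne({0, U64_MAX}, U64_MAX) yields [0, U64_MAX - 1], which is exactly the kind of edge overlap the new crafted cases below exercise.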
All reg bounds tests have passed in SLOW_TESTS mode: $ export SLOW_TESTS=1 && ./test_progs -t reg_bounds -j Summary: 65/18959832 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Menglong Dong Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231219134800.1550388-4-menglong8.dong@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 3bf4ddd720a8..820d0bcfc474 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -590,12 +590,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); break; case OP_NE: - /* generic case, can't derive more information */ - *newx = range(t, x.a, x.b); - *newy = range(t, y.a, y.b); - break; - - /* below extended logic is not supported by verifier just yet */ + /* below logic is supported by the verifier now */ if (x.a == x.b && x.a == y.a) { /* X is a constant matching left side of Y */ *newx = range(t, x.a, x.b); @@ -2101,6 +2096,18 @@ static struct subtest_case crafted_cases[] = { {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}}, {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}}, + + /* edge overlap testings for BPF_NE */ + {U64, U64, {0, U64_MAX}, {U64_MAX, U64_MAX}}, + {U64, U64, {0, U64_MAX}, {0, 0}}, + {S64, U64, {S64_MIN, 0}, {S64_MIN, S64_MIN}}, + {S64, U64, {S64_MIN, 0}, {0, 0}}, + {S64, U64, {S64_MIN, S64_MAX}, {S64_MAX, S64_MAX}}, + {U32, U32, {0, U32_MAX}, {0, 0}}, + {U32, U32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + {S32, U32, {(u32)S32_MIN, 0}, {0, 0}}, + {S32, U32, {(u32)S32_MIN, 0}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, S32_MAX}, {S32_MAX, S32_MAX}}, }; /* Go over crafted hard-coded cases. This is fast, so we do it as part of -- cgit v1.2.3-70-g09d2 From 463ea64eb008b7abb63245ed69446b404bf042b1 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 19 Dec 2023 21:48:00 +0800 Subject: selftests/bpf: add testcase to verifier_bounds.c for BPF_JNE Add a testcase for the logic by which the verifier tracks BPF_JNE for regs. 
The assembly function "reg_not_equal_const()" and "reg_equal_const" that we add is exactly converted from the following case: u32 a = bpf_get_prandom_u32(); u64 b = 0; a %= 8; /* the "a > 0" here will be optimized to "a != 0" */ if (a > 0) { /* now the range of a should be [1, 7] */ bpf_skb_store_bytes(skb, 0, &b, a, 0); } Signed-off-by: Menglong Dong Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231219134800.1550388-5-menglong8.dong@gmail.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/verifier_bounds.c | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index ec430b71730b..960998f16306 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1075,4 +1075,66 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("tc") +__description("bounds check with JMP_NE for reg edge") +__success __retval(0) +__naked void reg_not_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 != 0 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* The 4th argument of bpf_skb_store_bytes is defined as \ + * ARG_CONST_SIZE, so 0 is not allowed. The 'r4 != 0' \ + * is providing us this exclusion of zero from initial \ + * [0, 7] range. \ + */ \ + call %[bpf_skb_store_bytes]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("bounds check with JMP_EQ for reg edge") +__success __retval(0) +__naked void reg_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 == 0 goto l0_%=; \ + r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* Just the same as what we do in reg_not_equal_const() */ \ + call %[bpf_skb_store_bytes]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 5eccd2db42d77e3570619c32d39e39bf486607cf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:26 -0800 Subject: bpf: reuse btf_prepare_func_args() check for main program BTF validation Instead of btf_check_subprog_arg_match(), use btf_prepare_func_args() logic to validate "trustworthiness" of main BPF program's BTF information, if it is present. We ignored results of original BTF check anyway, often times producing confusing and ominously-sounding "reg type unsupported for arg#0 function" message, which has no apparent effect on program correctness and verification process. All the -EFAULT returning sanity checks are already performed in check_btf_info_early(), so there is zero reason to have this duplication of logic between btf_check_subprog_call() and btf_check_subprog_arg_match(). Dropping btf_check_subprog_arg_match() simplifies btf_check_func_arg_match() further removing `bool processing_call` flag. One subtle bit that was done by btf_check_subprog_arg_match() was potentially marking main program's BTF as unreliable. We do this explicitly now with a dedicated simple check, preserving the original behavior, but now based on well factored btf_prepare_func_args() logic. 
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 - kernel/bpf/btf.c | 50 ++-------------------- kernel/bpf/verifier.c | 25 ++++++----- tools/testing/selftests/bpf/prog_tests/log_fixup.c | 4 +- .../selftests/bpf/progs/cgrp_kfunc_failure.c | 2 +- .../selftests/bpf/progs/task_kfunc_failure.c | 2 +- 6 files changed, 19 insertions(+), 66 deletions(-) (limited to 'tools') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c050c82cc9a5..d0d7eff22b8a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2466,8 +2466,6 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf_func_model *m); struct bpf_reg_state; -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs); int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index be2104e5f2f5..33d9a1c73f6e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6768,8 +6768,7 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - bool ptr_to_mem_ok, - bool processing_call) + bool ptr_to_mem_ok) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); struct bpf_verifier_log *log = &env->log; @@ -6842,7 +6841,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, i, btf_type_str(t)); return -EINVAL; } - } else if (ptr_to_mem_ok && processing_call) { + } else if (ptr_to_mem_ok) { const struct btf_type *resolve_ret; u32 type_size; @@ -6867,55 +6866,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, return 0; } -/* Compare BTF of a function declaration with given bpf_reg_state. - * Returns: - * EFAULT - there is a verifier bug. Abort verification. - * EINVAL - there is a type mismatch or BTF is not available. - * 0 - BTF matches with what bpf_reg_state expects. - * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. - */ -int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) -{ - struct bpf_prog *prog = env->prog; - struct btf *btf = prog->aux->btf; - bool is_global; - u32 btf_id; - int err; - - if (!prog->aux->func_info) - return -EINVAL; - - btf_id = prog->aux->func_info[subprog].type_id; - if (!btf_id) - return -EFAULT; - - if (prog->aux->func_info_aux[subprog].unreliable) - return -EINVAL; - - is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, false); - - /* Compiler optimizations can remove arguments from static functions - * or mismatched type can be passed into a global function. - * In such cases mark the function as unreliable from BTF point of view. - */ - if (err) - prog->aux->func_info_aux[subprog].unreliable = true; - return err; -} - /* Compare BTF of a function call with given bpf_reg_state. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - there is a type mismatch or BTF is not available. * 0 - BTF matches with what bpf_reg_state expects. * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. 
- * - * NOTE: the code is duplicated from btf_check_subprog_arg_match() - * because btf_check_func_arg_match() is still doing both. Once that - * function is split in 2, we can call from here btf_check_subprog_arg_match() - * first, and then treat the calling part in a new code path. */ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs) @@ -6937,7 +6893,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, true); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6555785b9f63..c26e9ab5226c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19904,6 +19904,7 @@ static void free_states(struct bpf_verifier_env *env) static int do_check_common(struct bpf_verifier_env *env, int subprog) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); + struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_state *state; struct bpf_reg_state *regs; int ret, i; @@ -19930,9 +19931,9 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) state->first_insn_idx = env->subprog_info[subprog].start; state->last_insn_idx = -1; + regs = state->frame[state->curframe]->regs; if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { - struct bpf_subprog_info *sub = subprog_info(env, subprog); const char *sub_name = subprog_name(env, subprog); struct bpf_subprog_arg_info *arg; struct bpf_reg_state *reg; @@ -19979,21 +19980,19 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) } } } else { + /* if main BPF program has associated BTF info, validate that + * it's matching expected signature, and otherwise mark BTF + * info for main program as unreliable + */ + if (env->prog->aux->func_info_aux) { + ret = btf_prepare_func_args(env, 0); + if (ret || sub->arg_cnt != 1 || sub->args[0].arg_type != ARG_PTR_TO_CTX) + env->prog->aux->func_info_aux[0].unreliable = true; + } + /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; mark_reg_known_zero(env, regs, BPF_REG_1); - ret = btf_check_subprog_arg_match(env, subprog, regs); - if (ret == -EFAULT) - /* unlikely verifier bug. abort. - * ret == 0 and ret < 0 are sadly acceptable for - * main() function due to backward compatibility. - * Like socket filter program may be written as: - * int bpf_prog(struct pt_regs *ctx) - * and never dereference that ctx in the program. - * 'struct pt_regs' is a type mismatch for socket - * filter that should be using 'struct __sk_buff'. 
- */ - goto out; } ret = do_check(env); diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index effd78b2a657..7a3fa2ff567b 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -169,9 +169,9 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_none")) bad_core_relo(0, TRUNC_NONE /* full buf */); if (test__start_subtest("bad_core_relo_trunc_partial")) - bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); + bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 0fa564a5cc5b..9fe9c4a4e8f6 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -78,7 +78,7 @@ int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) } SEC("kretprobe/cgroup_destroy_locked") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("calling kernel function bpf_cgroup_acquire is not allowed") int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) { struct cgroup *acquired; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index dcdea3127086..ad88a3796ddf 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -248,7 +248,7 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl } SEC("lsm/task_free") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("R1 must be a rcu pointer") int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) { struct task_struct *acquired; -- cgit v1.2.3-70-g09d2 From f18c3d88deedf0defc3e4800341cc7bcaaabcdf9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:29 -0800 Subject: bpf: reuse subprog argument parsing logic for subprog call checks Remove duplicated BTF parsing logic when it comes to subprog call check. Instead, use (potentially cached) results of btf_prepare_func_args() to abstract away expectations of each subprog argument in generic terms (e.g., "this is pointer to context", or "this is a pointer to memory of size X"), and then use those simple high-level argument type expectations to validate actual register states to check if they match expectations. 
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 110 ++++++--------------- .../selftests/bpf/progs/test_global_func5.c | 2 +- 2 files changed, 31 insertions(+), 81 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f48e49f2d482..3c9e6d0d77d6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9249,101 +9249,54 @@ err_out: return err; } -static int btf_check_func_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - bool ptr_to_mem_ok) +static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, + const struct btf *btf, + struct bpf_reg_state *regs) { - enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_log *log = &env->log; - const char *func_name, *ref_tname; - const struct btf_type *t, *ref_t; - const struct btf_param *args; - u32 i, nargs, ref_id; + u32 i; int ret; - t = btf_type_by_id(btf, func_id); - if (!t || !btf_type_is_func(t)) { - /* These checks were already done by the verifier while loading - * struct bpf_func_info or in add_kfunc_call(). - */ - bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n", - func_id); - return -EFAULT; - } - func_name = btf_name_by_offset(btf, t->name_off); - - t = btf_type_by_id(btf, t->type); - if (!t || !btf_type_is_func_proto(t)) { - bpf_log(log, "Invalid BTF of func %s\n", func_name); - return -EFAULT; - } - args = (const struct btf_param *)(t + 1); - nargs = btf_type_vlen(t); - if (nargs > MAX_BPF_FUNC_REG_ARGS) { - bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs, - MAX_BPF_FUNC_REG_ARGS); - return -EINVAL; - } + ret = btf_prepare_func_args(env, subprog); + if (ret) + return ret; /* check that BTF function arguments match actual types that the * verifier sees. */ - for (i = 0; i < nargs; i++) { - enum bpf_arg_type arg_type = ARG_DONTCARE; + for (i = 0; i < sub->arg_cnt; i++) { u32 regno = i + 1; struct bpf_reg_state *reg = ®s[regno]; + struct bpf_subprog_arg_info *arg = &sub->args[i]; - t = btf_type_skip_modifiers(btf, args[i].type, NULL); - if (btf_type_is_scalar(t)) { - if (reg->type == SCALAR_VALUE) - continue; - bpf_log(log, "R%d is not a scalar\n", regno); - return -EINVAL; - } - - if (!btf_type_is_ptr(t)) { - bpf_log(log, "Unrecognized arg#%d type %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - - ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); - ref_tname = btf_name_by_offset(btf, ref_t->name_off); - - ret = check_func_arg_reg_off(env, reg, regno, arg_type); - if (ret < 0) - return ret; - - if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { + if (arg->arg_type == ARG_ANYTHING) { + if (reg->type != SCALAR_VALUE) { + bpf_log(log, "R%d is not a scalar\n", regno); + return -EINVAL; + } + } else if (arg->arg_type == ARG_PTR_TO_CTX) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); + if (ret < 0) + return ret; /* If function expects ctx type in BTF check that caller * is passing PTR_TO_CTX. 
*/ if (reg->type != PTR_TO_CTX) { - bpf_log(log, - "arg#%d expected pointer to ctx, but got %s\n", - i, btf_type_str(t)); - return -EINVAL; - } - } else if (ptr_to_mem_ok) { - const struct btf_type *resolve_ret; - u32 type_size; - - resolve_ret = btf_resolve_size(btf, ref_t, &type_size); - if (IS_ERR(resolve_ret)) { - bpf_log(log, - "arg#%d reference type('%s %s') size cannot be determined: %ld\n", - i, btf_type_str(ref_t), ref_tname, - PTR_ERR(resolve_ret)); + bpf_log(log, "arg#%d expects pointer to ctx\n", i); return -EINVAL; } + } else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE); + if (ret < 0) + return ret; - if (check_mem_reg(env, reg, regno, type_size)) + if (check_mem_reg(env, reg, regno, arg->mem_size)) return -EINVAL; } else { - bpf_log(log, "reg type unsupported for arg#%d function %s#%d\n", i, - func_name, func_id); - return -EINVAL; + bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n", + i, arg->arg_type); + return -EFAULT; } } @@ -9358,11 +9311,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. */ static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *regs) + struct bpf_reg_state *regs) { struct bpf_prog *prog = env->prog; struct btf *btf = prog->aux->btf; - bool is_global; u32 btf_id; int err; @@ -9376,9 +9328,7 @@ static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, if (prog->aux->func_info_aux[subprog].unreliable) return -EINVAL; - is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); - + err = btf_check_func_arg_match(env, subprog, btf, regs); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. * In such cases mark the function as unreliable from BTF point of view. diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c index cc55aedaf82d..257c0569ff98 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func5.c +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -26,7 +26,7 @@ int f3(int val, struct __sk_buff *skb) } SEC("tc") -__failure __msg("expected pointer to ctx, but got PTR") +__failure __msg("expects pointer to ctx") int global_func5(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); -- cgit v1.2.3-70-g09d2 From aae9c25dda159045b223ecb471cd0729ccec8285 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:32 -0800 Subject: libbpf: add __arg_xxx macros for annotating global func args Add a set of __arg_xxx macros which can be used to augment BPF global subprogs/functions with extra information for use by BPF verifier. 
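As a usage sketch (a hypothetical program, not part of this patch), a global subprog can be annotated so that the verifier treats its first argument as the calling program's context and relies on the second one never being NULL:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  /* __arg_ctx: 'ctx' is the context type of whatever program calls this
   * subprog; __arg_nonnull: the caller guarantees 'val' is not NULL, so
   * no explicit NULL check is needed before dereferencing it.
   */
  __noinline int handle_value(void *ctx __arg_ctx, int *val __arg_nonnull)
  {
          return *val;
  }

  SEC("tp")
  int prog(void *ctx)
  {
          int v = 1;

          return handle_value(ctx, &v);
  }

  char _license[] SEC("license") = "GPL";

The macros themselves only attach "arg:ctx" / "arg:nonnull" BTF decl tags to the parameters; the verifier-side handling of those tags lands in the kernel patches of the same series.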
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_helpers.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 77ceea575dc7..2324cc42b017 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -188,6 +188,9 @@ enum libbpf_tristate { !!sym; \ }) +#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx"))) +#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) + #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b #endif -- cgit v1.2.3-70-g09d2 From 0a0ffcac92d5b41133c97d260ad1f320572783a5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:33 -0800 Subject: selftests/bpf: add global subprog annotation tests Add test cases to validate semantics of global subprog argument annotations: - non-null pointers; - context argument; - const dynptr passing; - packet pointers (data, metadata, end). Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/verifier_global_subprogs.c | 99 +++++++++++++++++++++- 1 file changed, 95 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index bd696a431244..9eeb2d89cda8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -1,12 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ -#include -#include -#include -#include +#include #include #include "bpf_misc.h" +#include "xdp_metadata.h" +#include "bpf_kfuncs.h" int arr[1]; int unkn_idx; @@ -98,4 +97,96 @@ int unguarded_unsupp_global_called(void) return global_unsupp(&x); } +long stack[128]; + +__weak int subprog_nullable_ptr_bad(int *p) +{ + return (*p) * 2; /* bad, missing null check */ +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("invalid mem access 'mem_or_null'") +int arg_tag_nullable_ptr_fail(void *ctx) +{ + int x = 42; + + return subprog_nullable_ptr_bad(&x); +} + +__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull) +{ + return (*p1) * (*p2); /* good, no need for NULL checks */ +} + +int x = 47; + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_nonnull_ptr_good(void *ctx) +{ + int y = 74; + + return subprog_nonnull_ptr_good(&x, &y); +} + +/* this global subprog can be now called from many types of entry progs, each + * with different context type + */ +__weak int subprog_ctx_tag(void *ctx __arg_ctx) +{ + return bpf_get_stack(ctx, stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_ctx_raw_tp(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +SEC("?tp") +__success __log_level(2) +int arg_tag_ctx_tp(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +SEC("?kprobe") +__success __log_level(2) +int arg_tag_ctx_kprobe(void *ctx) +{ + return subprog_ctx_tag(ctx); +} + +__weak int subprog_dynptr(struct bpf_dynptr *dptr) +{ + long *d, t, buf[1] = {}; + + d = bpf_dynptr_data(dptr, 0, sizeof(long)); + if (!d) + return 0; + + t = *d + 1; + + d = bpf_dynptr_slice(dptr, 0, &buf, sizeof(long)); + if (!d) + return t; + + t = *d + 2; + + return t; +} + +SEC("?xdp") +__success __log_level(2) +int arg_tag_dynptr(struct xdp_md *ctx) +{ + struct bpf_dynptr dptr; + + bpf_dynptr_from_xdp(ctx, 0, &dptr); + + return subprog_dynptr(&dptr); +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From f0a5056222f2cfa6d40b4c888cb6b01e8569e282 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Dec 2023 17:13:34 -0800 Subject: selftests/bpf: add freplace of BTF-unreliable main prog test Add a test validating that freplace'ing another main (entry) BPF program fails if the target BPF program doesn't have valid/expected func proto BTF. We extend fexit_bpf2bpf test to allow to specify expected log message for negative test cases (where freplace program is expected to fail to load). 
Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20231215011334.2307144-11-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/fexit_bpf2bpf.c | 30 +++++++++++++++++++--- tools/testing/selftests/bpf/prog_tests/verifier.c | 2 ++ .../selftests/bpf/progs/freplace_unreliable_prog.c | 20 +++++++++++++++ .../bpf/progs/verifier_btf_unreliable_prog.c | 20 +++++++++++++++ 4 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c create mode 100644 tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 8ec73fdfcdab..f29fc789c14b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -348,7 +348,8 @@ static void test_func_sockmap_update(void) } static void test_obj_load_failure_common(const char *obj_file, - const char *target_obj_file) + const char *target_obj_file, + const char *exp_msg) { /* * standalone test that asserts failure to load freplace prog @@ -356,6 +357,7 @@ static void test_obj_load_failure_common(const char *obj_file, */ struct bpf_object *obj = NULL, *pkt_obj; struct bpf_program *prog; + char log_buf[64 * 1024]; int err, pkt_fd; __u32 duration = 0; @@ -374,11 +376,21 @@ static void test_obj_load_failure_common(const char *obj_file, err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "set_attach_target"); + log_buf[0] = '\0'; + if (exp_msg) + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); + if (env.verbosity > VERBOSE_NONE) + bpf_program__set_log_level(prog, 2); + /* It should fail to load the program */ err = bpf_object__load(obj); + if (env.verbosity > VERBOSE_NONE && exp_msg) /* we overtook log */ + printf("VERIFIER LOG:\n================\n%s\n================\n", log_buf); if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) goto close_prog; + if (exp_msg) + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "fail_msg"); close_prog: bpf_object__close(obj); bpf_object__close(pkt_obj); @@ -388,14 +400,24 @@ static void test_func_replace_return_code(void) { /* test invalid return code in the replaced program */ test_obj_load_failure_common("./freplace_connect_v4_prog.bpf.o", - "./connect4_prog.bpf.o"); + "./connect4_prog.bpf.o", NULL); } static void test_func_map_prog_compatibility(void) { /* test with spin lock map value in the replaced program */ test_obj_load_failure_common("./freplace_attach_probe.bpf.o", - "./test_attach_probe.bpf.o"); + "./test_attach_probe.bpf.o", NULL); +} + +static void test_func_replace_unreliable(void) +{ + /* freplace'ing unreliable main prog should fail with error + * "Cannot replace static functions" + */ + test_obj_load_failure_common("freplace_unreliable_prog.bpf.o", + "./verifier_btf_unreliable_prog.bpf.o", + "Cannot replace static functions"); } static void test_func_replace_global_func(void) @@ -563,6 +585,8 @@ void serial_test_fexit_bpf2bpf(void) test_func_replace_return_code(); if (test__start_subtest("func_map_prog_compatibility")) test_func_map_prog_compatibility(); + if (test__start_subtest("func_replace_unreliable")) + test_func_replace_unreliable(); if (test__start_subtest("func_replace_multi")) test_func_replace_multi(); if (test__start_subtest("fmod_ret_freplace")) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c 
b/tools/testing/selftests/bpf/prog_tests/verifier.c index ac49ec25211d..d62c5bf00e71 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -14,6 +14,7 @@ #include "verifier_bpf_get_stack.skel.h" #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" +#include "verifier_btf_unreliable_prog.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" @@ -125,6 +126,7 @@ void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_u void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } +void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } diff --git a/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c new file mode 100644 index 000000000000..624078abf3de --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include "vmlinux.h" +#include +#include + +SEC("freplace/btf_unreliable_kprobe") +/* context type is what BPF verifier expects for kprobe context, but target + * program has `stuct whatever *ctx` argument, so freplace operation will be + * rejected with the following message: + * + * arg0 replace_btf_unreliable_kprobe(struct pt_regs *) doesn't match btf_unreliable_kprobe(struct whatever *) + */ +int replace_btf_unreliable_kprobe(bpf_user_pt_regs_t *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c new file mode 100644 index 000000000000..36e033a2e02c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include +#include +#include +#include "bpf_misc.h" + +struct whatever {}; + +SEC("kprobe") +__success __log_level(2) +/* context type is wrong, making it impossible to freplace this program */ +int btf_unreliable_kprobe(struct whatever *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From 441c725ed592cb22f2a82f2827dccd045356cc81 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 19 Dec 2023 21:57:27 +0800 Subject: selftests/bpf: Close cgrp fd before calling cleanup_cgroup_environment() There is error log when htab-mem benchmark completes. The error log looks as follows: $ ./bench htab-mem -d1 Setting up benchmark 'htab-mem'... Benchmark 'htab-mem' started. ...... (cgroup_helpers.c:353: errno: Device or resource busy) umount cgroup2 Fix it by closing cgrp fd before invoking cleanup_cgroup_environment(). 
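For background (an illustrative sketch, not code from the patch, with hypothetical paths): any open file descriptor that still refers to a file under a mount keeps that mount busy, which is why the cgroup2 unmount performed by cleanup_cgroup_environment() fails until the benchmark's cgroup fd is closed first.

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/mount.h>
  #include <unistd.h>

  int main(void)
  {
          /* assume a cgroup2 hierarchy is mounted at /mnt/cg with a child 'test' */
          int fd = open("/mnt/cg/test", O_RDONLY | O_DIRECTORY);

          if (umount("/mnt/cg"))
                  perror("umount with fd still open");    /* expected: EBUSY */

          close(fd);

          if (umount("/mnt/cg"))
                  perror("umount after close");           /* expected to succeed */

          return 0;
  }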
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231219135727.2661527-1-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/benchs/bench_htab_mem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c index 9146d3f414d2..926ee822143e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -335,6 +335,7 @@ static void htab_mem_report_final(struct bench_res res[], int res_cnt) " peak memory usage %7.2lfMiB\n", loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0); + close(ctx.fd); cleanup_cgroup_environment(); } -- cgit v1.2.3-70-g09d2 From bd2dcb94c81e3408731d129e884954c36ef2f1fa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 17 Dec 2023 10:32:43 +0200 Subject: selftests: bridge_mdb: Add MDB bulk deletion test Add test cases to verify the behavior of the MDB bulk deletion functionality in the bridge driver. Signed-off-by: Ido Schimmel Acked-by: Petr Machata Reviewed-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- .../testing/selftests/net/forwarding/bridge_mdb.sh | 191 ++++++++++++++++++++- 1 file changed, 189 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index e4e3e9405056..61348f71728c 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -803,11 +803,198 @@ cfg_test_dump() cfg_test_dump_common "L2" l2_grps_get } +# Check flush functionality with different parameters. +cfg_test_flush() +{ + local num_entries + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different port. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10 + + # Different VLAN ID. + bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20 + + # Different routing protocol. + bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp + bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra + + # Different state. + bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent + bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp + + bridge mdb flush dev br0 + num_entries=$(bridge mdb show dev br0 | wc -l) + [[ $num_entries -eq 0 ]] + check_err $? 0 "Not all entries flushed after flush all" + + # Check that when flushing by port only entries programmed with the + # specified port are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 port $swp1 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong port" + + bridge mdb flush dev br0 port br0 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_fail $? 
"Host entry not flushed by specified port" + + bridge mdb flush dev br0 + + # Check that when flushing by VLAN ID only entries programmed with the + # specified VLAN ID are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20 + + bridge mdb flush dev br0 vid 10 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "Entry not flushed by specified VLAN ID" + bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null + check_err $? "Entry flushed by wrong VLAN ID" + + bridge mdb flush dev br0 + + # Check that all permanent entries are flushed when "permanent" is + # specified and that temporary entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by \"permanent\" state" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 + + # Check that all temporary entries are flushed when "nopermanent" is + # specified and that permanent entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_err $? "Entry flushed by wrong state (\"nopermanent\")" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_fail $? "Entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that L2 host entries are not flushed when "nopermanent" is + # specified, but flushed when "permanent" is specified. + + bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")" + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_fail $? "L2 host entry not flushed by \"permanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv4 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv6 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_fail $? 
"IPv6 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 proto bgp + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong routing protocol" + + bridge mdb flush dev br0 + + # Test that an error is returned when trying to flush using unsupported + # parameters. + + bridge mdb flush dev br0 src_vni 10 &> /dev/null + check_fail $? "Managed to flush by source VNI" + + bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null + check_fail $? "Managed to flush by destination IP" + + bridge mdb flush dev br0 dst_port 4789 &> /dev/null + check_fail $? "Managed to flush by UDP destination port" + + bridge mdb flush dev br0 vni 10 &> /dev/null + check_fail $? "Managed to flush by destination VNI" + + log_test "Flush tests" +} + cfg_test() { cfg_test_host cfg_test_port cfg_test_dump + cfg_test_flush } __fwd_test_host_ip() @@ -1166,8 +1353,8 @@ ctrl_test() ctrl_mldv2_is_in_test } -if ! bridge mdb help 2>&1 | grep -q "get"; then - echo "SKIP: iproute2 too old, missing bridge mdb get support" +if ! bridge mdb help 2>&1 | grep -q "flush"; then + echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip fi -- cgit v1.2.3-70-g09d2 From c3e87a7fcd0bb5820ca6db9b385bbfacb556d083 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 17 Dec 2023 10:32:44 +0200 Subject: selftests: vxlan_mdb: Add MDB bulk deletion test Add test cases to verify the behavior of the MDB bulk deletion functionality in the VXLAN driver. Signed-off-by: Ido Schimmel Acked-by: Petr Machata Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- tools/testing/selftests/net/test_vxlan_mdb.sh | 201 +++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 6725fd9157b9..84a05a9e46d8 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -79,6 +79,7 @@ CONTROL_PATH_TESTS=" dump_ipv6_ipv4 dump_ipv4_ipv6 dump_ipv6_ipv6 + flush " DATA_PATH_TESTS=" @@ -968,6 +969,202 @@ dump_ipv6_ipv6() dump_common $ns1 $local_addr $remote_prefix $fn } +flush() +{ + local num_entries + + echo + echo "Control path: Flush" + echo "-------------------" + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different source VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011" + + # Different routing protocol. 
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010" + + # Different destination IP. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010" + + # Different destination port. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010" + + # Different VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l) + [[ $num_entries -eq 0 ]] + log_test $? 0 "Flush all" + + # Check that entries are flushed when port is specified as the VXLAN + # device and that an error is returned when port is specified as a + # different net device. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0" + log_test $? 255 "Flush by wrong port" + + # Check that when flushing by source VNI only entries programmed with + # the specified source VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by specified source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011" + log_test $? 0 "Flush by unspecified source VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that all entries are flushed when "permanent" is specified and + # that an error is returned when "nopermanent" is specified. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by \"permanent\" state" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent" + log_test $? 
255 "Flush by \"nopermanent\" state" + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\"" + log_test $? 1 "Flush by specified routing protocol" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\"" + log_test $? 0 "Flush by unspecified routing protocol" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination IP only entries programmed + # with the specified destination IP are flushed and the rest are not. + + # IPv4. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 1 "Flush by specified destination IP - IPv4" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 0 "Flush by unspecified destination IP - IPv4" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # IPv6. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2" + log_test $? 1 "Flush by specified destination IP - IPv6" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1" + log_test $? 0 "Flush by unspecified destination IP - IPv6" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by UDP destination port only entries + # programmed with the specified port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\"" + log_test $? 1 "Flush by specified UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\"" + log_test $? 0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a UDP destination port for an entry, traffic is + # encapsulated with the device's UDP destination port. Check that when + # flushing by the device's UDP destination port only entries programmed + # with this port are flushed and the rest are not. 
+ + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by device's UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination VNI only entries programmed + # with the specified destination VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\"" + log_test $? 1 "Flush by specified destination VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\"" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a destination VNI for an entry, traffic is + # encapsulated with the source VNI. Check that when flushing by a + # destination VNI that is equal to the source VNI only such entries are + # flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by destination VNI equal to source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Test that an error is returned when trying to flush using VLAN ID. + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10" + log_test $? 255 "Flush by VLAN ID" +} + ################################################################################ # Tests - Data path @@ -2292,9 +2489,9 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi -bridge mdb help 2>&1 | grep -q "get" +bridge mdb help 2>&1 | grep -q "flush" if [ $? -ne 0 ]; then - echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support" + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support" exit $ksft_skip fi -- cgit v1.2.3-70-g09d2 From 69ff403d87be4812571c54b1159e24998414bcab Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Sat, 16 Dec 2023 21:10:52 +0800 Subject: selftests/bpf: Remove tests for zeroed-array kptr bpf_mem_alloc() doesn't support zero-sized allocation, so removing these tests from test_bpf_ma test. After the removal, there will no definition for bin_data_8, so remove 8 from data_sizes array and adjust the index of data_btf_ids array in all test cases accordingly. 
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20231216131052.27621-3-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_bpf_ma.c | 100 ++++++++++++------------ 1 file changed, 49 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index b685a4aba6bd..069db9085e78 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -17,7 +17,7 @@ struct generic_map_value { char _license[] SEC("license") = "GPL"; -const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; int err = 0; @@ -166,7 +166,7 @@ static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \ } while (0) -DEFINE_ARRAY_WITH_KPTR(8); +/* kptr doesn't support bin_data_8 which is a zero-sized array */ DEFINE_ARRAY_WITH_KPTR(16); DEFINE_ARRAY_WITH_KPTR(32); DEFINE_ARRAY_WITH_KPTR(64); @@ -198,21 +198,20 @@ int test_batch_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, - * then free 128 8-bytes objects in batch to trigger freeing. + /* Alloc 128 16-bytes objects in batch to trigger refilling, + * then free 128 16-bytes objects in batch to trigger freeing. */ - CALL_BATCH_ALLOC_FREE(8, 128, 0); - CALL_BATCH_ALLOC_FREE(16, 128, 1); - CALL_BATCH_ALLOC_FREE(32, 128, 2); - CALL_BATCH_ALLOC_FREE(64, 128, 3); - CALL_BATCH_ALLOC_FREE(96, 128, 4); - CALL_BATCH_ALLOC_FREE(128, 128, 5); - CALL_BATCH_ALLOC_FREE(192, 128, 6); - CALL_BATCH_ALLOC_FREE(256, 128, 7); - CALL_BATCH_ALLOC_FREE(512, 64, 8); - CALL_BATCH_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_ALLOC_FREE(4096, 8, 11); + CALL_BATCH_ALLOC_FREE(16, 128, 0); + CALL_BATCH_ALLOC_FREE(32, 128, 1); + CALL_BATCH_ALLOC_FREE(64, 128, 2); + CALL_BATCH_ALLOC_FREE(96, 128, 3); + CALL_BATCH_ALLOC_FREE(128, 128, 4); + CALL_BATCH_ALLOC_FREE(192, 128, 5); + CALL_BATCH_ALLOC_FREE(256, 128, 6); + CALL_BATCH_ALLOC_FREE(512, 64, 7); + CALL_BATCH_ALLOC_FREE(1024, 32, 8); + CALL_BATCH_ALLOC_FREE(2048, 16, 9); + CALL_BATCH_ALLOC_FREE(4096, 8, 10); return 0; } @@ -223,21 +222,20 @@ int test_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, + /* Alloc 128 16-bytes objects in batch to trigger refilling, * then free these objects through map free. 
*/ - CALL_BATCH_ALLOC(8, 128, 0); - CALL_BATCH_ALLOC(16, 128, 1); - CALL_BATCH_ALLOC(32, 128, 2); - CALL_BATCH_ALLOC(64, 128, 3); - CALL_BATCH_ALLOC(96, 128, 4); - CALL_BATCH_ALLOC(128, 128, 5); - CALL_BATCH_ALLOC(192, 128, 6); - CALL_BATCH_ALLOC(256, 128, 7); - CALL_BATCH_ALLOC(512, 64, 8); - CALL_BATCH_ALLOC(1024, 32, 9); - CALL_BATCH_ALLOC(2048, 16, 10); - CALL_BATCH_ALLOC(4096, 8, 11); + CALL_BATCH_ALLOC(16, 128, 0); + CALL_BATCH_ALLOC(32, 128, 1); + CALL_BATCH_ALLOC(64, 128, 2); + CALL_BATCH_ALLOC(96, 128, 3); + CALL_BATCH_ALLOC(128, 128, 4); + CALL_BATCH_ALLOC(192, 128, 5); + CALL_BATCH_ALLOC(256, 128, 6); + CALL_BATCH_ALLOC(512, 64, 7); + CALL_BATCH_ALLOC(1024, 32, 8); + CALL_BATCH_ALLOC(2048, 16, 9); + CALL_BATCH_ALLOC(4096, 8, 10); return 0; } @@ -251,17 +249,17 @@ int test_batch_percpu_alloc_free(void *ctx) /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, * then free 128 16-bytes per-cpu objects in batch to trigger freeing. */ - CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); - CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); - CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); - CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); - CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); - CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); - CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); - CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11); + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 0); + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 1); + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 2); + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 3); + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 4); + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 5); + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 6); + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 7); + CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 8); + CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 9); + CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 10); return 0; } @@ -275,17 +273,17 @@ int test_percpu_free_through_map_free(void *ctx) /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, * then free these object through map free. */ - CALL_BATCH_PERCPU_ALLOC(16, 128, 1); - CALL_BATCH_PERCPU_ALLOC(32, 128, 2); - CALL_BATCH_PERCPU_ALLOC(64, 128, 3); - CALL_BATCH_PERCPU_ALLOC(96, 128, 4); - CALL_BATCH_PERCPU_ALLOC(128, 128, 5); - CALL_BATCH_PERCPU_ALLOC(192, 128, 6); - CALL_BATCH_PERCPU_ALLOC(256, 128, 7); - CALL_BATCH_PERCPU_ALLOC(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC(4096, 8, 11); + CALL_BATCH_PERCPU_ALLOC(16, 128, 0); + CALL_BATCH_PERCPU_ALLOC(32, 128, 1); + CALL_BATCH_PERCPU_ALLOC(64, 128, 2); + CALL_BATCH_PERCPU_ALLOC(96, 128, 3); + CALL_BATCH_PERCPU_ALLOC(128, 128, 4); + CALL_BATCH_PERCPU_ALLOC(192, 128, 5); + CALL_BATCH_PERCPU_ALLOC(256, 128, 6); + CALL_BATCH_PERCPU_ALLOC(512, 64, 7); + CALL_BATCH_PERCPU_ALLOC(1024, 32, 8); + CALL_BATCH_PERCPU_ALLOC(2048, 16, 9); + CALL_BATCH_PERCPU_ALLOC(4096, 8, 10); return 0; } -- cgit v1.2.3-70-g09d2 From 812d8bf87678f77055b575d20636fdbbbf15edaf Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Tue, 19 Dec 2023 12:03:24 +0100 Subject: libbpf: Skip DWARF sections in linker sanity check clang can generate (with -g -Wa,--compress-debug-sections) 4-byte aligned DWARF sections that declare themselves to be 8-byte aligned in the section header. Since DWARF sections are dropped during linking anyway, just skip running the sanity checks on them. 
Reported-by: Sergei Trofimovich Suggested-by: Andrii Nakryiko Signed-off-by: Alyssa Ross Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Closes: https://lore.kernel.org/bpf/ZXcFRJVKbKxtEL5t@nz.home/ Link: https://lore.kernel.org/bpf/20231219110324.8989-1-hi@alyssa.is --- tools/lib/bpf/linker.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 52a2901e8bd0..16bca56002ab 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -719,6 +719,9 @@ static int linker_sanity_check_elf(struct src_obj *obj) return -EINVAL; } + if (is_dwarf_sec_name(sec->sec_name)) + continue; + if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) { pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n", sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, -- cgit v1.2.3-70-g09d2 From fc3a5534e2a8855427403113cbeb54af5837bbe0 Mon Sep 17 00:00:00 2001 From: Mingyi Zhang Date: Thu, 21 Dec 2023 11:39:47 +0800 Subject: libbpf: Fix NULL pointer dereference in bpf_object__collect_prog_relos An issue occurred while reading an ELF file in libbpf.c during fuzzing: Program received signal SIGSEGV, Segmentation fault. 0x0000000000958e97 in bpf_object.collect_prog_relos () at libbpf.c:4206 4206 in libbpf.c (gdb) bt #0 0x0000000000958e97 in bpf_object.collect_prog_relos () at libbpf.c:4206 #1 0x000000000094f9d6 in bpf_object.collect_relos () at libbpf.c:6706 #2 0x000000000092bef3 in bpf_object_open () at libbpf.c:7437 #3 0x000000000092c046 in bpf_object.open_mem () at libbpf.c:7497 #4 0x0000000000924afa in LLVMFuzzerTestOneInput () at fuzz/bpf-object-fuzzer.c:16 #5 0x000000000060be11 in testblitz_engine::fuzzer::Fuzzer::run_one () #6 0x000000000087ad92 in tracing::span::Span::in_scope () #7 0x00000000006078aa in testblitz_engine::fuzzer::util::walkdir () #8 0x00000000005f3217 in testblitz_engine::entrypoint::main::{{closure}} () #9 0x00000000005f2601 in main () (gdb) scn_data was NULL at this code (tools/lib/bpf/libbpf.c): if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { The scn_data is derived from the code above: scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); if (!relo_sec_name || !sec_name) // don't check whether scn_data is NULL return -EINVAL; In certain special scenarios, such as reading a malformed ELF file, it is possible that scn_data is a NULL pointer. Signed-off-by: Mingyi Zhang Signed-off-by: Xin Liu Signed-off-by: Changye Wu Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231221033947.154564-1-liuxin350@huawei.com --- tools/lib/bpf/libbpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ac54ebc0629f..ebcfb2147fbd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4355,6 +4355,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); + if (!scn_data) + return -LIBBPF_ERRNO__FORMAT; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); -- cgit v1.2.3-70-g09d2 From eff3c558bb7e61c41b53e4c8130e514a5a4df9ba Mon Sep 17 00:00:00 2001 From: Felix Huettner Date: Mon, 27 Nov 2023 11:49:16 +0000 Subject: netfilter: ctnetlink:
support filtering by zone conntrack zones are heavily used by tools like openvswitch to run multiple virtual "routers" on a single machine. In this context, each conntrack zone maps to a single router, thereby preventing overlapping IPs from becoming issues. In these systems, it is common to operate on all conntrack entries of a given zone, e.g. to delete them when a router is deleted. Previously, this required these tools to dump the full conntrack table and filter out the relevant entries in userspace, potentially causing performance issues. To do this we reuse the existing CTA_ZONE attribute. This was previously parsed but not used during dump and flush requests. Now, if CTA_ZONE is set, we filter these operations based on the provided zone. However, this means that users who previously passed CTA_ZONE will experience a difference in functionality. Alternatively, CTA_FILTER could have been used for the same functionality. However, it is not yet supported during flush requests and is only available when using AF_INET or AF_INET6. Co-developed-by: Luca Czesla Signed-off-by: Luca Czesla Co-developed-by: Max Lamprecht Signed-off-by: Max Lamprecht Signed-off-by: Felix Huettner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 12 +- tools/testing/selftests/netfilter/.gitignore | 2 + tools/testing/selftests/netfilter/Makefile | 3 +- .../selftests/netfilter/conntrack_dump_flush.c | 430 +++++++++++++++++++++ 4 files changed, 442 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/netfilter/conntrack_dump_flush.c (limited to 'tools') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fb0ae15e96df..0c22a02c2035 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -992,13 +992,13 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) if (err) goto err_filter; - if (!cda[CTA_FILTER]) - return filter; - err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone); if (err < 0) goto err_filter; + if (!cda[CTA_FILTER]) + return filter; + err = ctnetlink_parse_filter(cda[CTA_FILTER], filter); if (err < 0) goto err_filter; @@ -1043,7 +1043,7 @@ err_filter: static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda) { - return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS]; + return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS] || cda[CTA_ZONE]; } static int ctnetlink_start(struct netlink_callback *cb) @@ -1148,6 +1148,10 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) if (filter->family && nf_ct_l3num(ct) != filter->family) goto ignore_entry; + if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID && + !nf_ct_zone_equal_any(ct, &filter->zone)) + goto ignore_entry; + if (filter->orig_flags) { tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL); if (!ctnetlink_filter_match_tuple(&filter->orig, tuple, diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4b2928e1c19d..c2229b3e40d4 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -2,3 +2,5 @@ nf-queue connect_close audit_logread +conntrack_dump_flush +sctp_collision diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index bced422b78f7..db27153eb4a0 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -14,6 +14,7 @@ HOSTPKG_CONFIG := pkg-config
CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision +TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \ + conntrack_dump_flush include ../lib.mk diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c new file mode 100644 index 000000000000..f18c6db13bbf --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include +#include +#include + +#include +#include +#include +#include +#include "../kselftest_harness.h" + +#define TEST_ZONE_ID 123 +#define CTA_FILTER_F_CTA_TUPLE_ZONE (1 << 2) + +static int reply_counter; + +static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, + uint32_t src_ip, uint32_t dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip); + mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, + struct in6_addr src_ip, struct in6_addr dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip); + mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_proto(struct nlmsghdr *nlh) +{ + struct nlattr *nest, *nest_proto; + + nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO); + if (!nest) + return -1; + + nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *rplnlh; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + ret = build_cta_proto(nlh); + if (ret < 0) { + perror("build_cta_proto"); + return -1; + } + mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000)); + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + if 
(mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + return -1; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + if (errno == EEXIST) { + /* The entries are probably still there from a previous + * run. So we are good + */ + return 0; + } + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip, + uint32_t dst_ip, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int conntrack_data_generate_v6(struct mnl_socket *sock, + struct in6_addr src_ip, + struct in6_addr dst_ip, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET6; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int count_entries(const struct nlmsghdr *nlh, void *data) +{ + reply_counter++; +} + +static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + reply_counter = 0; + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + while (ret > 0) { + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, + count_entries, NULL); + if (ret <= MNL_CB_STOP) + break; + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + } + if (ret < 
0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + return reply_counter; +} + +static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +FIXTURE(conntrack_dump_flush) +{ + struct mnl_socket *sock; +}; + +FIXTURE_SETUP(conntrack_dump_flush) +{ + struct in6_addr src, dst; + int ret; + + self->sock = mnl_socket_open(NETLINK_NETFILTER); + if (!self->sock) { + perror("mnl_socket_open"); + exit(EXIT_FAILURE); + } + + if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) { + perror("mnl_socket_bind"); + exit(EXIT_FAILURE); + } + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + if (ret < 0 && errno == EPERM) + SKIP(return, "Needs to be run as root"); + else if (ret < 0 && errno == EOPNOTSUPP) + SKIP(return, "Kernel does not seem to support conntrack zones"); + + ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x01000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x02000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x03000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x04000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x05000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x06000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_GE(ret, 2); + if (ret > 2) + SKIP(return, "kernel does not support filtering by zone"); +} + +FIXTURE_TEARDOWN(conntrack_dump_flush) +{ +} + +TEST_F(conntrack_dump_flush, test_dump_by_zone) +{ + int ret; + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone) +{ + int ret; + + ret = 
conntrack_flush_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3-70-g09d2 From 67f440c05dd2fca9f26057e713d8618e23c4e021 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Dec 2023 13:30:22 +0000 Subject: selftests/net: Fix various spelling mistakes in TCP-AO tests There are a handful of spelling mistakes in test messages in the TCP-AO selftests. Fix these. Signed-off-by: Colin Ian King Reviewed-by: Dmitry Safonov Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/connect-deny.c | 2 +- tools/testing/selftests/net/tcp_ao/lib/proc.c | 4 ++-- tools/testing/selftests/net/tcp_ao/setsockopt-closed.c | 2 +- tools/testing/selftests/net/tcp_ao/unsigned-md5.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index 1ca78040d8b7..185a2f6e5ff3 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -55,7 +55,7 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, err = test_wait_fd(lsk, timeout, 0); if (err == -ETIMEDOUT) { if (!fault(TIMEOUT)) - test_fail("timeouted for accept()"); + test_fail("timed out for accept()"); } else if (err < 0) { test_error("test_wait_fd()"); } else { diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c index 2322f4d4676d..2fb6dd8adba6 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/proc.c +++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c @@ -227,7 +227,7 @@ void netstat_print_diff(struct netstat *nsa, struct netstat *nsb) } if (nsb->counters_nr < nsa->counters_nr) - test_error("Unexpected: some counters dissapeared!"); + test_error("Unexpected: some counters disappeared!"); for (j = 0, i = 0; i < nsb->counters_nr; i++) { if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) { @@ -244,7 +244,7 @@ void netstat_print_diff(struct netstat *nsa, struct netstat *nsb) j++; } if (j != nsa->counters_nr) - test_error("Unexpected: some counters dissapeared!"); + test_error("Unexpected: some counters disappeared!"); nsb = nsb->next; nsa = nsa->next; diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 7e4601b3f6a3..a329f42f40ce 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -427,7 +427,7 @@ static void test_einval_del_key(void) sk = prepare_defs(TCP_AO_DEL_KEY, &del); del.set_current = 1; - setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-exising current key"); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current key"); sk = prepare_defs(TCP_AO_DEL_KEY, &del); del.set_rnext = 1; diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index 7cffde02d2be..14addfd46468 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -72,7 +72,7 @@ static void try_accept(const char *tst_name, unsigned int port, err = test_wait_fd(lsk, timeout, 0);
if (err == -ETIMEDOUT) { if (!fault(TIMEOUT)) - test_fail("timeouted for accept()"); + test_fail("timed out for accept()"); } else if (err < 0) { test_error("test_wait_fd()"); } else { -- cgit v1.2.3-70-g09d2 From 826eb9bcc1844990e3c4a7c84846f1c1eaee0ed0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 19 Dec 2023 02:03:05 +0000 Subject: selftest/tcp-ao: Rectify out-of-tree build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trivial fix for out-of-tree build that I wasn't testing previously: 1. Create a directory for library object files, fixes: > gcc lib/kconfig.c -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing -I ../../../../../usr/include/ -iquote /tmp/kselftest/kselftest/net/tcp_ao/lib -I ../../../../include/ -o /tmp/kselftest/kselftest/net/tcp_ao/lib/kconfig.o -c > Assembler messages: > Fatal error: can't create /tmp/kselftest/kselftest/net/tcp_ao/lib/kconfig.o: No such file or directory > make[1]: *** [Makefile:46: /tmp/kselftest/kselftest/net/tcp_ao/lib/kconfig.o] Error 1 2. Include $(KHDR_INCLUDES) that's exported by selftests/Makefile, fixes: > In file included from lib/kconfig.c:6: > lib/aolib.h:320:45: warning: ‘struct tcp_ao_add’ declared inside parameter list will not be visible outside of this definition or declaration > 320 | extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, > | ^~~~~~~~~~ ... 3. While at here, clean-up $(KSFT_KHDR_INSTALL): it's not needed anymore since commit f2745dc0ba3d ("selftests: stop using KSFT_KHDR_INSTALL") 4. Also, while at here, drop .DEFAULT_GOAL definition: that has a self-explaining comment, that was valid when I made these selftests compile on local v4.19 kernel, but not needed since commit 8ce72dc32578 ("selftests: fix headers_install circular dependency") Fixes: cfbab37b3da0 ("selftests/net: Add TCP-AO library") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312190645.q76MmHyq-lkp@intel.com/ Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/Makefile | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 6343cfcf919b..8e60bae67aa9 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -17,22 +17,18 @@ TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS) top_srcdir := ../../../../.. -KSFT_KHDR_INSTALL := 1 include ../../lib.mk HOSTAR ?= ar -# Drop it on port to linux/master with commit 8ce72dc32578 -.DEFAULT_GOAL := all - LIBDIR := $(OUTPUT)/lib LIB := $(LIBDIR)/libaotst.a LDLIBS += $(LIB) -pthread LIBDEPS := lib/aolib.h Makefile CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing -CFLAGS += -I ../../../../../usr/include/ -iquote $(LIBDIR) -CFLAGS += -I ../../../../include/ +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -iquote ./lib/ -I ../../../../include/ # Library LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c @@ -43,6 +39,7 @@ $(LIB): $(LIBOBJ) $(HOSTAR) rcs $@ $^ $(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS) + mkdir -p $(LIBDIR) $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c $(TEST_GEN_PROGS): $(LIB) -- cgit v1.2.3-70-g09d2 From 6530b29f77c8960bd21639ce71070499d155396b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 19 Dec 2023 13:54:04 +0800 Subject: selftests/net: remove unneeded semicolon No functional modification involved. 
./tools/testing/selftests/net/tcp_ao/setsockopt-closed.c:121:2-3: Unneeded semicolon. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7771 Signed-off-by: Jiapeng Chong Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/setsockopt-closed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index a329f42f40ce..452de131fa3a 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -118,7 +118,7 @@ static void setsockopt_checked(int sk, int optname, void *optval, break; default: break; - }; + } __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst); } -- cgit v1.2.3-70-g09d2 From b84c2faeb986c73956fa897d14d2ecbc405abe08 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:49 +0800 Subject: selftests/net: convert gre_gso.sh to run it in unique namespace Here is the test result after conversion. # ./gre_gso.sh TEST: GREv6/v4 - copy file w/ TSO [ OK ] TEST: GREv6/v4 - copy file w/ GSO [ OK ] TEST: GREv6/v6 - copy file w/ TSO [ OK ] TEST: GREv6/v6 - copy file w/ GSO [ OK ] Tests passed: 4 Tests failed: 0 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/gre_gso.sh | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh index 3224651db97b..5100d90f92d2 100755 --- a/tools/testing/selftests/net/gre_gso.sh +++ b/tools/testing/selftests/net/gre_gso.sh @@ -2,10 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # This test is for checking GRE GSO. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS="gre_gso" @@ -13,8 +11,6 @@ TESTS="gre_gso" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns ns1" -NS_EXEC="ip netns exec ns1" TMPFILE=`mktemp` PID= @@ -50,13 +46,13 @@ log_test() setup() { set -e - ip netns add ns1 - ip netns set ns1 auto - $IP link set dev lo up + setup_ns ns1 + IP="ip -netns $ns1" + NS_EXEC="ip netns exec $ns1" ip link add veth0 type veth peer name veth1 ip link set veth0 up - ip link set veth1 netns ns1 + ip link set veth1 netns $ns1 $IP link set veth1 name veth0 $IP link set veth0 up @@ -70,7 +66,7 @@ cleanup() [ -n "$PID" ] && kill $PID ip link del dev gre1 &> /dev/null ip link del dev veth0 &> /dev/null - ip netns del ns1 + cleanup_ns $ns1 } get_linklocal() @@ -145,7 +141,7 @@ gre6_gso_test() setup a1=$(get_linklocal veth0) - a2=$(get_linklocal veth0 ns1) + a2=$(get_linklocal veth0 $ns1) gre_create_tun $a1 $a2 -- cgit v1.2.3-70-g09d2 From f6476dedf08ded43bb9fb98ae634c2a1c56fdc06 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:50 +0800 Subject: selftests/net: convert netns-name.sh to run it in unique namespace This test will move the device to netns 1. Add a new test_ns to do this. Here is the test result after conversion. # ./netns-name.sh netns-name.sh [ OK ] Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/netns-name.sh | 44 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 7d3d3fc99461..6974474c26f3 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source lib.sh set -o pipefail -NS=netns-name-test DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name @@ -11,7 +11,7 @@ ALT_NAME=some-alt-name RET_CODE=0 cleanup() { - ip netns del $NS + cleanup_ns $NS $test_ns } trap cleanup EXIT @@ -21,50 +21,50 @@ fail() { RET_CODE=1 } -ip netns add $NS +setup_ns NS test_ns # # Test basic move without a rename # ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 || +ip -netns $NS link set dev $DEV netns $test_ns || fail "Can't perform a netns move" -ip link show dev $DEV >> /dev/null || fail "Device not found after move" -ip link del $DEV || fail +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $test_ns link del $DEV || fail # # Test move with a conflict # -ip link add name $DEV type dummy +ip -netns $test_ns link add name $DEV type dummy ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 2> /dev/null && +ip -netns $NS link set dev $DEV netns $test_ns 2> /dev/null && fail "Performed a netns move with a name conflict" -ip link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" ip -netns $NS link del $DEV || fail -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail # # Test move with a conflict and rename # -ip link add name $DEV type dummy +ip -netns $test_ns link add name $DEV type dummy ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 name $DEV2 || +ip -netns $NS link set dev $DEV netns $test_ns name $DEV2 || fail "Can't perform a netns move with rename" -ip link del $DEV2 || fail -ip link del $DEV || fail +ip -netns $test_ns link del $DEV2 || fail +ip -netns $test_ns link del $DEV || fail # # Test dup alt-name with netns move # -ip link add name $DEV type dummy || fail -ip link property add dev $DEV altname $ALT_NAME || fail +ip -netns $test_ns link add name $DEV type dummy || fail +ip -netns $test_ns link property add dev $DEV altname $ALT_NAME || fail ip -netns $NS link add name $DEV2 type dummy || fail ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail -ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null && +ip -netns $NS link set dev $DEV2 netns $test_ns 2> /dev/null && fail "Moved with alt-name dup" -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail ip -netns $NS link del $DEV2 || fail # @@ -72,11 +72,11 @@ ip -netns $NS link del $DEV2 || fail # ip -netns $NS link add name $DEV type dummy || fail ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail -ip -netns $NS link set dev $DEV netns 1 || fail -ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" -ip -netns $NS link show dev $ALT_NAME 2> /dev/null && +ip -netns $NS link set dev $DEV netns $test_ns || fail +ip -netns $test_ns link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" +ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still 
find alt-name after move" -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then -- cgit v1.2.3-70-g09d2 From d3b6b1116127123c15b85ce1ccd5f6d2d3317925 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:51 +0800 Subject: selftests/net: convert rtnetlink.sh to run it in unique namespace When running the test in namespace, the debugfs may not load automatically. So add a checking to make sure debugfs loaded. Here is the test result after conversion. # ./rtnetlink.sh PASS: policy routing PASS: route get ... PASS: address proto IPv4 PASS: address proto IPv6 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 26827ea4e3e5..a10a32952f21 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -35,8 +35,7 @@ VERBOSE=0 PAUSE=no PAUSE_ON_FAIL=no -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh # set global exit status, but never reset nonzero one. check_err() @@ -517,9 +516,8 @@ kci_test_encap_fou() # test various encap methods, use netns to avoid unwanted interference kci_test_encap() { - testns="testns" local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP encap tests: cannot add net namespace $testns" return $ksft_skip @@ -574,6 +572,10 @@ kci_test_macsec_offload() return $ksft_skip fi + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi + # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then run_cmd modprobe -q netdevsim @@ -738,6 +740,10 @@ kci_test_ipsec_offload() sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi + # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then run_cmd modprobe -q netdevsim @@ -836,11 +842,10 @@ EOF kci_test_gretap() { - testns="testns" DEV_NS=gretap00 local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP gretap tests: cannot add net namespace $testns" return $ksft_skip @@ -878,11 +883,10 @@ kci_test_gretap() kci_test_ip6gretap() { - testns="testns" DEV_NS=ip6gretap00 local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP ip6gretap tests: cannot add net namespace $testns" return $ksft_skip @@ -920,7 +924,6 @@ kci_test_ip6gretap() kci_test_erspan() { - testns="testns" DEV_NS=erspan00 local ret=0 run_cmd_grep "^Usage:" ip link help erspan @@ -928,7 +931,7 @@ kci_test_erspan() end_test "SKIP: erspan: iproute2 too old" return $ksft_skip fi - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP erspan tests: cannot add net namespace $testns" return $ksft_skip @@ -970,7 +973,6 @@ kci_test_erspan() kci_test_ip6erspan() { - testns="testns" DEV_NS=ip6erspan00 local ret=0 run_cmd_grep "^Usage:" ip link help ip6erspan @@ -978,7 +980,7 @@ kci_test_ip6erspan() end_test "SKIP: ip6erspan: iproute2 too old" return $ksft_skip fi - run_cmd ip netns add "$testns" + setup_ns testns if [ $? 
-ne 0 ]; then end_test "SKIP ip6erspan tests: cannot add net namespace $testns" return $ksft_skip @@ -1022,8 +1024,6 @@ kci_test_ip6erspan() kci_test_fdb_get() { - IP="ip -netns testns" - BRIDGE="bridge -netns testns" brdev="test-br0" vxlandev="vxlan10" test_mac=de:ad:be:ef:13:37 @@ -1037,11 +1037,13 @@ kci_test_fdb_get() return $ksft_skip fi - run_cmd ip netns add testns + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP fdb get tests: cannot add net namespace $testns" return $ksft_skip fi + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \ dstport 4789 run_cmd $IP link add name "$brdev" type bridge @@ -1052,7 +1054,7 @@ kci_test_fdb_get() run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev" run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self - ip netns del testns &>/dev/null + ip netns del $testns &>/dev/null if [ $ret -ne 0 ]; then end_test "FAIL: bridge fdb get" -- cgit v1.2.3-70-g09d2 From 098f1ce08bbce6030e8c136f1efc753fd9199f33 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:52 +0800 Subject: selftests/net: convert stress_reuseport_listen.sh to run it in unique namespace Here is the test result after conversion. # ./stress_reuseport_listen.sh listen 24000 socks took 0.47714 Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/stress_reuseport_listen.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh index 4de11da4092b..94d5d1a1c90f 100755 --- a/tools/testing/selftests/net/stress_reuseport_listen.sh +++ b/tools/testing/selftests/net/stress_reuseport_listen.sh @@ -2,18 +2,18 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (c) 2022 Meta Platforms, Inc. and affiliates. -NS='stress_reuseport_listen_ns' +source lib.sh NR_FILES=24100 SAVED_NR_FILES=$(ulimit -n) setup() { - ip netns add $NS + setup_ns NS ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1 ulimit -n $NR_FILES } cleanup() { - ip netns del $NS + cleanup_ns $NS ulimit -n $SAVED_NR_FILES } -- cgit v1.2.3-70-g09d2 From 976fd1fe4f58ccb5762d1341a6e7524932a31557 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:53 +0800 Subject: selftests/net: convert xfrm_policy.sh to run it in unique namespace Here is the test result after conversion. 
# ./xfrm_policy.sh PASS: policy before exception matches PASS: ping to .254 bypassed ipsec tunnel (exceptions) PASS: direct policy matches (exceptions) PASS: policy matches (exceptions) PASS: ping to .254 bypassed ipsec tunnel (exceptions and block policies) PASS: direct policy matches (exceptions and block policies) PASS: policy matches (exceptions and block policies) PASS: ping to .254 bypassed ipsec tunnel (exceptions and block policies after hresh changes) PASS: direct policy matches (exceptions and block policies after hresh changes) PASS: policy matches (exceptions and block policies after hresh changes) PASS: ping to .254 bypassed ipsec tunnel (exceptions and block policies after hthresh change in ns3) PASS: direct policy matches (exceptions and block policies after hthresh change in ns3) PASS: policy matches (exceptions and block policies after hthresh change in ns3) PASS: ping to .254 bypassed ipsec tunnel (exceptions and block policies after htresh change to normal) PASS: direct policy matches (exceptions and block policies after htresh change to normal) PASS: policy matches (exceptions and block policies after htresh change to normal) PASS: policies with repeated htresh change Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/xfrm_policy.sh | 138 ++++++++++++++--------------- 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index bdf450eaf60c..457789530645 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -18,8 +18,7 @@ # ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception) # ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception) -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh ret=0 policy_checks_ok=1 @@ -204,24 +203,24 @@ check_xfrm() { ip=$2 local lret=0 - ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null + ip netns exec ${ns[1]} ping -q -c 1 10.0.2.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? -ne $rval ] ; then lret=1 fi - ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null + ip netns exec ${ns[2]} ping -q -c 1 10.0.1.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? 
-ne $rval ] ; then lret=1 fi @@ -270,11 +269,11 @@ check_hthresh_repeat() i=0 for i in $(seq 1 10);do - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break done if [ $i -ne 10 ] ;then @@ -347,79 +346,80 @@ if [ $? -ne 0 ];then exit $ksft_skip fi -for i in 1 2 3 4; do - ip netns add ns$i - ip -net ns$i link set lo up -done +setup_ns ns1 ns2 ns3 ns4 +ns[1]=$ns1 +ns[2]=$ns2 +ns[3]=$ns3 +ns[4]=$ns4 DEV=veth0 -ip link add $DEV netns ns1 type veth peer name eth1 netns ns3 -ip link add $DEV netns ns2 type veth peer name eth1 netns ns4 +ip link add $DEV netns ${ns[1]} type veth peer name eth1 netns ${ns[3]} +ip link add $DEV netns ${ns[2]} type veth peer name eth1 netns ${ns[4]} -ip link add $DEV netns ns3 type veth peer name veth0 netns ns4 +ip link add $DEV netns ${ns[3]} type veth peer name veth0 netns ${ns[4]} DEV=veth0 for i in 1 2; do - ip -net ns$i link set $DEV up - ip -net ns$i addr add 10.0.$i.2/24 dev $DEV - ip -net ns$i addr add dead:$i::2/64 dev $DEV - - ip -net ns$i addr add 10.0.$i.253 dev $DEV - ip -net ns$i addr add 10.0.$i.254 dev $DEV - ip -net ns$i addr add dead:$i::fd dev $DEV - ip -net ns$i addr add dead:$i::fe dev $DEV + ip -net ${ns[$i]} link set $DEV up + ip -net ${ns[$i]} addr add 10.0.$i.2/24 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::2/64 dev $DEV + + ip -net ${ns[$i]} addr add 10.0.$i.253 dev $DEV + ip -net ${ns[$i]} addr add 10.0.$i.254 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fd dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fe dev $DEV done for i in 3 4; do -ip -net ns$i link set eth1 up -ip -net ns$i link set veth0 up + ip -net ${ns[$i]} link set eth1 up + ip -net ${ns[$i]} link set veth0 up done -ip -net ns1 route add default via 10.0.1.1 -ip -net ns2 route add default via 10.0.2.1 +ip -net ${ns[1]} route add default via 10.0.1.1 +ip -net ${ns[2]} route add default via 10.0.2.1 -ip -net ns3 addr add 10.0.1.1/24 dev eth1 -ip -net ns3 addr add 10.0.3.1/24 dev veth0 -ip -net ns3 addr add 2001:1::1/64 dev eth1 -ip -net ns3 addr add 2001:3::1/64 dev veth0 +ip -net ${ns[3]} addr add 10.0.1.1/24 dev eth1 +ip -net ${ns[3]} addr add 10.0.3.1/24 dev veth0 +ip -net ${ns[3]} addr add 2001:1::1/64 dev eth1 +ip -net ${ns[3]} addr add 2001:3::1/64 dev veth0 -ip -net ns3 route add default via 10.0.3.10 +ip -net ${ns[3]} route add default via 10.0.3.10 -ip -net ns4 addr add 10.0.2.1/24 dev eth1 -ip -net ns4 addr add 10.0.3.10/24 dev veth0 -ip -net ns4 addr add 2001:2::1/64 dev eth1 -ip -net ns4 addr add 2001:3::10/64 dev veth0 -ip -net ns4 route add default via 10.0.3.1 +ip -net ${ns[4]} addr add 10.0.2.1/24 dev eth1 +ip -net ${ns[4]} addr add 10.0.3.10/24 dev veth0 +ip -net ${ns[4]} addr add 2001:2::1/64 dev eth1 +ip -net ${ns[4]} addr add 2001:3::10/64 dev veth0 +ip -net ${ns[4]} route 
add default via 10.0.3.1 for j in 4 6; do for i in 3 4;do - ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null - ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null done done # abuse iptables rule counter to check if ping matches a policy -ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec -ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[3]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[4]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec if [ $? -ne 0 ];then echo "SKIP: Could not insert iptables rule" - for i in 1 2 3 4;do ip netns del ns$i;done + cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ksft_skip fi # localip remoteip localnet remotenet -do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 -do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 -do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 -do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 +do_esp ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp ${ns[3]} dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 +do_esp ${ns[4]} 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp ${ns[4]} dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 -do_dummies4 ns3 -do_dummies6 ns4 +do_dummies4 ${ns[3]} +do_dummies6 ${ns[4]} -do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24 -do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24 -do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64 -do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64 +do_esp_policy_get_check ${ns[3]} 10.0.1.0/24 10.0.2.0/24 +do_esp_policy_get_check ${ns[4]} 10.0.2.0/24 10.0.1.0/24 +do_esp_policy_get_check ${ns[3]} dead:1::/64 dead:2::/64 +do_esp_policy_get_check ${ns[4]} dead:2::/64 dead:1::/64 # ping to .254 should use ipsec, exception is not installed. check_xfrm 1 254 @@ -432,11 +432,11 @@ fi # installs exceptions # localip remoteip encryptdst plaindst -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 -do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_exception ${ns[4]} 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 -do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 -do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 +do_exception ${ns[3]} dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 +do_exception ${ns[4]} dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 check_exceptions "exceptions" if [ $? -ne 0 ]; then @@ -444,14 +444,14 @@ if [ $? -ne 0 ]; then fi # insert block policies with adjacent/overlapping netmasks -do_overlap ns3 +do_overlap ${ns[3]} check_exceptions "exceptions and block policies" if [ $? -ne 0 ]; then ret=1 fi -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125 sleep $((RANDOM%5)) done @@ -459,19 +459,19 @@ done check_exceptions "exceptions and block policies after hresh changes" # full flush of policy db, check everything gets freed incl. 
internal meta data -ip -net ns3 xfrm policy flush +ip -net ${ns[3]} xfrm policy flush -do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_esp_policy ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 # move inexact policies to hash table -ip -net ns3 xfrm policy set hthresh4 16 16 +ip -net ${ns[3]} xfrm policy set hthresh4 16 16 sleep $((RANDOM%5)) check_exceptions "exceptions and block policies after hthresh change in ns3" # restore original hthresh settings -- move policies back to tables -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 sleep $((RANDOM%5)) done @@ -479,8 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" -check_random_order ns3 "policies inserted in random order" +check_random_order ${ns[3]} "policies inserted in random order" -for i in 1 2 3 4;do ip netns del ns$i;done +cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ret -- cgit v1.2.3-70-g09d2 From 4416c5f53b439cec05a9ec88af71cb2e64a1e9aa Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:54 +0800 Subject: selftests/net: use unique netns name for setup_loopback.sh setup_veth.sh The setup_loopback and setup_veth use their own way to create namespace. So let's just re-define server_ns/client_ns to unique name. At the same time update the namespace name in gro.sh and toeplitz.sh. As I don't have env to run toeplitz.sh. Here is only the gro test result. # ./gro.sh running test ipv4 data Expected {200 }, Total 1 packets Received {200 }, Total 1 packets. ... Gro::large test passed. All Tests Succeeded! Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/gro.sh | 4 ++-- tools/testing/selftests/net/setup_loopback.sh | 8 +++++--- tools/testing/selftests/net/setup_veth.sh | 9 ++++++--- tools/testing/selftests/net/toeplitz.sh | 14 +++++++------- 4 files changed, 20 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 342ad27f631b..19352f106c1d 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -23,11 +23,11 @@ run_test() { # on every try. for tries in {1..3}; do # Actual test starts here - ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ + ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ 1>>log.txt & server_pid=$! sleep 0.5 # to allow for socket init - ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \ + ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ 1>>log.txt wait "${server_pid}" exit_code=$? 
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh index e57bbfbc5208..2070b57849de 100755 --- a/tools/testing/selftests/net/setup_loopback.sh +++ b/tools/testing/selftests/net/setup_loopback.sh @@ -5,6 +5,8 @@ readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" readonly HARD_IRQS="$(< ${IRQ_PATH})" +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) netdev_check_for_carrier() { local -r dev="$1" @@ -97,12 +99,12 @@ setup_interrupt() { setup_ns() { # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}" - setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}" + setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" } cleanup_ns() { - cleanup_macvlan_ns server_ns server client_ns client + cleanup_macvlan_ns ${server_ns} server ${client_ns} client } setup() { diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index 1003ddf7b3b2..a9a1759e035c 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -1,6 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) + setup_veth_ns() { local -r link_dev="$1" local -r ns_name="$2" @@ -19,14 +22,14 @@ setup_ns() { # Set up server_ns namespace and client_ns namespace ip link add name server type veth peer name client - setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}" - setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}" + setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" } cleanup_ns() { local ns_name - for ns_name in client_ns server_ns; do + for ns_name in ${client_ns} ${server_ns}; do [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" done } diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index da5bfd834eff..8ff172f7bb1b 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -147,14 +147,14 @@ setup() { setup_loopback_environment "${DEV}" # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${DEV}" server_ns server \ + setup_macvlan_ns "${DEV}" $server_ns server \ "${SERVER_MAC}" "${SERVER_IP}" - setup_macvlan_ns "${DEV}" client_ns client \ + setup_macvlan_ns "${DEV}" $client_ns client \ "${CLIENT_MAC}" "${CLIENT_IP}" } cleanup() { - cleanup_macvlan_ns server_ns server client_ns client + cleanup_macvlan_ns $server_ns server $client_ns client cleanup_loopback "${DEV}" } @@ -170,22 +170,22 @@ if [[ "${TEST_RSS}" = true ]]; then # RPS/RFS must be disabled because they move packets between cpus, # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ -C "$(get_rx_irq_cpus)" -s -v & elif [[ ! 
-z "${RPS_MAP}" ]]; then eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ -r "0x${RPS_MAP}" -s -v & else - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & fi server_pid=$! -ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ +ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & client_pid=$! -- cgit v1.2.3-70-g09d2 From 378f082eaf3760cd7430fbcb1e4f8626bb6bc0ae Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:55 +0800 Subject: selftests/net: convert pmtu.sh to run it in unique namespace pmtu test use /bin/sh, so we need to source ./lib.sh instead of lib.sh Here is the test result after conversion. # ./pmtu.sh TEST: ipv4: PMTU exceptions [ OK ] TEST: ipv4: PMTU exceptions - nexthop objects [ OK ] TEST: ipv6: PMTU exceptions [ OK ] TEST: ipv6: PMTU exceptions - nexthop objects [ OK ] ... TEST: ipv4: list and flush cached exceptions - nexthop objects [ OK ] TEST: ipv6: list and flush cached exceptions [ OK ] TEST: ipv6: list and flush cached exceptions - nexthop objects [ OK ] TEST: ipv4: PMTU exception w/route replace [ OK ] TEST: ipv4: PMTU exception w/route replace - nexthop objects [ OK ] TEST: ipv6: PMTU exception w/route replace [ OK ] TEST: ipv6: PMTU exception w/route replace - nexthop objects [ OK ] Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/net/pmtu.sh | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index b3b2dc5a630c..175d3d1d773b 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -198,8 +198,7 @@ # - pmtu_ipv6_route_change # Same as above but with IPv6 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source ./lib.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -268,16 +267,6 @@ tests=" pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" -NS_A="ns-A" -NS_B="ns-B" -NS_C="ns-C" -NS_R1="ns-R1" -NS_R2="ns-R2" -ns_a="ip netns exec ${NS_A}" -ns_b="ip netns exec ${NS_B}" -ns_c="ip netns exec ${NS_C}" -ns_r1="ip netns exec ${NS_R1}" -ns_r2="ip netns exec ${NS_R2}" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). 
@@ -543,13 +532,17 @@ setup_ip6ip6() { } setup_namespaces() { + setup_ns NS_A NS_B NS_C NS_R1 NS_R2 for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do - ip netns add ${n} || return 1 - # Disable DAD, so that we don't have to wait to use the # configured IPv6 addresses ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 done + ns_a="ip netns exec ${NS_A}" + ns_b="ip netns exec ${NS_B}" + ns_c="ip netns exec ${NS_C}" + ns_r1="ip netns exec ${NS_R1}" + ns_r2="ip netns exec ${NS_R2}" } setup_veth() { @@ -839,7 +832,7 @@ setup_bridge() { run_cmd ${ns_a} ip link set br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C - run_cmd ${ns_c} ip link set veth_A-C netns ns-A + run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A} run_cmd ${ns_a} ip link set veth_A-C up run_cmd ${ns_c} ip link set veth_C-A up @@ -944,9 +937,7 @@ cleanup() { done socat_pids= - for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do - ip netns del ${n} 2> /dev/null - done + cleanup_all_ns ip link del veth_A-C 2>/dev/null ip link del veth_A-R1 2>/dev/null -- cgit v1.2.3-70-g09d2 From 9d0b4ad82d6117e6d7ead50f64be54ec782aa1fe Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Dec 2023 17:48:56 +0800 Subject: kselftest/runner.sh: add netns support Add a variable RUN_IN_NETNS if the user wants to run all the selected tests in namespace in parallel. With this, we can save a lot of testing time. Note that some tests may not fit to run in namespace, e.g. net/drop_monitor_tests.sh, as the dwdump needs to be run in init ns. I also added another parameter -p to make all the logs reported separately instead of mixing them in the stdout or output.log. Nit: the NUM in run_one is not used, rename it to test_num. Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- tools/testing/selftests/kselftest/runner.sh | 38 +++++++++++++++++++++++++++-- tools/testing/selftests/run_kselftest.sh | 10 +++++++- 2 files changed, 45 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index cd2fb43eea61..74954f6a8f94 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -6,6 +6,7 @@ export skip_rc=4 export timeout_rc=124 export logfile=/dev/stdout export per_test_logging= +export RUN_IN_NETNS= # Defaults for "settings" file fields: # "timeout" how many seconds to let each test run before running @@ -47,7 +48,7 @@ run_one() { DIR="$1" TEST="$2" - NUM="$3" + local test_num="$3" BASENAME_TEST=$(basename $TEST) @@ -141,6 +142,33 @@ run_one() fi } +in_netns() +{ + local name=$1 + ip netns exec $name bash <<-EOF + BASE_DIR=$BASE_DIR + source $BASE_DIR/kselftest/runner.sh + logfile=$logfile + run_one $DIR $TEST $test_num + EOF +} + +run_in_netns() +{ + local netns=$(mktemp -u ${BASENAME_TEST}-XXXXXX) + local tmplog="/tmp/$(mktemp -u ${BASENAME_TEST}-XXXXXX)" + ip netns add $netns + if [ $? 
-ne 0 ]; then + echo "# Warning: Create namespace failed for $BASENAME_TEST" + echo "not ok $test_num selftests: $DIR: $BASENAME_TEST # Create NS failed" + fi + ip -n $netns link set lo up + in_netns $netns &> $tmplog + ip netns del $netns &> /dev/null + cat $tmplog + rm -f $tmplog +} + run_many() { echo "TAP version 13" @@ -155,6 +183,12 @@ run_many() logfile="/tmp/$BASENAME_TEST" cat /dev/null > "$logfile" fi - run_one "$DIR" "$TEST" "$test_num" + if [ -n "$RUN_IN_NETNS" ]; then + run_in_netns & + else + run_one "$DIR" "$TEST" "$test_num" + fi done + + wait } diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 92743980e553..a28c1416cb89 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -20,11 +20,13 @@ usage() { cat < $logfile shift ;; + -p | --per-test-log) + per_test_logging=1 + shift ;; -t | --test) TESTS="$TESTS $2" shift 2 ;; @@ -53,6 +58,9 @@ while true; do -d | --dry-run) dryrun="echo" shift ;; + -n | --netns) + RUN_IN_NETNS=1 + shift ;; -o | --override-timeout) kselftest_override_timeout="$2" shift 2 ;; -- cgit v1.2.3-70-g09d2 From 2491d66ae66cdd761c38563e17940a707d1b5e91 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 19 Dec 2023 12:57:56 +0200 Subject: selftests: forwarding: ethtool_mm: support devices with higher rx-min-frag-size Some devices have errata due to which they cannot report ETH_ZLEN (60) in the rx-min-frag-size. This was foreseen of course, and lldpad has logic that when we request it to advertise addFragSize 0, it will round it up to the lowest value that is _actually_ supported by the hardware. The problem is that the selftest expects lldpad to report back to us the same value as we requested. Make the selftest smarter by figuring out on its own what is a reasonable value to expect. Cc: Shuah Khan Signed-off-by: Vladimir Oltean Tested-by: Roger Quadros Signed-off-by: Roger Quadros Tested-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- .../testing/selftests/net/forwarding/ethtool_mm.sh | 37 ++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh index 39e736f30322..6212913f4ad1 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh @@ -155,15 +155,48 @@ manual_failed_verification_h2_to_h1() manual_failed_verification $h2 $h1 } +smallest_supported_add_frag_size() +{ + local iface=$1 + local rx_min_frag_size= + + rx_min_frag_size=$(ethtool --json --show-mm $iface | \ + jq '.[]."rx-min-frag-size"') + + if [ $rx_min_frag_size -le 60 ]; then + echo 0 + elif [ $rx_min_frag_size -le 124 ]; then + echo 1 + elif [ $rx_min_frag_size -le 188 ]; then + echo 2 + elif [ $rx_min_frag_size -le 252 ]; then + echo 3 + else + echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP" + exit 1 + fi +} + +expected_add_frag_size() +{ + local iface=$1 + local requested=$2 + local min=$(smallest_supported_add_frag_size $iface) + + [ $requested -le $min ] && echo $min || echo $requested +} + lldp_change_add_frag_size() { local add_frag_size=$1 + local pattern= lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null # Wait for TLVs to be received sleep 2 - lldptool -i $h2 -t -n -V addEthCaps | \ - grep -q "Additional fragment size: $add_frag_size" + pattern=$(printf "Additional fragment size: %d" \ + $(expected_add_frag_size $h1 $add_frag_size)) + lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern" } lldp() -- cgit v1.2.3-70-g09d2 From c8659bd9d1c09300c6a30734d781096c1530c234 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 19 Dec 2023 12:57:57 +0200 Subject: selftests: forwarding: ethtool_mm: fall back to aggregate if device does not report pMAC stats Some devices do not support individual 'pmac' and 'emac' stats. For such devices, resort to 'aggregate' stats. Cc: Shuah Khan Signed-off-by: Vladimir Oltean Tested-by: Roger Quadros Signed-off-by: Roger Quadros Tested-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/forwarding/ethtool_mm.sh | 11 +++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 9 +++++++++ 2 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh index 6212913f4ad1..50d5bfb17ef1 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh @@ -25,6 +25,10 @@ traffic_test() local after= local delta= + if [ ${has_pmac_stats[$if]} = false ]; then + src="aggregate" + fi + before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO @@ -317,6 +321,13 @@ for netif in ${NETIFS[@]}; do echo "SKIP: $netif does not support MAC Merge" exit $ksft_skip fi + + if check_ethtool_pmac_std_stats_support $netif eth-mac; then + has_pmac_stats[$netif]=true + else + has_pmac_stats[$netif]=false + echo "$netif does not report pMAC statistics, falling back to aggregate" + fi done trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e3740163c384..69ef2a40df21 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -155,6 +155,15 @@ check_ethtool_counter_group_support() fi } +check_ethtool_pmac_std_stats_support() +{ + local dev=$1; shift + local grp=$1; shift + + [ 0 -ne $(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \ + | jq ".[].\"$grp\" | length") ] +} + check_locked_port_support() { if ! bridge -d link show | grep -q " locked"; then -- cgit v1.2.3-70-g09d2 From 122db5e3634b85f6caeca19345e0adbdf79cb257 Mon Sep 17 00:00:00 2001 From: Maxim Galaganov Date: Tue, 19 Dec 2023 22:31:07 +0100 Subject: selftests/net: add MPTCP coverage for IP_LOCAL_PORT_RANGE Since previous commit, MPTCP has support for IP_BIND_ADDRESS_NO_PORT and IP_LOCAL_PORT_RANGE sockopts. Add ip4_mptcp and ip6_mptcp fixture variants to ip_local_port_range selftest to provide selftest coverage for these sockopts. Acked-by: Mat Martineau Signed-off-by: Maxim Galaganov Signed-off-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/ip_local_port_range.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 75e3fdacdf73..0f217a1cc837 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -146,6 +146,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) { .so_protocol = IPPROTO_SCTP, }; +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) { .so_domain = AF_INET6, .so_type = SOCK_STREAM, @@ -164,6 +170,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) { .so_protocol = IPPROTO_SCTP, }; +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + TEST_F(ip_local_port_range, invalid_option_value) { __u16 val16; -- cgit v1.2.3-70-g09d2 From ba24ea129126362e7139fed4e13701ca5b71ac0b Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 21 Dec 2023 16:31:03 -0500 Subject: net/sched: Retire ipt action The tc ipt action was intended to run all netfilter/iptables target. Unfortunately it has not benefitted over the years from proper updates when netfilter changes, and for that reason it has remained rudimentary. Pinging a bunch of people that i was aware were using this indicates that removing it wont affect them. Retire it to reduce maintenance efforts. Buh-bye. Reviewed-by: Victor Noguiera Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ipt.h | 17 -- include/net/tc_wrapper.h | 4 - include/uapi/linux/pkt_cls.h | 4 +- include/uapi/linux/tc_act/tc_ipt.h | 20 -- net/sched/Makefile | 1 - net/sched/act_ipt.c | 464 ------------------------------ tools/testing/selftests/tc-testing/config | 1 - tools/testing/selftests/tc-testing/tdc.sh | 1 - 8 files changed, 2 insertions(+), 510 deletions(-) delete mode 100644 include/net/tc_act/tc_ipt.h delete mode 100644 include/uapi/linux/tc_act/tc_ipt.h delete mode 100644 net/sched/act_ipt.c (limited to 'tools') diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h deleted file mode 100644 index 4225fcb1c6ba..000000000000 --- a/include/net/tc_act/tc_ipt.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NET_TC_IPT_H -#define __NET_TC_IPT_H - -#include - -struct xt_entry_target; - -struct tcf_ipt { - struct tc_action common; - u32 tcfi_hook; - char *tcfi_tname; - struct xt_entry_target *tcfi_t; -}; -#define to_ipt(a) ((struct tcf_ipt *)a) - -#endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index a6d481b5bcbc..a608546bcefc 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -117,10 +117,6 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, if (a->ops->act == tcf_ife_act) return tcf_ife_act(skb, a, res); #endif -#if IS_BUILTIN(CONFIG_NET_ACT_IPT) - if (a->ops->act == tcf_ipt_act) - return tcf_ipt_act(skb, a, res); -#endif #if IS_BUILTIN(CONFIG_NET_ACT_SIMP) if (a->ops->act == tcf_simp_act) return tcf_simp_act(skb, a, res); diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index c7082cc60d21..2fec9b51d28d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -99,7 +99,7 @@ enum { 
* versions. */ #define TCA_ACT_GACT 5 -#define TCA_ACT_IPT 6 +#define TCA_ACT_IPT 6 /* obsoleted, can be reused */ #define TCA_ACT_PEDIT 7 #define TCA_ACT_MIRRED 8 #define TCA_ACT_NAT 9 @@ -120,7 +120,7 @@ enum tca_id { TCA_ID_UNSPEC = 0, TCA_ID_POLICE = 1, TCA_ID_GACT = TCA_ACT_GACT, - TCA_ID_IPT = TCA_ACT_IPT, + TCA_ID_IPT = TCA_ACT_IPT, /* Obsoleted, can be reused */ TCA_ID_PEDIT = TCA_ACT_PEDIT, TCA_ID_MIRRED = TCA_ACT_MIRRED, TCA_ID_NAT = TCA_ACT_NAT, diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h deleted file mode 100644 index c48d7da6750d..000000000000 --- a/include/uapi/linux/tc_act/tc_ipt.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __LINUX_TC_IPT_H -#define __LINUX_TC_IPT_H - -#include - -enum { - TCA_IPT_UNSPEC, - TCA_IPT_TABLE, - TCA_IPT_HOOK, - TCA_IPT_INDEX, - TCA_IPT_CNT, - TCA_IPT_TM, - TCA_IPT_TARG, - TCA_IPT_PAD, - __TCA_IPT_MAX -}; -#define TCA_IPT_MAX (__TCA_IPT_MAX - 1) - -#endif diff --git a/net/sched/Makefile b/net/sched/Makefile index b5fd49641d91..82c3f78ca486 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o obj-$(CONFIG_NET_ACT_GACT) += act_gact.o obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o -obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c deleted file mode 100644 index 598d6e299152..000000000000 --- a/net/sched/act_ipt.c +++ /dev/null @@ -1,464 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/act_ipt.c iptables target interface - * - *TODO: Add other tables. 
For now we only support the ipv4 table targets - * - * Copyright: Jamal Hadi Salim (2002-13) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - - -static struct tc_action_ops act_ipt_ops; -static struct tc_action_ops act_xt_ops; - -static int ipt_init_target(struct net *net, struct xt_entry_target *t, - char *table, unsigned int hook) -{ - struct xt_tgchk_param par; - struct xt_target *target; - struct ipt_entry e = {}; - int ret = 0; - - target = xt_request_find_target(AF_INET, t->u.user.name, - t->u.user.revision); - if (IS_ERR(target)) - return PTR_ERR(target); - - t->u.kernel.target = target; - memset(&par, 0, sizeof(par)); - par.net = net; - par.table = table; - par.entryinfo = &e; - par.target = target; - par.targinfo = t->data; - par.hook_mask = 1 << hook; - par.family = NFPROTO_IPV4; - - ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); - if (ret < 0) { - module_put(t->u.kernel.target->me); - return ret; - } - return 0; -} - -static void ipt_destroy_target(struct xt_entry_target *t, struct net *net) -{ - struct xt_tgdtor_param par = { - .target = t->u.kernel.target, - .targinfo = t->data, - .family = NFPROTO_IPV4, - .net = net, - }; - if (par.target->destroy != NULL) - par.target->destroy(&par); - module_put(par.target->me); -} - -static void tcf_ipt_release(struct tc_action *a) -{ - struct tcf_ipt *ipt = to_ipt(a); - - if (ipt->tcfi_t) { - ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net); - kfree(ipt->tcfi_t); - } - kfree(ipt->tcfi_tname); -} - -static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { - [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, - [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING, - NF_INET_NUMHOOKS), - [TCA_IPT_INDEX] = { .type = NLA_U32 }, - [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, -}; - -static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, - struct tcf_proto *tp, u32 flags) -{ - struct tc_action_net *tn = net_generic(net, id); - bool bind = flags & TCA_ACT_FLAGS_BIND; - struct nlattr *tb[TCA_IPT_MAX + 1]; - struct tcf_ipt *ipt; - struct xt_entry_target *td, *t; - char *tname; - bool exists = false; - int ret = 0, err; - u32 hook = 0; - u32 index = 0; - - if (nla == NULL) - return -EINVAL; - - err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy, - NULL); - if (err < 0) - return err; - - if (tb[TCA_IPT_INDEX] != NULL) - index = nla_get_u32(tb[TCA_IPT_INDEX]); - - err = tcf_idr_check_alloc(tn, &index, a, bind); - if (err < 0) - return err; - exists = err; - if (exists && bind) - return 0; - - if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { - if (exists) - tcf_idr_release(*a, bind); - else - tcf_idr_cleanup(tn, index); - return -EINVAL; - } - - if (!exists) { - ret = tcf_idr_create(tn, index, est, a, ops, bind, - false, flags); - if (ret) { - tcf_idr_cleanup(tn, index); - return ret; - } - ret = ACT_P_CREATED; - } else { - if (bind)/* dont override defaults */ - return 0; - - if (!(flags & TCA_ACT_FLAGS_REPLACE)) { - tcf_idr_release(*a, bind); - return -EEXIST; - } - } - - err = -EINVAL; - hook = nla_get_u32(tb[TCA_IPT_HOOK]); 
- switch (hook) { - case NF_INET_PRE_ROUTING: - break; - case NF_INET_POST_ROUTING: - break; - default: - goto err1; - } - - if (tb[TCA_IPT_TABLE]) { - /* mangle only for now */ - if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle")) - goto err1; - } - - tname = kstrdup("mangle", GFP_KERNEL); - if (unlikely(!tname)) - goto err1; - - t = kmemdup(td, td->u.target_size, GFP_KERNEL); - if (unlikely(!t)) - goto err2; - - err = ipt_init_target(net, t, tname, hook); - if (err < 0) - goto err3; - - ipt = to_ipt(*a); - - spin_lock_bh(&ipt->tcf_lock); - if (ret != ACT_P_CREATED) { - ipt_destroy_target(ipt->tcfi_t, net); - kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); - } - ipt->tcfi_tname = tname; - ipt->tcfi_t = t; - ipt->tcfi_hook = hook; - spin_unlock_bh(&ipt->tcf_lock); - return ret; - -err3: - kfree(t); -err2: - kfree(tname); -err1: - tcf_idr_release(*a, bind); - return err; -} - -static int tcf_ipt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est, - a, &act_ipt_ops, tp, flags); -} - -static int tcf_xt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, - struct tcf_proto *tp, - u32 flags, struct netlink_ext_ack *extack) -{ - return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est, - a, &act_xt_ops, tp, flags); -} - -static bool tcf_ipt_act_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - unsigned int nhoff, len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return false; - - nhoff = skb_network_offset(skb); - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return false; - - len = skb_ip_totlen(skb); - if (skb->len < nhoff + len || len < (iph->ihl * 4u)) - return false; - - return pskb_may_pull(skb, iph->ihl * 4u); -} - -TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) -{ - char saved_cb[sizeof_field(struct sk_buff, cb)]; - int ret = 0, result = 0; - struct tcf_ipt *ipt = to_ipt(a); - struct xt_action_param par; - struct nf_hook_state state = { - .net = dev_net(skb->dev), - .in = skb->dev, - .hook = ipt->tcfi_hook, - .pf = NFPROTO_IPV4, - }; - - if (skb_protocol(skb, false) != htons(ETH_P_IP)) - return TC_ACT_UNSPEC; - - if (skb_unclone(skb, GFP_ATOMIC)) - return TC_ACT_UNSPEC; - - if (!tcf_ipt_act_check(skb)) - return TC_ACT_UNSPEC; - - if (state.hook == NF_INET_POST_ROUTING) { - if (!skb_dst(skb)) - return TC_ACT_UNSPEC; - - state.out = skb->dev; - } - - memcpy(saved_cb, skb->cb, sizeof(saved_cb)); - - spin_lock(&ipt->tcf_lock); - - tcf_lastuse_update(&ipt->tcf_tm); - bstats_update(&ipt->tcf_bstats, skb); - - /* yes, we have to worry about both in and out dev - * worry later - danger - this API seems to have changed - * from earlier kernels - */ - par.state = &state; - par.target = ipt->tcfi_t->u.kernel.target; - par.targinfo = ipt->tcfi_t->data; - - memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - - ret = par.target->target(skb, &par); - - switch (ret) { - case NF_ACCEPT: - result = TC_ACT_OK; - break; - case NF_DROP: - result = TC_ACT_SHOT; - ipt->tcf_qstats.drops++; - break; - case XT_CONTINUE: - result = TC_ACT_PIPE; - break; - default: - net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n", - ret); - result = TC_ACT_OK; - break; - } - spin_unlock(&ipt->tcf_lock); - - memcpy(skb->cb, saved_cb, sizeof(skb->cb)); - - return result; - -} - -static int tcf_ipt_dump(struct sk_buff *skb, 
struct tc_action *a, int bind, - int ref) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tcf_ipt *ipt = to_ipt(a); - struct xt_entry_target *t; - struct tcf_t tm; - struct tc_cnt c; - - /* for simple targets kernel size == user size - * user name = target name - * for foolproof you need to not assume this - */ - - spin_lock_bh(&ipt->tcf_lock); - t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); - if (unlikely(!t)) - goto nla_put_failure; - - c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind; - c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref; - strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); - - if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) || - nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) || - nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) || - nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) || - nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname)) - goto nla_put_failure; - - tcf_tm_dump(&tm, &ipt->tcf_tm); - if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD)) - goto nla_put_failure; - - spin_unlock_bh(&ipt->tcf_lock); - kfree(t); - return skb->len; - -nla_put_failure: - spin_unlock_bh(&ipt->tcf_lock); - nlmsg_trim(skb, b); - kfree(t); - return -1; -} - -static struct tc_action_ops act_ipt_ops = { - .kind = "ipt", - .id = TCA_ID_IPT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_ipt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int ipt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); - - return tc_action_net_init(net, tn, &act_ipt_ops); -} - -static void __net_exit ipt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_ipt_ops.net_id); -} - -static struct pernet_operations ipt_net_ops = { - .init = ipt_init_net, - .exit_batch = ipt_exit_net, - .id = &act_ipt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -static struct tc_action_ops act_xt_ops = { - .kind = "xt", - .id = TCA_ID_XT, - .owner = THIS_MODULE, - .act = tcf_ipt_act, - .dump = tcf_ipt_dump, - .cleanup = tcf_ipt_release, - .init = tcf_xt_init, - .size = sizeof(struct tcf_ipt), -}; - -static __net_init int xt_init_net(struct net *net) -{ - struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); - - return tc_action_net_init(net, tn, &act_xt_ops); -} - -static void __net_exit xt_exit_net(struct list_head *net_list) -{ - tc_action_net_exit(net_list, act_xt_ops.net_id); -} - -static struct pernet_operations xt_net_ops = { - .init = xt_init_net, - .exit_batch = xt_exit_net, - .id = &act_xt_ops.net_id, - .size = sizeof(struct tc_action_net), -}; - -MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); -MODULE_DESCRIPTION("Iptables target actions"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("act_xt"); - -static int __init ipt_init_module(void) -{ - int ret1, ret2; - - ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops); - if (ret1 < 0) - pr_err("Failed to load xt action\n"); - - ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops); - if (ret2 < 0) - pr_err("Failed to load ipt action\n"); - - if (ret1 < 0 && ret2 < 0) { - return ret1; - } else - return 0; -} - -static void __exit ipt_cleanup_module(void) -{ - tcf_unregister_action(&act_ipt_ops, &ipt_net_ops); - tcf_unregister_action(&act_xt_ops, &xt_net_ops); -} - -module_init(ipt_init_module); -module_exit(ipt_cleanup_module); diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 
012aa33b341b..c60acba951c2 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -82,7 +82,6 @@ CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 407fa53822a0..c53ede8b730d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -20,7 +20,6 @@ try_modprobe act_ct try_modprobe act_ctinfo try_modprobe act_gact try_modprobe act_gate -try_modprobe act_ipt try_modprobe act_mirred try_modprobe act_mpls try_modprobe act_nat -- cgit v1.2.3-70-g09d2 From 72cd9f8d5a9925fb8ccedaf9b42ccf5fc955a716 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 22 Dec 2023 01:59:06 +0000 Subject: selftest/tcp-ao: Set routes in a proper VRF table id In unsigned-md5 selftests ip_route_add() is not needed in client_add_ip(): the route was pre-setup in __test_init() => link_init() for subnet, rather than a specific ip-address. Currently, __ip_route_add() mistakenly always sets VRF table to RT_TABLE_MAIN - this seems to have sneaked in during unsigned-md5 tests debugging. That also explains, why ip_route_add_vrf() ignored EEXIST, returned by fib6. Yet, keep EEXIST ignoring in bench-lookups selftests as it's expected that those selftests may add the same (duplicate) routes. Reported-by: Hangbin Liu Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/bench-lookups.c | 4 +++- tools/testing/selftests/net/tcp_ao/lib/netlink.c | 4 +--- tools/testing/selftests/net/tcp_ao/unsigned-md5.c | 11 +++++------ 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c index 7be8a7d9308c..a1e6e007c291 100644 --- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -46,8 +46,10 @@ static void test_add_routes(union tcp_addr *ips, size_t ips_nr) for (i = 0; i < ips_nr; i++) { union tcp_addr *p = (union tcp_addr *)&ips[i]; + int err; - if (ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p)) + err = ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p); + if (err && err != -EEXIST) test_error("Failed to add route"); } } diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c index b731f2c84083..7f108493a29a 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/netlink.c +++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c @@ -261,7 +261,7 @@ static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family, req.nh.nlmsg_seq = seq; req.rt.rtm_family = family; req.rt.rtm_dst_len = (family == AF_INET) ? 
32 : 128; - req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_table = vrf; req.rt.rtm_protocol = RTPROT_BOOT; req.rt.rtm_scope = RT_SCOPE_UNIVERSE; req.rt.rtm_type = RTN_UNICAST; @@ -294,8 +294,6 @@ int ip_route_add_vrf(const char *intf, int family, ret = __ip_route_add(route_sock, route_seq++, intf, family, src, dst, vrf); - if (ret == -EEXIST) /* ignoring */ - ret = 0; close(route_sock); return ret; diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index 14addfd46468..c5b568cd7d90 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -30,7 +30,7 @@ static void setup_vrfs(void) err = ip_route_add_vrf(veth_name, TEST_FAMILY, this_ip_addr, this_ip_dest, test_vrf_tabid); if (err) - test_error("Failed to add a route to VRF"); + test_error("Failed to add a route to VRF: %d", err); } static void try_accept(const char *tst_name, unsigned int port, @@ -494,15 +494,14 @@ out: static void client_add_ip(union tcp_addr *client, const char *ip) { - int family = TEST_FAMILY; + int err, family = TEST_FAMILY; if (inet_pton(family, ip, client) != 1) test_error("Can't convert ip address %s", ip); - if (ip_addr_add(veth_name, family, *client, TEST_PREFIX)) - test_error("Failed to add ip address"); - if (ip_route_add(veth_name, family, *client, this_ip_dest)) - test_error("Failed to add route"); + err = ip_addr_add(veth_name, family, *client, TEST_PREFIX); + if (err) + test_error("Failed to add ip address: %d", err); } static void client_add_ips(void) -- cgit v1.2.3-70-g09d2 From 80057b2080a8f80b93c6f7b474909ccda8a21b09 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 22 Dec 2023 01:59:07 +0000 Subject: selftest/tcp-ao: Work on namespace-ified sysctl_optmem_max Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max") optmem_max is per-netns, so need of switching to root namespace. It seems trivial to keep the old logic working, so going to keep it for a while (at least, until kernel with netns-optmem_max will be release). Currently, there is a test that checks that optmem_max limit applies to TCP-AO keys and a little benchmark that measures linked-list TCP-AO keys scaling, those are fixed by this. Cc: Eric Dumazet Signed-off-by: Dmitry Safonov Signed-off-by: David S. Miller --- tools/testing/selftests/net/tcp_ao/lib/setup.c | 35 ++++++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index 374b27c26ebd..92276f916f2f 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -277,22 +277,38 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix, /* /proc/sys/net/core/optmem_max artifically limits the amount of memory * that can be allocated with sock_kmalloc() on each socket in the system. - * It is not virtualized, so it has to written outside test namespaces. - * To be nice a test will revert optmem back to the old value. + * It is not virtualized in v6.7, so it has to written outside test + * namespaces. To be nice a test will revert optmem back to the old value. * Keeping it simple without any file lock, which means the tests that * need to set/increase optmem value shouldn't run in parallel. * Also, not re-entrant. 
+ * Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max") + * it is per-namespace, keeping logic for non-virtualized optmem_max + * for v6.7, which supports TCP-AO. */ static const char *optmem_file = "/proc/sys/net/core/optmem_max"; static size_t saved_optmem; +static int optmem_ns = -1; + +static bool is_optmem_namespaced(void) +{ + if (optmem_ns == -1) { + int old_ns = switch_save_ns(nsfd_child); + + optmem_ns = !access(optmem_file, F_OK); + switch_ns(old_ns); + } + return !!optmem_ns; +} size_t test_get_optmem(void) { + int old_ns = 0; FILE *foptmem; - int old_ns; size_t ret; - old_ns = switch_save_ns(nsfd_outside); + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); foptmem = fopen(optmem_file, "r"); if (!foptmem) test_error("failed to open %s", optmem_file); @@ -300,19 +316,21 @@ size_t test_get_optmem(void) if (fscanf(foptmem, "%zu", &ret) != 1) test_error("can't read from %s", optmem_file); fclose(foptmem); - switch_ns(old_ns); + if (!is_optmem_namespaced()) + switch_ns(old_ns); return ret; } static void __test_set_optmem(size_t new, size_t *old) { + int old_ns = 0; FILE *foptmem; - int old_ns; if (old != NULL) *old = test_get_optmem(); - old_ns = switch_save_ns(nsfd_outside); + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); foptmem = fopen(optmem_file, "w"); if (!foptmem) test_error("failed to open %s", optmem_file); @@ -320,7 +338,8 @@ static void __test_set_optmem(size_t new, size_t *old) if (fprintf(foptmem, "%zu", new) <= 0) test_error("can't write %zu to %s", new, optmem_file); fclose(foptmem); - switch_ns(old_ns); + if (!is_optmem_namespaced()) + switch_ns(old_ns); } static void test_revert_optmem(void) -- cgit v1.2.3-70-g09d2 From 0bd962dd86b2e5d097fc42c7411818851eca2995 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 22 Dec 2023 13:47:24 +0100 Subject: selftests: mptcp: join: check CURRESTAB counters This patch adds a new helper chk_cestab_nr() to check the current established connections counter MIB_CURRESTAB. Set the newly added variables cestab_ns1 and cestab_ns2 to indicate how many connections are expected in ns1 or ns2. Invoke check_cestab() to check the counter during the connection in do_transfer() and invoke chk_cestab_nr() to re-check it when the connection closed. These checks are embedded in add_tests(). Signed-off-by: Geliang Tang Acked-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 46 ++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 87590a43b50d..3a5b63026191 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -56,6 +56,8 @@ unset FAILING_LINKS unset test_linkfail unset addr_nr_ns1 unset addr_nr_ns2 +unset cestab_ns1 +unset cestab_ns2 unset sflags unset fastclose unset fullmesh @@ -976,6 +978,34 @@ pm_nl_set_endpoint() fi } +chk_cestab_nr() +{ + local ns=$1 + local cestab=$2 + local count + + print_check "cestab $cestab" + count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$cestab" ]; then + fail_test "got $count current establish[s] expected $cestab" + else + print_ok + fi +} + +# $1 namespace 1, $2 namespace 2 +check_cestab() +{ + if [ -n "${cestab_ns1}" ]; then + chk_cestab_nr ${1} ${cestab_ns1} + fi + if [ -n "${cestab_ns2}" ]; then + chk_cestab_nr ${2} ${cestab_ns2} + fi +} + do_transfer() { local listener_ns="$1" @@ -1089,6 +1119,7 @@ do_transfer() local cpid=$! pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr + check_cestab $listener_ns $connector_ns wait $cpid local retc=$? @@ -2477,47 +2508,52 @@ add_tests() if reset "add single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - addr_nr_ns2=1 speed=slow \ + addr_nr_ns2=1 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 + chk_cestab_nr $ns2 0 fi # add signal address if reset "add signal address"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 - addr_nr_ns1=1 speed=slow \ + addr_nr_ns1=1 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 + chk_cestab_nr $ns1 0 fi # add multiple subflows if reset "add multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple subflows IPv6 if reset "add multiple subflows IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple addresses IPv6 if reset "add multiple addresses IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 - addr_nr_ns1=2 speed=slow \ + addr_nr_ns1=2 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 chk_add_nr 2 2 + chk_cestab_nr $ns1 0 fi } -- cgit v1.2.3-70-g09d2 From 81ab772819da408977ac79c0a17d8be57283379f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 22 Dec 2023 13:47:25 +0100 Subject: selftests: mptcp: diag: check CURRESTAB counters This patch adds a new helper chk_msk_cestab() to check the current established connections counter MIB_CURRESTAB in diag.sh. Invoke it to check the counter during the connection after every chk_msk_inuse(). Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/mptcp/diag.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 95b498efacd1..04fcb8a077c9 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -56,7 +56,7 @@ __chk_nr() local command="$1" local expected=$2 local msg="$3" - local skip="${4:-SKIP}" + local skip="${4-SKIP}" local nr nr=$(eval $command) @@ -182,6 +182,15 @@ chk_msk_inuse() __chk_nr get_msk_inuse $expected "$msg" 0 } +# $1: cestab nr +chk_msk_cestab() +{ + local cestab=$1 + + __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ + "${cestab}" "....chk ${cestab} cestab" "" +} + wait_connected() { local listener_ns="${1}" @@ -219,9 +228,11 @@ chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 "....chk 2 msk in use" +chk_msk_cestab 2 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 echo "a" | \ timeout ${timeout_test} \ @@ -237,9 +248,11 @@ echo "b" | \ wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" chk_msk_inuse 1 "....chk 1 msk in use" +chk_msk_cestab 1 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do @@ -261,9 +274,11 @@ done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" +chk_msk_cestab $((NR_CLIENTS*2)) flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 mptcp_lib_result_print_all_tap exit $ret -- cgit v1.2.3-70-g09d2 From 41bc3e8fc1f728085da0ca6dbc1bef4a2ddb543c Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:50 -0500 Subject: net/sched: Remove uapi support for rsvp classifier commit 265b4da82dbf ("net/sched: Retire rsvp classifier") retired the TC RSVP classifier. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 31 ------------------------------- tools/include/uapi/linux/pkt_cls.h | 31 ------------------------------- 2 files changed, 62 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 2fec9b51d28d..fe922b61b99e 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -280,37 +280,6 @@ struct tc_u32_pcnt { #define TC_U32_MAXDEPTH 8 - -/* RSVP filter */ - -enum { - TCA_RSVP_UNSPEC, - TCA_RSVP_CLASSID, - TCA_RSVP_DST, - TCA_RSVP_SRC, - TCA_RSVP_PINFO, - TCA_RSVP_POLICE, - TCA_RSVP_ACT, - __TCA_RSVP_MAX -}; - -#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) - -struct tc_rsvp_gpi { - __u32 key; - __u32 mask; - int offset; -}; - -struct tc_rsvp_pinfo { - struct tc_rsvp_gpi dpi; - struct tc_rsvp_gpi spi; - __u8 protocol; - __u8 tunnelid; - __u8 tunnelhdr; - __u8 pad; -}; - /* ROUTE filter */ enum { diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h index 3faee0199a9b..82eccb6a4994 100644 --- a/tools/include/uapi/linux/pkt_cls.h +++ b/tools/include/uapi/linux/pkt_cls.h @@ -204,37 +204,6 @@ struct tc_u32_pcnt { #define TC_U32_MAXDEPTH 8 - -/* RSVP filter */ - -enum { - TCA_RSVP_UNSPEC, - TCA_RSVP_CLASSID, - TCA_RSVP_DST, - TCA_RSVP_SRC, - TCA_RSVP_PINFO, - TCA_RSVP_POLICE, - TCA_RSVP_ACT, - __TCA_RSVP_MAX -}; - -#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) - -struct tc_rsvp_gpi { - __u32 key; - __u32 mask; - int offset; -}; - -struct tc_rsvp_pinfo { - struct tc_rsvp_gpi dpi; - struct tc_rsvp_gpi spi; - __u8 protocol; - __u8 tunnelid; - __u8 tunnelhdr; - __u8 pad; -}; - /* ROUTE filter */ enum { -- cgit v1.2.3-70-g09d2 From 82b2545ed9a465e4c470d2dbbb461522f767c56f Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:51 -0500 Subject: net/sched: Remove uapi support for tcindex classifier commit 8c710f75256b ("net/sched: Retire tcindex classifier") retired the TC tcindex classifier. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 16 ---------------- tools/include/uapi/linux/pkt_cls.h | 16 ---------------- 2 files changed, 32 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index fe922b61b99e..ea277039f89d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -310,22 +310,6 @@ enum { #define TCA_FW_MAX (__TCA_FW_MAX - 1) -/* TC index filter */ - -enum { - TCA_TCINDEX_UNSPEC, - TCA_TCINDEX_HASH, - TCA_TCINDEX_MASK, - TCA_TCINDEX_SHIFT, - TCA_TCINDEX_FALL_THROUGH, - TCA_TCINDEX_CLASSID, - TCA_TCINDEX_POLICE, - TCA_TCINDEX_ACT, - __TCA_TCINDEX_MAX -}; - -#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) - /* Flow filter */ enum { diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h index 82eccb6a4994..bd4b227ab4ba 100644 --- a/tools/include/uapi/linux/pkt_cls.h +++ b/tools/include/uapi/linux/pkt_cls.h @@ -234,22 +234,6 @@ enum { #define TCA_FW_MAX (__TCA_FW_MAX - 1) -/* TC index filter */ - -enum { - TCA_TCINDEX_UNSPEC, - TCA_TCINDEX_HASH, - TCA_TCINDEX_MASK, - TCA_TCINDEX_SHIFT, - TCA_TCINDEX_FALL_THROUGH, - TCA_TCINDEX_CLASSID, - TCA_TCINDEX_POLICE, - TCA_TCINDEX_ACT, - __TCA_TCINDEX_MAX -}; - -#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) - /* Flow filter */ enum { -- cgit v1.2.3-70-g09d2 From fe3b739a5472968d8d349522b6816bc4db82bc0f Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:52 -0500 Subject: net/sched: Remove uapi support for dsmark qdisc Commit bbe77c14ee61 ("net/sched: Retire dsmark qdisc") retired the dsmark classifier. Remove UAPI support for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 14 -------------- tools/include/uapi/linux/pkt_sched.h | 14 -------------- 2 files changed, 28 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index f762a10bfb78..1e3a2b9ddf7e 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -557,20 +557,6 @@ enum { #define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) -/* dsmark section */ - -enum { - TCA_DSMARK_UNSPEC, - TCA_DSMARK_INDICES, - TCA_DSMARK_DEFAULT_INDEX, - TCA_DSMARK_SET_TC_INDEX, - TCA_DSMARK_MASK, - TCA_DSMARK_VALUE, - __TCA_DSMARK_MAX, -}; - -#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) - /* ATM section */ enum { diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 5c903abc9fa5..0f164f1458fd 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -537,20 +537,6 @@ enum { #define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) -/* dsmark section */ - -enum { - TCA_DSMARK_UNSPEC, - TCA_DSMARK_INDICES, - TCA_DSMARK_DEFAULT_INDEX, - TCA_DSMARK_SET_TC_INDEX, - TCA_DSMARK_MASK, - TCA_DSMARK_VALUE, - __TCA_DSMARK_MAX, -}; - -#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) - /* ATM section */ enum { -- cgit v1.2.3-70-g09d2 From 26cc8714fc7f79a806c3d7ffa215b984c384ab4d Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:53 -0500 Subject: net/sched: Remove uapi support for ATM qdisc Commit fb38306ceb9e ("net/sched: Retire ATM qdisc") retired the ATM qdisc. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 15 --------------- tools/include/uapi/linux/pkt_sched.h | 15 --------------- 2 files changed, 30 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 1e3a2b9ddf7e..28f08acdad52 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -557,21 +557,6 @@ enum { #define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) -/* ATM section */ - -enum { - TCA_ATM_UNSPEC, - TCA_ATM_FD, /* file/socket descriptor */ - TCA_ATM_PTR, /* pointer to descriptor - later */ - TCA_ATM_HDR, /* LL header */ - TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ - TCA_ATM_ADDR, /* PVC address (for output only) */ - TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ - __TCA_ATM_MAX, -}; - -#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) - /* Network emulator */ enum { diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 0f164f1458fd..fc695429bc59 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -537,21 +537,6 @@ enum { #define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) -/* ATM section */ - -enum { - TCA_ATM_UNSPEC, - TCA_ATM_FD, /* file/socket descriptor */ - TCA_ATM_PTR, /* pointer to descriptor - later */ - TCA_ATM_HDR, /* LL header */ - TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ - TCA_ATM_ADDR, /* PVC address (for output only) */ - TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ - __TCA_ATM_MAX, -}; - -#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) - /* Network emulator */ enum { -- cgit v1.2.3-70-g09d2 From 33241dca486264193ed68167c8eeae1fb197f3df Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 23 Dec 2023 09:01:54 -0500 Subject: net/sched: Remove uapi support for CBQ qdisc Commit 051d44209842 ("net/sched: Retire CBQ qdisc") retired the CBQ qdisc. Remove UAPI for it. Iproute2 will sync by equally removing it from user space. Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 80 ------------------------------------ tools/include/uapi/linux/pkt_sched.h | 80 ------------------------------------ 2 files changed, 160 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 28f08acdad52..a3cd0c2dc995 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -477,86 +477,6 @@ enum { #define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) - -/* CBQ section */ - -#define TC_CBQ_MAXPRIO 8 -#define TC_CBQ_MAXLEVEL 8 -#define TC_CBQ_DEF_EWMA 5 - -struct tc_cbq_lssopt { - unsigned char change; - unsigned char flags; -#define TCF_CBQ_LSS_BOUNDED 1 -#define TCF_CBQ_LSS_ISOLATED 2 - unsigned char ewma_log; - unsigned char level; -#define TCF_CBQ_LSS_FLAGS 1 -#define TCF_CBQ_LSS_EWMA 2 -#define TCF_CBQ_LSS_MAXIDLE 4 -#define TCF_CBQ_LSS_MINIDLE 8 -#define TCF_CBQ_LSS_OFFTIME 0x10 -#define TCF_CBQ_LSS_AVPKT 0x20 - __u32 maxidle; - __u32 minidle; - __u32 offtime; - __u32 avpkt; -}; - -struct tc_cbq_wrropt { - unsigned char flags; - unsigned char priority; - unsigned char cpriority; - unsigned char __reserved; - __u32 allot; - __u32 weight; -}; - -struct tc_cbq_ovl { - unsigned char strategy; -#define TC_CBQ_OVL_CLASSIC 0 -#define TC_CBQ_OVL_DELAY 1 -#define TC_CBQ_OVL_LOWPRIO 2 -#define TC_CBQ_OVL_DROP 3 -#define TC_CBQ_OVL_RCLASSIC 4 - unsigned char priority2; - __u16 pad; - __u32 penalty; -}; - -struct tc_cbq_police { - unsigned char police; - unsigned char __res1; - unsigned short __res2; -}; - -struct tc_cbq_fopt { - __u32 split; - __u32 defmap; - __u32 defchange; -}; - -struct tc_cbq_xstats { - __u32 borrows; - __u32 overactions; - __s32 avgidle; - __s32 undertime; -}; - -enum { - TCA_CBQ_UNSPEC, - TCA_CBQ_LSSOPT, - TCA_CBQ_WRROPT, - TCA_CBQ_FOPT, - TCA_CBQ_OVL_STRATEGY, - TCA_CBQ_RATE, - TCA_CBQ_RTAB, - TCA_CBQ_POLICE, - __TCA_CBQ_MAX, -}; - -#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) - /* Network emulator */ enum { diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index fc695429bc59..587481a19433 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -457,86 +457,6 @@ enum { #define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) - -/* CBQ section */ - -#define TC_CBQ_MAXPRIO 8 -#define TC_CBQ_MAXLEVEL 8 -#define TC_CBQ_DEF_EWMA 5 - -struct tc_cbq_lssopt { - unsigned char change; - unsigned char flags; -#define TCF_CBQ_LSS_BOUNDED 1 -#define TCF_CBQ_LSS_ISOLATED 2 - unsigned char ewma_log; - unsigned char level; -#define TCF_CBQ_LSS_FLAGS 1 -#define TCF_CBQ_LSS_EWMA 2 -#define TCF_CBQ_LSS_MAXIDLE 4 -#define TCF_CBQ_LSS_MINIDLE 8 -#define TCF_CBQ_LSS_OFFTIME 0x10 -#define TCF_CBQ_LSS_AVPKT 0x20 - __u32 maxidle; - __u32 minidle; - __u32 offtime; - __u32 avpkt; -}; - -struct tc_cbq_wrropt { - unsigned char flags; - unsigned char priority; - unsigned char cpriority; - unsigned char __reserved; - __u32 allot; - __u32 weight; -}; - -struct tc_cbq_ovl { - unsigned char strategy; -#define TC_CBQ_OVL_CLASSIC 0 -#define TC_CBQ_OVL_DELAY 1 -#define TC_CBQ_OVL_LOWPRIO 2 -#define TC_CBQ_OVL_DROP 3 -#define TC_CBQ_OVL_RCLASSIC 4 - unsigned char priority2; - __u16 pad; - __u32 penalty; -}; - -struct tc_cbq_police { - unsigned char police; - unsigned char __res1; - unsigned short __res2; -}; - -struct tc_cbq_fopt { - __u32 split; - __u32 defmap; - __u32 defchange; -}; - -struct tc_cbq_xstats { - __u32 borrows; - __u32 overactions; - __s32 avgidle; - __s32 undertime; -}; - -enum { - TCA_CBQ_UNSPEC, - 
TCA_CBQ_LSSOPT, - TCA_CBQ_WRROPT, - TCA_CBQ_FOPT, - TCA_CBQ_OVL_STRATEGY, - TCA_CBQ_RATE, - TCA_CBQ_RTAB, - TCA_CBQ_POLICE, - __TCA_CBQ_MAX, -}; - -#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) - /* Network emulator */ enum { -- cgit v1.2.3-70-g09d2 From 8a021e7fa10576eeb3938328f39bbf98fe7d4715 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 21 Dec 2023 18:22:24 -0500 Subject: bpf: Simplify checking size of helper accesses This patch simplifies the verification of size arguments associated to pointer arguments to helpers and kfuncs. Many helpers take a pointer argument followed by the size of the memory access performed to be performed through that pointer. Before this patch, the handling of the size argument in check_mem_size_reg() was confusing and wasteful: if the size register's lower bound was 0, then the verification was done twice: once considering the size of the access to be the lower-bound of the respective argument, and once considering the upper bound (even if the two are the same). The upper bound checking is a super-set of the lower-bound checking(*), except: the only point of the lower-bound check is to handle the case where zero-sized-accesses are explicitly not allowed and the lower-bound is zero. This static condition is now checked explicitly, replacing a much more complex, expensive and confusing verification call to check_helper_mem_access(). Error messages change in this patch. Before, messages about illegal zero-size accesses depended on the type of the pointer and on other conditions, and sometimes the message was plain wrong: in some tests that changed you'll see that the old message was something like "R1 min value is outside of the allowed memory range", where R1 is the pointer register; the error was wrongly claiming that the pointer was bad instead of the size being bad. Other times the information that the size came for a register with a possible range of values was wrong, and the error presented the size as a fixed zero. Now the errors refer to the right register. However, the old error messages did contain useful information about the pointer register which is now lost; recovering this information was deemed not important enough. (*) Besides standing to reason that the checks for a bigger size access are a super-set of the checks for a smaller size access, I have also mechanically verified this by reading the code for all types of pointers. I could convince myself that it's true for all but PTR_TO_BTF_ID (check_ptr_to_btf_access). There, simply looking line-by-line does not immediately prove what we want. If anyone has any qualms, let me know. 
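For illustration, a rough C sketch (not part of this patch; the affected
selftests below are written in inline BPF assembly) of the kind of program the
new check rejects, assuming the usual libbpf conventions and mirroring the
map-value test case:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct val48 {
          char data[48];
  };

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 1);
          __type(key, long long);
          __type(value, struct val48);
  } map_hash_48b SEC(".maps");

  SEC("tracepoint")
  int possibly_empty_read(void *ctx)
  {
          long long key = 0;
          struct val48 *val;
          __u64 len;

          val = bpf_map_lookup_elem(&map_hash_48b, &key);
          if (!val)
                  return 0;

          len = *(__u64 *)val->data;      /* unknown value read from the map */
          len &= 4;                       /* provably in [0, 4], but may be 0 */

          /* bpf_trace_printk() declares its size argument as ARG_CONST_SIZE,
           * not ARG_CONST_SIZE_OR_ZERO, so the verifier now rejects this with
           * "R2 invalid zero-sized read: u64=[0,4]".
           */
          bpf_trace_printk(val->data, len);
          return 0;
  }

  char _license[] SEC("license") = "GPL";
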
Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231221232225.568730-2-andreimatei1@gmail.com --- kernel/bpf/verifier.c | 10 ++++------ .../testing/selftests/bpf/progs/verifier_helper_value_access.c | 8 ++++---- tools/testing/selftests/bpf/progs/verifier_raw_stack.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a376eb609c41..d4e31f61de0e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7279,12 +7279,10 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, return -EACCES; } - if (reg->umin_value == 0) { - err = check_helper_mem_access(env, regno - 1, 0, - zero_size_allowed, - meta); - if (err) - return err; + if (reg->umin_value == 0 && !zero_size_allowed) { + verbose(env, "R%d invalid zero-sized read: u64=[%lld,%lld]\n", + regno, reg->umin_value, reg->umax_value); + return -EACCES; } if (reg->umax_value >= BPF_MAX_VAR_SIZ) { diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c index 692216c0ad3d..3e8340c2408f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c @@ -91,7 +91,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to map: empty range") -__failure __msg("invalid access to map value, value_size=48 off=0 size=0") +__failure __msg("R2 invalid zero-sized read") __naked void access_to_map_empty_range(void) { asm volatile (" \ @@ -221,7 +221,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const imm): empty range") -__failure __msg("invalid access to map value, value_size=48 off=4 size=0") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_imm_empty_range(void) { asm volatile (" \ @@ -386,7 +386,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const reg): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_reg_empty_range(void) { asm volatile (" \ @@ -556,7 +556,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via variable): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void map_via_variable_empty_range(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index f67390224a9c..7cc83acac727 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -64,7 +64,7 @@ __naked void load_bytes_negative_len_2(void) SEC("tc") __description("raw_stack: skb_load_bytes, zero len") -__failure __msg("invalid zero-sized read") +__failure __msg("R4 invalid zero-sized read: u64=[0,0]") __naked void skb_load_bytes_zero_len(void) { asm volatile (" \ -- cgit v1.2.3-70-g09d2 From 72187506de4f19fcc8ae63a2b2f36d75e5259d9d Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Thu, 21 Dec 2023 18:22:25 -0500 Subject: bpf: Add a possibly-zero-sized read test This patch adds a test for the condition that the previous patch mucked with - illegal zero-sized helper memory access. 
As opposed to existing tests, this new one uses a size whose lower bound is zero, as opposed to a known-zero one. Signed-off-by: Andrei Matei Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231221232225.568730-3-andreimatei1@gmail.com --- .../bpf/progs/verifier_helper_value_access.c | 39 +++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c index 3e8340c2408f..886498b5e6f3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c @@ -89,9 +89,14 @@ l0_%=: exit; \ : __clobber_all); } +/* Call a function taking a pointer and a size which doesn't allow the size to + * be zero (i.e. bpf_trace_printk() declares the second argument to be + * ARG_CONST_SIZE, not ARG_CONST_SIZE_OR_ZERO). We attempt to pass zero for the + * size and expect to fail. + */ SEC("tracepoint") __description("helper access to map: empty range") -__failure __msg("R2 invalid zero-sized read") +__failure __msg("R2 invalid zero-sized read: u64=[0,0]") __naked void access_to_map_empty_range(void) { asm volatile (" \ @@ -113,6 +118,38 @@ l0_%=: exit; \ : __clobber_all); } +/* Like the test above, but this time the size register is not known to be zero; + * its lower-bound is zero though, which is still unacceptable. + */ +SEC("tracepoint") +__description("helper access to map: possibly-empty ange") +__failure __msg("R2 invalid zero-sized read: u64=[0,4]") +__naked void access_to_map_possibly_empty_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + /* Read an unknown value */ \ + r7 = *(u64*)(r0 + 0); \ + /* Make it small and positive, to avoid other errors */ \ + r7 &= 4; \ + r2 = 0; \ + r2 += r7; \ + call %[bpf_trace_printk]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_trace_printk), + __imm_addr(map_hash_48b) + : __clobber_all); +} + SEC("tracepoint") __description("helper access to map: out-of-bound range") __failure __msg("invalid access to map value, value_size=48 off=0 size=56") -- cgit v1.2.3-70-g09d2 From 495d2d8133fd1407519170a5238f455abbd9ec9b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 26 Dec 2023 11:11:43 -0800 Subject: selftests/bpf: Attempt to build BPF programs with -Wsign-compare GCC's -Wall includes -Wsign-compare while clang does not. Since BPF programs are built with clang we need to add this flag explicitly to catch problematic comparisons like: int i = -1; unsigned int j = 1; if (i < j) // this is false. long i = -1; unsigned int j = 1; if (i < j) // this is true. C standard for reference: - If either operand is unsigned long the other shall be converted to unsigned long. - Otherwise, if one operand is a long int and the other unsigned int, then if a long int can represent all the values of an unsigned int, the unsigned int shall be converted to a long int; otherwise both operands shall be converted to unsigned long int. - Otherwise, if either operand is long, the other shall be converted to long. - Otherwise, if either operand is unsigned, the other shall be converted to unsigned. Unfortunately clang's -Wsign-compare is very noisy. It complains about (s32)a == (u32)b which is safe and doen't have surprising behavior. 
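For a concrete feel for the two surprising cases above, here is a minimal
standalone sketch (it assumes an LP64 target, i.e. 32-bit int and 64-bit
long):

  #include <stdio.h>

  int main(void)
  {
          int i32 = -1;
          long i64 = -1;
          unsigned int j = 1;

          /* i32 is converted to unsigned int, so -1 becomes UINT_MAX. */
          printf("int  < unsigned int: %d\n", i32 < j);   /* prints 0 (false) */

          /* j is converted to long, which can represent every unsigned int
           * value, so this compares -1 < 1 as signed.
           */
          printf("long < unsigned int: %d\n", i64 < j);   /* prints 1 (true) */

          return 0;
  }

With -Wsign-compare, both gcc and clang flag the first comparison.
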
This patch fixes some of the issues. It needs a follow up to fix the rest. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20231226191148.48536-2-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/Makefile | 1 + tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c | 2 +- tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c | 2 +- tools/testing/selftests/bpf/progs/bpf_iter_tasks.c | 2 +- tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c | 2 +- .../testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c | 2 +- tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c | 2 +- tools/testing/selftests/bpf/progs/cpumask_success.c | 2 +- tools/testing/selftests/bpf/progs/iters.c | 4 ++-- tools/testing/selftests/bpf/progs/linked_funcs1.c | 2 +- tools/testing/selftests/bpf/progs/linked_funcs2.c | 2 +- tools/testing/selftests/bpf/progs/linked_list.c | 2 +- tools/testing/selftests/bpf/progs/local_storage.c | 2 +- tools/testing/selftests/bpf/progs/lsm.c | 2 +- tools/testing/selftests/bpf/progs/normal_map_btf.c | 2 +- tools/testing/selftests/bpf/progs/profiler.inc.h | 4 ++-- tools/testing/selftests/bpf/progs/sockopt_inherit.c | 2 +- tools/testing/selftests/bpf/progs/sockopt_multi.c | 2 +- tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c | 2 +- tools/testing/selftests/bpf/progs/test_bpf_ma.c | 2 +- tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c | 2 +- tools/testing/selftests/bpf/progs/test_core_reloc_module.c | 8 ++++---- tools/testing/selftests/bpf/progs/test_fsverity.c | 2 +- tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c | 2 +- tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c | 2 +- 25 files changed, 30 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 617ae55c3bb5..fd15017ed3b1 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -383,6 +383,7 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) +# TODO: enable me -Wsign-compare CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c index feaaa2b89c57..5014a17d6c02 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -20,7 +20,7 @@ struct { } hashmap1 SEC(".maps"); /* will set before prog run */ -volatile const __u32 num_cpus = 0; +volatile const __s32 num_cpus = 0; /* will collect results during prog run */ __u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index dd923dc637d5..423b39e60b6f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c @@ -35,7 +35,7 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) return 0; file = vma->vm_file; - if (task->tgid != pid) { + if (task->tgid != (pid_t)pid) { if (one_task) one_task_error = 1; return 0; diff --git 
a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index 96131b9a1caa..6cbb3393f243 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c @@ -22,7 +22,7 @@ int dump_task(struct bpf_iter__task *ctx) return 0; } - if (task->pid != tid) + if (task->pid != (pid_t)tid) num_unknown_tid++; else num_known_tid++; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index 400fdf8d6233..dbf61c44acac 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -45,7 +45,7 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx) } /* fill seq_file buffer */ - for (i = 0; i < print_len; i++) + for (i = 0; i < (int)print_len; i++) bpf_seq_write(seq, &seq_num, sizeof(seq_num)); return ret; diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c index b7fa8804e19d..45a0e9f492a9 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c @@ -11,7 +11,7 @@ __u32 invocations = 0; __u32 assertion_error = 0; __u32 retval_value = 0; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/setsockopt") int get_retval(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index facedd8b8250..5e282c16eadc 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -15,7 +15,7 @@ struct { __type(value, long); } map_a SEC(".maps"); -__u32 target_pid; +__s32 target_pid; __u64 cgroup_id; int target_hid; bool is_cgroup1; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index fc3666edf456..7a1e64c6c065 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -332,7 +332,7 @@ SEC("tp_btf/task_newtask") int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *mask1, *mask2, *dst1, *dst2; - u32 cpu; + int cpu; if (!is_test_task()) return 0; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 3aca3dc145b5..fe971992e635 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -6,7 +6,7 @@ #include #include "bpf_misc.h" -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) static volatile int zero = 0; @@ -676,7 +676,7 @@ static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) while ((t = bpf_iter_num_next(it))) { i = *t; - if (i >= n) + if ((__u32)i >= n) break; sum += arr[i]; } diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index c4b49ceea967..cc79dddac182 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -8,7 +8,7 @@ #include "bpf_misc.h" /* weak and shared between two files */ -const volatile int my_tid __weak; +const volatile __u32 my_tid __weak; long syscall_id __weak; int output_val1; diff --git 
a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 013ff0645f0c..942cc5526ddf 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -68,7 +68,7 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) { static volatile int whatever; - if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) + if (my_tid != (s32)bpf_get_current_pid_tgid() || id != syscall_id) return 0; /* make sure we have CO-RE relocations in main program */ diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 84d1777a9e6c..26205ca80679 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -6,7 +6,7 @@ #include "bpf_experimental.h" #ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) #endif #include "linked_list.h" diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index bc8ea56671a1..e5e3a8b8dd07 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -13,7 +13,7 @@ char _license[] SEC("license") = "GPL"; #define DUMMY_STORAGE_VALUE 0xdeadbeef -int monitored_pid = 0; +__u32 monitored_pid = 0; int inode_storage_result = -1; int sk_storage_result = -1; int task_storage_result = -1; diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index fadfdd98707c..0c13b7409947 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -92,7 +92,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, if (ret != 0) return ret; - __u32 pid = bpf_get_current_pid_tgid() >> 32; + __s32 pid = bpf_get_current_pid_tgid() >> 32; int is_stack = 0; is_stack = (vma->vm_start <= vma->vm_mm->start_stack && diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c index 66cde82aa86d..a45c9299552c 100644 --- a/tools/testing/selftests/bpf/progs/normal_map_btf.c +++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c @@ -36,7 +36,7 @@ int add_to_list_in_array(void *ctx) struct node_data *new; int zero = 0; - if (done || (u32)bpf_get_current_pid_tgid() != pid) + if (done || (int)bpf_get_current_pid_tgid() != pid) return 0; value = bpf_map_lookup_elem(&array, &zero); diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 897061930cb7..ba99d17dac54 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -132,7 +132,7 @@ struct { } disallowed_exec_inodes SEC(".maps"); #ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0])) #endif static INLINE bool IS_ERR(const void* ptr) @@ -645,7 +645,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; - if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { + if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) { bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data), past_kill_data); void* payload = kill_data->payload; diff --git 
a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index c8f59caa4639..a3434b840928 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -9,7 +9,7 @@ char _license[] SEC("license") = "GPL"; #define CUSTOM_INHERIT2 1 #define CUSTOM_LISTENER 2 -__u32 page_size = 0; +__s32 page_size = 0; struct sockopt_inherit { __u8 val; diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 96f29fce050b..db67278e12d4 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -5,7 +5,7 @@ char _license[] SEC("license") = "GPL"; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/getsockopt") int _getsockopt_child(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c index dbe235ede7f3..83753b00a556 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c @@ -9,7 +9,7 @@ char _license[] SEC("license") = "GPL"; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/setsockopt") int sockopt_qos_to_cc(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index 069db9085e78..b78f4f702ae0 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -21,7 +21,7 @@ const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 204 const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; int err = 0; -int pid = 0; +u32 pid = 0; #define DEFINE_ARRAY_WITH_KPTR(_size) \ struct bin_data_##_size { \ diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index a17dd83eae67..ee4a601dcb06 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -53,7 +53,7 @@ int test_core_kernel(void *ctx) struct task_struct *task = (void *)bpf_get_current_task(); struct core_reloc_kernel_output *out = (void *)&data.out; uint64_t pid_tgid = bpf_get_current_pid_tgid(); - uint32_t real_tgid = (uint32_t)pid_tgid; + int32_t real_tgid = (int32_t)pid_tgid; int pid, tgid; if (data.my_pid_tgid != pid_tgid) diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c index f59f175c7baf..bcb31ff92dcc 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c @@ -43,8 +43,8 @@ int BPF_PROG(test_core_module_probed, #if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 real_tgid = (__u32)(pid_tgid >> 32); - __u32 real_pid = (__u32)pid_tgid; + __s32 real_tgid = (__s32)(pid_tgid >> 32); + __s32 real_pid = (__s32)pid_tgid; if (data.my_pid_tgid != pid_tgid) return 0; @@ -77,8 +77,8 @@ int BPF_PROG(test_core_module_direct, #if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 real_tgid = (__u32)(pid_tgid >> 32); - __u32 real_pid = (__u32)pid_tgid; + __s32 
real_tgid = (__s32)(pid_tgid >> 32); + __s32 real_pid = (__s32)pid_tgid; if (data.my_pid_tgid != pid_tgid) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c index 3975495b75c8..9e0f73e8189c 100644 --- a/tools/testing/selftests/bpf/progs/test_fsverity.c +++ b/tools/testing/selftests/bpf/progs/test_fsverity.c @@ -38,7 +38,7 @@ int BPF_PROG(test_file_open, struct file *f) return 0; got_fsverity = 1; - for (i = 0; i < sizeof(digest); i++) { + for (i = 0; i < (int)sizeof(digest); i++) { if (digest[i] != expected_digest[i]) return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c index eacda9fe07eb..4cfa42aa9436 100644 --- a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c +++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c @@ -29,7 +29,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog) len = unix_sk->addr->len - sizeof(short); path[0] = '@'; for (i = 1; i < len; i++) { - if (i >= sizeof(struct sockaddr_un)) + if (i >= (int)sizeof(struct sockaddr_un)) break; path[i] = unix_sk->addr->name->sun_path[i]; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 5baaafed0d2d..3abf068b8446 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -38,7 +38,7 @@ int xdp_redirect(struct xdp_md *xdp) if (payload + 1 > data_end) return XDP_ABORTED; - if (xdp->ingress_ifindex != ifindex_in) + if (xdp->ingress_ifindex != (__u32)ifindex_in) return XDP_ABORTED; if (metadata + 1 > data) -- cgit v1.2.3-70-g09d2 From a8b242d77bd72556b7a9d8be779f7d27b95ba73c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 26 Dec 2023 11:11:44 -0800 Subject: bpf: Introduce "volatile compare" macros Compilers optimize conditional operators at will, but often bpf programmers want to force compilers to keep the same operator in asm as it's written in C. Introduce bpf_cmp_likely/unlikely(var1, conditional_op, var2) macros that can be used as: - if (seen >= 1000) + if (bpf_cmp_unlikely(seen, >=, 1000)) The macros take advantage of BPF assembly that is C like. The macros check the sign of variable 'seen' and emits either signed or unsigned compare. For example: int a; bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX s> 0 goto' in BPF assembly. unsigned int a; bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX > 0 goto' in BPF assembly. C type conversions coupled with comparison operator are tricky. int i = -1; unsigned int j = 1; if (i < j) // this is false. long i = -1; unsigned int j = 1; if (i < j) // this is true. Make sure BPF program is compiled with -Wsign-compare then the macros will catch the mistake. The macros check LHS (left hand side) only to figure out the sign of compare. 'if 0 < rX goto' is not allowed in the assembly, so the users have to use a variable on LHS anyway. The patch updates few tests to demonstrate the use of the macros. The macro allows to use BPF_JSET in C code, since LLVM doesn't generate it at present. For example: if (i & j) compiles into r0 &= r1; if r0 == 0 goto while if (bpf_cmp_unlikely(i, &, j)) compiles into if r0 & r1 goto Note that the macros has to be careful with RHS assembly predicate. 
Since: u64 __rhs = 1ull << 42; asm goto("if r0 < %[rhs] goto +1" :: [rhs] "ri" (__rhs)); LLVM will silently truncate 64-bit constant into s32 imm. Note that [lhs] "r"((short)LHS) the type cast is a workaround for LLVM issue. When LHS is exactly 32-bit LLVM emits redundant <<=32, >>=32 to zero upper 32-bits. When LHS is 64 or 16 or 8-bit variable there are no shifts. When LHS is 32-bit the (u64) cast doesn't help. Hence use (short) cast. It does _not_ truncate the variable before it's assigned to a register. Traditional likely()/unlikely() macros that use __builtin_expect(!!(x), 1 or 0) have no effect on these macros, hence macros implement the logic manually. bpf_cmp_unlikely() macro preserves compare operator as-is while bpf_cmp_likely() macro flips the compare. Consider two cases: A. for() { if (foo >= 10) { bar += foo; } other code; } B. for() { if (foo >= 10) break; other code; } It's ok to use either bpf_cmp_likely or bpf_cmp_unlikely macros in both cases, but consider that 'break' is effectively 'goto out_of_the_loop'. Hence it's better to use bpf_cmp_unlikely in the B case. While 'bar += foo' is better to keep as 'fallthrough' == likely code path in the A case. When it's written as: A. for() { if (bpf_cmp_likely(foo, >=, 10)) { bar += foo; } other code; } B. for() { if (bpf_cmp_unlikely(foo, >=, 10)) break; other code; } The assembly will look like: A. for() { if r1 < 10 goto L1; bar += foo; L1: other code; } B. for() { if r1 >= 10 goto L2; other code; } L2: The bpf_cmp_likely vs bpf_cmp_unlikely changes basic block layout, hence it will greatly influence the verification process. The number of processed instructions will be different, since the verifier walks the fallthrough first. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20231226191148.48536-3-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bpf_experimental.h | 69 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/exceptions.c | 20 +++---- tools/testing/selftests/bpf/progs/iters_task_vma.c | 3 +- 3 files changed, 80 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 1386baf9ae4a..19ed6c941c1c 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -254,6 +254,75 @@ extern void bpf_throw(u64 cookie) __ksym; } \ }) +#define __cmp_cannot_be_signed(x) \ + __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \ + __builtin_strcmp(#x, "&") == 0 + +#define __is_signed_type(type) (((type)(-1)) < (type)1) + +#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \ + ({ \ + __label__ l_true; \ + bool ret = DEFAULT; \ + asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \ + :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \ + ret = !DEFAULT; \ +l_true: \ + ret; \ + }) + +/* C type conversions coupled with comparison operator are tricky. + * Make sure BPF program is compiled with -Wsign-compare then + * __lhs OP __rhs below will catch the mistake. + * Be aware that we check only __lhs to figure out the sign of compare. 
+ */ +#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \ + ({ \ + typeof(LHS) __lhs = (LHS); \ + typeof(RHS) __rhs = (RHS); \ + bool ret; \ + _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \ + (void)(__lhs OP __rhs); \ + if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \ + if (sizeof(__rhs) == 8) \ + ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \ + else \ + ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \ + } else { \ + if (sizeof(__rhs) == 8) \ + ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \ + else \ + ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \ + } \ + ret; \ + }) + +#ifndef bpf_cmp_unlikely +#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true) +#endif + +#ifndef bpf_cmp_likely +#define bpf_cmp_likely(LHS, OP, RHS) \ + ({ \ + bool ret; \ + if (__builtin_strcmp(#OP, "==") == 0) \ + ret = _bpf_cmp(LHS, !=, RHS, false); \ + else if (__builtin_strcmp(#OP, "!=") == 0) \ + ret = _bpf_cmp(LHS, ==, RHS, false); \ + else if (__builtin_strcmp(#OP, "<=") == 0) \ + ret = _bpf_cmp(LHS, >, RHS, false); \ + else if (__builtin_strcmp(#OP, "<") == 0) \ + ret = _bpf_cmp(LHS, >=, RHS, false); \ + else if (__builtin_strcmp(#OP, ">") == 0) \ + ret = _bpf_cmp(LHS, <=, RHS, false); \ + else if (__builtin_strcmp(#OP, ">=") == 0) \ + ret = _bpf_cmp(LHS, <, RHS, false); \ + else \ + (void) "bug"; \ + ret; \ + }) +#endif + /* Description * Assert that a conditional expression is true. * Returns diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c index 2811ee842b01..f09cd14d8e04 100644 --- a/tools/testing/selftests/bpf/progs/exceptions.c +++ b/tools/testing/selftests/bpf/progs/exceptions.c @@ -210,7 +210,7 @@ __noinline int assert_zero_gfunc(u64 c) { volatile u64 cookie = c; - bpf_assert_eq(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, ==, 0)); return 0; } @@ -218,7 +218,7 @@ __noinline int assert_neg_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_lt(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, <, 0)); return 0; } @@ -226,7 +226,7 @@ __noinline int assert_pos_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_gt(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, >, 0)); return 0; } @@ -234,7 +234,7 @@ __noinline int assert_negeq_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_le(cookie, -1); + bpf_assert(bpf_cmp_unlikely(cookie, <=, -1)); return 0; } @@ -242,7 +242,7 @@ __noinline int assert_poseq_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_ge(cookie, 1); + bpf_assert(bpf_cmp_unlikely(cookie, >=, 1)); return 0; } @@ -258,7 +258,7 @@ __noinline int assert_zero_gfunc_with(u64 c) { volatile u64 cookie = c; - bpf_assert_eq_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, ==, 0), cookie + 100); return 0; } @@ -266,7 +266,7 @@ __noinline int assert_neg_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_lt_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, <, 0), cookie + 100); return 0; } @@ -274,7 +274,7 @@ __noinline int assert_pos_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_gt_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, >, 0), cookie + 100); return 0; } @@ -282,7 +282,7 @@ __noinline int assert_negeq_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_le_with(cookie, -1, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, <=, -1), cookie + 100); return 0; } @@ -290,7 +290,7 @@ __noinline int assert_poseq_gfunc_with(s64 c) { volatile s64 
cookie = c; - bpf_assert_ge_with(cookie, 1, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, >=, 1), cookie + 100); return 0; } diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c index e085a51d153e..dc0c3691dcc2 100644 --- a/tools/testing/selftests/bpf/progs/iters_task_vma.c +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -28,9 +28,8 @@ int iter_task_vma_for_each(const void *ctx) return 0; bpf_for_each(task_vma, vma, task, 0) { - if (seen >= 1000) + if (bpf_cmp_unlikely(seen, >=, 1000)) break; - barrier_var(seen); vm_ranges[seen].vm_start = vma->vm_start; vm_ranges[seen].vm_end = vma->vm_end; -- cgit v1.2.3-70-g09d2 From 624cd2a17672f4596fee97a5558bc990778bbcf9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 26 Dec 2023 11:11:45 -0800 Subject: selftests/bpf: Convert exceptions_assert.c to bpf_cmp Convert exceptions_assert.c to bpf_cmp_unlikely() macro. Since bpf_assert(bpf_cmp_unlikely(var, ==, 100)); other code; will generate assembly code: if r1 == 100 goto L2; r0 = 0 call bpf_throw L1: other code; ... L2: goto L1; LLVM generates redundant basic block with extra goto. LLVM will be fixed eventually. Right now it's less efficient than __bpf_assert(var, ==, 100) macro that produces: if r1 == 100 goto L1; r0 = 0 call bpf_throw L1: other code; But extra goto doesn't hurt the verification process. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20231226191148.48536-4-alexei.starovoitov@gmail.com --- .../selftests/bpf/progs/exceptions_assert.c | 80 +++++++++++----------- 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 0ef81040da59..5e0a1ca96d4e 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -11,51 +11,51 @@ #define check_assert(type, op, name, value) \ SEC("?tc") \ __log_level(2) __failure \ - int check_assert_##op##_##name(void *ctx) \ + int check_assert_##name(void *ctx) \ { \ type num = bpf_ktime_get_ns(); \ - bpf_assert_##op(num, value); \ + bpf_assert(bpf_cmp_unlikely(num, op, value)); \ return *(u64 *)num; \ } -__msg(": R0_w=0xffffffff80000000 R10=fp0") -check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=0x7fffffff R10=fp0") -check_assert(s64, eq, int_max, INT_MAX); -__msg(": R0_w=0 R10=fp0") -check_assert(s64, eq, zero, 0); -__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0") -check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0") -check_assert(s64, eq, llong_max, LLONG_MAX); - -__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0") -check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, lt, neg, INT_MIN); - -__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0") -check_assert(s64, le, pos, INT_MAX); -__msg(": R0_w=scalar(smax=0) R10=fp0") -check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 
0x7fffffffffffffff))") -check_assert(s64, le, neg, INT_MIN); - -__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0") -check_assert(s64, gt, neg, INT_MIN); - -__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") -check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0") -check_assert(s64, ge, neg, INT_MIN); +__msg(": R0_w=0xffffffff80000000") +check_assert(s64, ==, eq_int_min, INT_MIN); +__msg(": R0_w=0x7fffffff") +check_assert(s64, ==, eq_int_max, INT_MAX); +__msg(": R0_w=0") +check_assert(s64, ==, eq_zero, 0); +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000") +check_assert(s64, ==, eq_llong_min, LLONG_MIN); +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff") +check_assert(s64, ==, eq_llong_max, LLONG_MAX); + +__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)") +check_assert(s64, <, lt_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +check_assert(s64, <, lt_zero, 0); +__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff") +check_assert(s64, <, lt_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smax=0x7fffffff)") +check_assert(s64, <=, le_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smax=0)") +check_assert(s64, <=, le_zero, 0); +__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000") +check_assert(s64, <=, le_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >, gt_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >, gt_zero, 0); +__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001") +check_assert(s64, >, gt_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >=, ge_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >=, ge_zero, 0); +__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000") +check_assert(s64, >=, ge_neg, INT_MIN); SEC("?tc") __log_level(2) __failure -- cgit v1.2.3-70-g09d2 From 907dbd3ede5ffd4f9519dd1fae2a8a983603bf3b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 26 Dec 2023 11:11:46 -0800 Subject: selftests/bpf: Remove bpf_assert_eq-like macros. Since the last user was converted to bpf_cmp, remove bpf_assert_eq/ne/... macros. __bpf_assert_op() macro is kept for experiments, since it's slightly more efficient than bpf_assert(bpf_cmp_unlikely()) until LLVM is fixed. 
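For readers following the conversion, the removed and surviving spellings are equivalent; a sketch mirroring the gfunc changes in exceptions.c above (assumes bpf_experimental.h):

  __noinline int assert_zero_sketch(u64 c)
  {
          volatile u64 cookie = c;

          /* old, removed by this patch: bpf_assert_eq(cookie, 0); */
          /* new, generic form built on the volatile-compare macro: */
          bpf_assert(bpf_cmp_unlikely(cookie, ==, 0));
          return 0;
  }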
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20231226191148.48536-5-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bpf_experimental.h | 150 ------------------------- 1 file changed, 150 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 19ed6c941c1c..2ef9949fbd63 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -341,156 +341,6 @@ l_true: \ */ #define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value); -/* Description - * Assert that LHS is equal to RHS. This statement updates the known value - * of LHS during verification. Note that RHS must be a constant value, and - * must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_eq(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, ==, RHS, 0, true); \ - }) - -/* Description - * Assert that LHS is equal to RHS. This statement updates the known value - * of LHS during verification. Note that RHS must be a constant value, and - * must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_eq_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, ==, RHS, value, true); \ - }) - -/* Description - * Assert that LHS is less than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_lt(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <, RHS, 0, false); \ - }) - -/* Description - * Assert that LHS is less than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_lt_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <, RHS, value, false); \ - }) - -/* Description - * Assert that LHS is greater than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_gt(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >, RHS, 0, false); \ - }) - -/* Description - * Assert that LHS is greater than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_gt_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >, RHS, value, false); \ - }) - -/* Description - * Assert that LHS is less than or equal to RHS. This statement updates the - * known bounds of LHS during verification. 
Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_le(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <=, RHS, 0, false); \ - }) - -/* Description - * Assert that LHS is less than or equal to RHS. This statement updates the - * known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_le_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <=, RHS, value, false); \ - }) - -/* Description - * Assert that LHS is greater than or equal to RHS. This statement updates - * the known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_ge(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >=, RHS, 0, false); \ - }) - -/* Description - * Assert that LHS is greater than or equal to RHS. This statement updates - * the known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_ge_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >=, RHS, value, false); \ - }) - /* Description * Assert that LHS is in the range [BEG, END] (inclusive of both). This * statement updates the known bounds of LHS during verification. Note -- cgit v1.2.3-70-g09d2 From 0bcc62aa9813f519db58df14ddf1d523fa971e62 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 26 Dec 2023 11:11:47 -0800 Subject: bpf: Add bpf_nop_mov() asm macro. bpf_nop_mov(var) asm macro emits nop register move: rX = rX. If 'var' is a scalar and not a fixed constant the verifier will assign ID to it. If it's later spilled the stack slot will carry that ID as well. Hence the range refining comparison "if rX < const" will update all copies including spilled slot. This macro is a temporary workaround until the verifier gets smarter. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231226191148.48536-6-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/bpf_experimental.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 2ef9949fbd63..f44875f8b367 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -323,6 +323,11 @@ l_true: \ }) #endif +#ifndef bpf_nop_mov +#define bpf_nop_mov(var) \ + asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) +#endif + /* Description * Assert that a conditional expression is true. * Returns -- cgit v1.2.3-70-g09d2 From 7e3811cb998f0e2493677c7daf6cefb4ece27111 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 26 Dec 2023 11:11:48 -0800 Subject: selftests/bpf: Convert profiler.c to bpf_cmp. Convert profiler[123].c to "volatile compare" to compare barrier_var() approach vs bpf_cmp_likely() vs bpf_cmp_unlikely(). 
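Distilled from the hunks further down, the conversion being measured has this shape (a sketch; copy_name, payload and name are stand-ins for the profiler.inc.h locals, and bpf_experimental.h provides bpf_cmp_likely):

  static void *copy_name(void *payload, const char *name)
  {
          size_t len = bpf_probe_read_kernel_str(payload, MAX_PATH, name);

          /* before:
           *   if (len <= MAX_PATH) { barrier_var(len); payload += len; }
           * after: the compare is pinned in assembly and the likely path
           * falls through
           */
          if (bpf_cmp_likely(len, <=, MAX_PATH))
                  payload += len;
          return payload;
  }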
bpf_cmp_unlikely() produces correct code, but takes much longer to verify: ./veristat -C -e prog,insns,states before after_with_unlikely Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ------------------------------------ --------- --------- ------------------ ---------- ---------- ----------------- kprobe__proc_sys_write 1603 19606 +18003 (+1123.08%) 123 1678 +1555 (+1264.23%) kprobe__vfs_link 11815 70305 +58490 (+495.05%) 971 4967 +3996 (+411.53%) kprobe__vfs_symlink 5464 42896 +37432 (+685.07%) 434 3126 +2692 (+620.28%) kprobe_ret__do_filp_open 5641 44578 +38937 (+690.25%) 446 3162 +2716 (+608.97%) raw_tracepoint__sched_process_exec 2770 35962 +33192 (+1198.27%) 226 3121 +2895 (+1280.97%) raw_tracepoint__sched_process_exit 1526 2135 +609 (+39.91%) 133 208 +75 (+56.39%) raw_tracepoint__sched_process_fork 265 337 +72 (+27.17%) 19 24 +5 (+26.32%) tracepoint__syscalls__sys_enter_kill 18782 140407 +121625 (+647.56%) 1286 12176 +10890 (+846.81%) bpf_cmp_likely() is equivalent to barrier_var(): ./veristat -C -e prog,insns,states before after_with_likely Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ------------------------------------ --------- --------- -------------- ---------- ---------- ------------- kprobe__proc_sys_write 1603 1663 +60 (+3.74%) 123 127 +4 (+3.25%) kprobe__vfs_link 11815 12090 +275 (+2.33%) 971 971 +0 (+0.00%) kprobe__vfs_symlink 5464 5448 -16 (-0.29%) 434 426 -8 (-1.84%) kprobe_ret__do_filp_open 5641 5739 +98 (+1.74%) 446 446 +0 (+0.00%) raw_tracepoint__sched_process_exec 2770 2608 -162 (-5.85%) 226 216 -10 (-4.42%) raw_tracepoint__sched_process_exit 1526 1526 +0 (+0.00%) 133 133 +0 (+0.00%) raw_tracepoint__sched_process_fork 265 265 +0 (+0.00%) 19 19 +0 (+0.00%) tracepoint__syscalls__sys_enter_kill 18782 18970 +188 (+1.00%) 1286 1286 +0 (+0.00%) kprobe__proc_sys_write 2700 2809 +109 (+4.04%) 107 109 +2 (+1.87%) kprobe__vfs_link 12238 12366 +128 (+1.05%) 267 269 +2 (+0.75%) kprobe__vfs_symlink 7139 7365 +226 (+3.17%) 167 175 +8 (+4.79%) kprobe_ret__do_filp_open 7264 7070 -194 (-2.67%) 180 182 +2 (+1.11%) raw_tracepoint__sched_process_exec 3768 3453 -315 (-8.36%) 211 199 -12 (-5.69%) raw_tracepoint__sched_process_exit 3138 3138 +0 (+0.00%) 83 83 +0 (+0.00%) raw_tracepoint__sched_process_fork 265 265 +0 (+0.00%) 19 19 +0 (+0.00%) tracepoint__syscalls__sys_enter_kill 26679 24327 -2352 (-8.82%) 1067 1037 -30 (-2.81%) kprobe__proc_sys_write 1833 1833 +0 (+0.00%) 157 157 +0 (+0.00%) kprobe__vfs_link 9995 10127 +132 (+1.32%) 803 803 +0 (+0.00%) kprobe__vfs_symlink 5606 5672 +66 (+1.18%) 451 451 +0 (+0.00%) kprobe_ret__do_filp_open 5716 5782 +66 (+1.15%) 462 462 +0 (+0.00%) raw_tracepoint__sched_process_exec 3042 3042 +0 (+0.00%) 278 278 +0 (+0.00%) raw_tracepoint__sched_process_exit 1680 1680 +0 (+0.00%) 146 146 +0 (+0.00%) raw_tracepoint__sched_process_fork 299 299 +0 (+0.00%) 25 25 +0 (+0.00%) tracepoint__syscalls__sys_enter_kill 18372 18372 +0 (+0.00%) 1558 1558 +0 (+0.00%) default (mcpu=v3), no_alu32, cpuv4 have similar differences. Note one place where bpf_nop_mov() is used to workaround the verifier lack of link between the scalar register and its spill to stack. 
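That one place is read_absolute_file_path_from_dentry(); distilled from the hunk below, the pattern is:

  filepart_length = bpf_probe_read_kernel_str(payload, MAX_PATH,
                                              BPF_CORE_READ(filp_dentry, d_name.name));
  /* rX = rX: gives filepart_length a verifier ID, so the bounds check below
   * also refines the copy of it spilled to the stack
   */
  bpf_nop_mov(filepart_length);
  if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
          break;
  payload += filepart_length;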
Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20231226191148.48536-7-alexei.starovoitov@gmail.com --- tools/testing/selftests/bpf/progs/profiler.inc.h | 64 +++++++----------------- 1 file changed, 18 insertions(+), 46 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index ba99d17dac54..de3b6e4e4d0a 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -7,6 +7,7 @@ #include "profiler.h" #include "err.h" +#include "bpf_experimental.h" #ifndef NULL #define NULL 0 @@ -221,8 +222,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, return payload; if (cgroup_node == cgroup_root_node) *root_pos = payload - payload_start; - if (filepart_length <= MAX_PATH) { - barrier_var(filepart_length); + if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { payload += filepart_length; } cgroup_node = BPF_CORE_READ(cgroup_node, parent); @@ -305,9 +305,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, size_t cgroup_root_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); - barrier_var(cgroup_root_length); - if (cgroup_root_length <= MAX_PATH) { - barrier_var(cgroup_root_length); + if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) { cgroup_data->cgroup_root_length = cgroup_root_length; payload += cgroup_root_length; } @@ -315,9 +313,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, size_t cgroup_proc_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); - barrier_var(cgroup_proc_length); - if (cgroup_proc_length <= MAX_PATH) { - barrier_var(cgroup_proc_length); + if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) { cgroup_data->cgroup_proc_length = cgroup_proc_length; payload += cgroup_proc_length; } @@ -347,9 +343,7 @@ static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, metadata->comm_length = 0; size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); - barrier_var(comm_length); - if (comm_length <= TASK_COMM_LEN) { - barrier_var(comm_length); + if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { metadata->comm_length = comm_length; payload += comm_length; } @@ -494,10 +488,9 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) filepart_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(filp_dentry, d_name.name)); - barrier_var(filepart_length); - if (filepart_length > MAX_PATH) + bpf_nop_mov(filepart_length); + if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH)) break; - barrier_var(filepart_length); payload += filepart_length; length += filepart_length; @@ -579,9 +572,7 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, size_t sysctl_val_length = bpf_probe_read_kernel_str(payload, CTL_MAXNAME, buf); - barrier_var(sysctl_val_length); - if (sysctl_val_length <= CTL_MAXNAME) { - barrier_var(sysctl_val_length); + if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) { sysctl_data->sysctl_val_length = sysctl_val_length; payload += sysctl_val_length; } @@ -590,9 +581,7 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(filp, f_path.dentry, d_name.name)); - barrier_var(sysctl_path_length); - if (sysctl_path_length <= MAX_PATH) { - barrier_var(sysctl_path_length); + if 
(bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) { sysctl_data->sysctl_path_length = sysctl_path_length; payload += sysctl_path_length; } @@ -658,9 +647,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) kill_data->kill_target_cgroup_proc_length = 0; size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); - barrier_var(comm_length); - if (comm_length <= TASK_COMM_LEN) { - barrier_var(comm_length); + if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { kill_data->kill_target_name_length = comm_length; payload += comm_length; } @@ -669,9 +656,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) bpf_probe_read_kernel_str(payload, KILL_TARGET_LEN, BPF_CORE_READ(proc_kernfs, name)); - barrier_var(cgroup_proc_length); - if (cgroup_proc_length <= KILL_TARGET_LEN) { - barrier_var(cgroup_proc_length); + if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) { kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; payload += cgroup_proc_length; } @@ -731,9 +716,7 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) const char* filename = BPF_CORE_READ(bprm, filename); size_t bin_path_length = bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename); - barrier_var(bin_path_length); - if (bin_path_length <= MAX_FILENAME_LEN) { - barrier_var(bin_path_length); + if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) { proc_exec_data->bin_path_length = bin_path_length; payload += bin_path_length; } @@ -743,8 +726,7 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) unsigned int cmdline_length = probe_read_lim(payload, arg_start, arg_end - arg_start, MAX_ARGS_LEN); - if (cmdline_length <= MAX_ARGS_LEN) { - barrier_var(cmdline_length); + if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) { proc_exec_data->cmdline_length = cmdline_length; payload += cmdline_length; } @@ -821,9 +803,7 @@ int kprobe_ret__do_filp_open(struct pt_regs* ctx) payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } @@ -876,17 +856,13 @@ int BPF_KPROBE(kprobe__vfs_link, payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->src_filepath_length = len; } len = read_absolute_file_path_from_dentry(new_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } @@ -936,16 +912,12 @@ int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH, oldname); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->src_filepath_length = len; } len = read_absolute_file_path_from_dentry(dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = 
len; } -- cgit v1.2.3-70-g09d2 From 8c1b382a555adcd2008ae964047a35b739dfaf24 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:25 -0800 Subject: bpf: sockmap, add tests for proto updates many to single map Add test with a single map where each socket is inserted multiple times. Test protocols: TCP, UDP, stream af_unix and dgram af_unix. Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-4-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 71 +++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 7c2241fae19a..ebd05788cfa0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -555,6 +555,74 @@ static void test_sockmap_unconnected_unix(void) close(dgram); } +static void test_sockmap_many_socket(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map, &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -597,7 +665,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); - if (test__start_subtest("sockmap unconnected af_unix")) test_sockmap_unconnected_unix(); + if (test__start_subtest("sockmap one socket to many map entries")) + test_sockmap_many_socket(); } -- cgit v1.2.3-70-g09d2 From f1300467dd9f67293a7aae86fd26471520fac36d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:26 -0800 Subject: bpf: sockmap, add tests for proto updates single socket to many map Add test with multiple maps where each socket is inserted in multiple maps. 
Test protocols: TCP, UDP, stream af_unix and dgram af_unix. Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-5-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 74 ++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index ebd05788cfa0..74bca07ebec2 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -623,6 +623,78 @@ out: test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_many_maps(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map[2], entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map[0] = bpf_map__fd(skel->maps.sock_map_rx); + map[1] = bpf_map__fd(skel->maps.sock_map_tx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map[1], &entry); + entry--; + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + err = bpf_map_delete_elem(map[0], &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -669,4 +741,6 @@ void test_sockmap_basic(void) test_sockmap_unconnected_unix(); if (test__start_subtest("sockmap one socket to many map entries")) test_sockmap_many_socket(); + if (test__start_subtest("sockmap one socket to many maps")) + test_sockmap_many_maps(); } -- cgit v1.2.3-70-g09d2 From bdbca46d3f84a4455cd5c15a7483666218851549 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:27 -0800 Subject: bpf: sockmap, add tests for proto updates replace socket Add test that replaces the same socket with itself. This exercises a corner case where old element and new element have the same posck. Test protocols: TCP, UDP, stream af_unix and dgram af_unix. 
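At the map API level the corner case is simply the same socket fd installed twice under one key (sketch; the full test below repeats this for each protocol and checks every return code):

  int zero = 0;

  /* first update installs the psock for sock_fd */
  err = bpf_map_update_elem(map_fd, &zero, &sock_fd, BPF_ANY);
  /* second update replaces the element with itself: old and new element
   * refer to the same socket
   */
  err = bpf_map_update_elem(map_fd, &zero, &sock_fd, BPF_ANY);
  err = bpf_map_delete_elem(map_fd, &zero);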
Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-6-john.fastabend@gmail.com --- .../selftests/bpf/prog_tests/sockmap_basic.c | 69 ++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 74bca07ebec2..77e26ecffa9d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -695,6 +695,73 @@ out: test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_same_sock(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, zero = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + + err = bpf_map_delete_elem(map, &zero); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -743,4 +810,6 @@ void test_sockmap_basic(void) test_sockmap_many_socket(); if (test__start_subtest("sockmap one socket to many maps")) test_sockmap_many_maps(); + if (test__start_subtest("sockmap same socket replace")) + test_sockmap_same_sock(); } -- cgit v1.2.3-70-g09d2 From 05d92cb0e919239c29b3a26da1f76f1e18fed7d3 Mon Sep 17 00:00:00 2001 From: Yujie Liu Date: Fri, 29 Dec 2023 21:19:31 +0800 Subject: selftests/net: change shebang to bash to support "source" The patch set [1] added a general lib.sh in net selftests, and converted several test scripts to source the lib.sh. unicast_extensions.sh (converted in [1]) and pmtu.sh (converted in [2]) have a /bin/sh shebang which may point to various shells in different distributions, but "source" is only available in some of them. For example, "source" is a built-it function in bash, but it cannot be used in dash. Refer to other scripts that were converted together, simply change the shebang to bash to fix the following issues when the default /bin/sh points to other shells. 
not ok 51 selftests: net: unicast_extensions.sh # exit=1 v1 -> v2: - Fix pmtu.sh which has the same issue as unicast_extensions.sh, suggested by Hangbin - Change the style of the "source" line to be consistent with other tests, suggested by Hangbin Link: https://lore.kernel.org/all/20231202020110.362433-1-liuhangbin@gmail.com/ [1] Link: https://lore.kernel.org/all/20231219094856.1740079-1-liuhangbin@gmail.com/ [2] Reported-by: kernel test robot Fixes: 378f082eaf37 ("selftests/net: convert pmtu.sh to run it in unique namespace") Fixes: 0f4765d0b48d ("selftests/net: convert unicast_extensions.sh to run it in unique namespace") Signed-off-by: Yujie Liu Reviewed-by: Przemek Kitszel Reviewed-by: Hangbin Liu Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20231229131931.3961150-1-yujie.liu@intel.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 4 ++-- tools/testing/selftests/net/unicast_extensions.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 175d3d1d773b..f10879788f61 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Check that route PMTU values match expectations, and that initial device MTU @@ -198,7 +198,7 @@ # - pmtu_ipv6_route_change # Same as above but with IPv6 -source ./lib.sh +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index b7a2cb9e7477..f52aa5f7da52 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project @@ -28,7 +28,7 @@ # These tests provide an easy way to flip the expected result of any # of these behaviors for testing kernel patches that change them. -source ./lib.sh +source lib.sh # nettest can be run from PATH or from same directory as this selftest if ! which nettest >/dev/null; then -- cgit v1.2.3-70-g09d2 From 21f5a801c171dff4e728e38f62cf626c4197d07c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Dec 2023 19:18:07 -0800 Subject: selftests/bpf: Cope with 512 bytes limit with bpf_global_percpu_ma In the previous patch, the maximum data size for bpf_global_percpu_ma is 512 bytes. This breaks selftest test_bpf_ma. The test is adjusted in two aspects: - Since the maximum allowed data size for bpf_global_percpu_ma is 512, remove all tests beyond that, names sizes 1024, 2048 and 4096. - Previously the percpu data size is bucket_size - 8 in order to avoid percpu allocation into the next bucket. This patch removed such data size adjustment thanks to Patch 1. Also, a better way to generate BTF type is used than adding a member to the value struct. 
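The "better way" can be sketched as follows (illustrative C for the 64-byte case, not the exact macro text from the patch): instead of embedding the bin_data struct as an unused member of the map value just so clang emits BTF for it, an otherwise-unused global pointer declaration is enough to force BTF generation, as the in-patch comment notes.

	struct bin_data_64 {
		char data[64 - sizeof(void *)];
	};
	/* standalone pointer forces BTF emission for bin_data_64, replacing the
	 * old 'not_used' member inside the map value
	 */
	struct bin_data_64 *__bin_data_64;
	struct map_value_64 {
		struct bin_data_64 __kptr * data;
	};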
Acked-by: Hou Tao Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20231222031807.1292853-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_bpf_ma.c | 20 ++++--- tools/testing/selftests/bpf/progs/test_bpf_ma.c | 66 +++++++++++----------- 2 files changed, 46 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c index d3491a84b3b9..ccae0b31ac6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -14,7 +14,8 @@ static void do_bpf_ma_test(const char *name) struct test_bpf_ma *skel; struct bpf_program *prog; struct btf *btf; - int i, err; + int i, err, id; + char tname[32]; skel = test_bpf_ma__open(); if (!ASSERT_OK_PTR(skel, "open")) @@ -25,16 +26,21 @@ static void do_bpf_ma_test(const char *name) goto out; for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { - char name[32]; - int id; - - snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); - id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); - if (!ASSERT_GT(id, 0, "bin_data")) + snprintf(tname, sizeof(tname), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) goto out; skel->rodata->data_btf_ids[i] = id; } + for (i = 0; i < ARRAY_SIZE(skel->rodata->percpu_data_sizes); i++) { + snprintf(tname, sizeof(tname), "percpu_bin_data_%u", skel->rodata->percpu_data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) + goto out; + skel->rodata->percpu_data_btf_ids[i] = id; + } + prog = bpf_object__find_program_by_name(skel->obj, name); if (!ASSERT_OK_PTR(prog, "invalid prog name")) goto out; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index b78f4f702ae0..3494ca30fa7f 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -20,6 +20,9 @@ char _license[] SEC("license") = "GPL"; const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; +const unsigned int percpu_data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512}; +const volatile unsigned int percpu_data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + int err = 0; u32 pid = 0; @@ -27,10 +30,10 @@ u32 pid = 0; struct bin_data_##_size { \ char data[_size - sizeof(void *)]; \ }; \ + /* See Commit 5d8d6634ccc, force btf generation for type bin_data_##_size */ \ + struct bin_data_##_size *__bin_data_##_size; \ struct map_value_##_size { \ struct bin_data_##_size __kptr * data; \ - /* To emit BTF info for bin_data_xx */ \ - struct bin_data_##_size not_used; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -40,8 +43,12 @@ u32 pid = 0; } array_##_size SEC(".maps") #define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \ + struct percpu_bin_data_##_size { \ + char data[_size]; \ + }; \ + struct percpu_bin_data_##_size *__percpu_bin_data_##_size; \ struct map_value_percpu_##_size { \ - struct bin_data_##_size __percpu_kptr * data; \ + struct percpu_bin_data_##_size __percpu_kptr * data; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -114,7 +121,7 @@ static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int return; } /* per-cpu 
allocator may not be able to refill in time */ - new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL); + new = bpf_percpu_obj_new_impl(percpu_data_btf_ids[idx], NULL); if (!new) continue; @@ -179,7 +186,7 @@ DEFINE_ARRAY_WITH_KPTR(1024); DEFINE_ARRAY_WITH_KPTR(2048); DEFINE_ARRAY_WITH_KPTR(4096); -/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */ +DEFINE_ARRAY_WITH_PERCPU_KPTR(8); DEFINE_ARRAY_WITH_PERCPU_KPTR(16); DEFINE_ARRAY_WITH_PERCPU_KPTR(32); DEFINE_ARRAY_WITH_PERCPU_KPTR(64); @@ -188,9 +195,6 @@ DEFINE_ARRAY_WITH_PERCPU_KPTR(128); DEFINE_ARRAY_WITH_PERCPU_KPTR(192); DEFINE_ARRAY_WITH_PERCPU_KPTR(256); DEFINE_ARRAY_WITH_PERCPU_KPTR(512); -DEFINE_ARRAY_WITH_PERCPU_KPTR(1024); -DEFINE_ARRAY_WITH_PERCPU_KPTR(2048); -DEFINE_ARRAY_WITH_PERCPU_KPTR(4096); SEC("?fentry/" SYS_PREFIX "sys_nanosleep") int test_batch_alloc_free(void *ctx) @@ -246,20 +250,18 @@ int test_batch_percpu_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, - * then free 128 16-bytes per-cpu objects in batch to trigger freeing. + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, + * then free 128 8-bytes per-cpu objects in batch to trigger freeing. */ - CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 0); - CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 1); - CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 2); - CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 3); - CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 4); - CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 5); - CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 6); - CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 7); - CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 8); - CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 9); - CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 10); + CALL_BATCH_PERCPU_ALLOC_FREE(8, 128, 0); + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); return 0; } @@ -270,20 +272,18 @@ int test_percpu_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, * then free these object through map free. 
*/ - CALL_BATCH_PERCPU_ALLOC(16, 128, 0); - CALL_BATCH_PERCPU_ALLOC(32, 128, 1); - CALL_BATCH_PERCPU_ALLOC(64, 128, 2); - CALL_BATCH_PERCPU_ALLOC(96, 128, 3); - CALL_BATCH_PERCPU_ALLOC(128, 128, 4); - CALL_BATCH_PERCPU_ALLOC(192, 128, 5); - CALL_BATCH_PERCPU_ALLOC(256, 128, 6); - CALL_BATCH_PERCPU_ALLOC(512, 64, 7); - CALL_BATCH_PERCPU_ALLOC(1024, 32, 8); - CALL_BATCH_PERCPU_ALLOC(2048, 16, 9); - CALL_BATCH_PERCPU_ALLOC(4096, 8, 10); + CALL_BATCH_PERCPU_ALLOC(8, 128, 0); + CALL_BATCH_PERCPU_ALLOC(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC(512, 64, 8); return 0; } -- cgit v1.2.3-70-g09d2 From adc8c4549d9e74d2359c217d2478b18ecdd15c91 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Dec 2023 19:18:12 -0800 Subject: selftests/bpf: Add a selftest with > 512-byte percpu allocation size Add a selftest to capture the verification failure when the allocation size is greater than 512. Acked-by: Hou Tao Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20231222031812.1293190-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/percpu_alloc_fail.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c index 1a891d30f1fe..f2b8eb2ff76f 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c @@ -17,6 +17,10 @@ struct val_with_rb_root_t { struct bpf_spin_lock lock; }; +struct val_600b_t { + char b[600]; +}; + struct elem { long sum; struct val_t __percpu_kptr *pc; @@ -161,4 +165,18 @@ int BPF_PROG(test_array_map_7) return 0; } +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("bpf_percpu_obj_new type size (600) is greater than 512") +int BPF_PROG(test_array_map_8) +{ + struct val_600b_t __percpu_kptr *p; + + p = bpf_percpu_obj_new(struct val_600b_t); + if (!p) + return 0; + + bpf_percpu_obj_drop(p); + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-70-g09d2 From df7c3f7d3a3ddab31ca8cfa9b86a8729ec43fd2e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:39 -0800 Subject: libbpf: make uniform use of btf__fd() accessor inside libbpf It makes future grepping and code analysis a bit easier. 
Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ebcfb2147fbd..f1521a400f02 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7050,7 +7050,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.prog_ifindex = prog->prog_ifindex; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(obj); + btf_fd = btf__fd(obj->btf); if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; -- cgit v1.2.3-70-g09d2 From fa98b54bff39f51c46fc96d3385c6292391c277b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:40 -0800 Subject: libbpf: use explicit map reuse flag to skip map creation steps Instead of inferring whether map already point to previously created/pinned BPF map (which user can specify with bpf_map__reuse_fd()) API), use explicit map->reused flag that is set in such case. Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f1521a400f02..3b678b617213 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5465,7 +5465,7 @@ retry: } } - if (map->fd >= 0) { + if (map->reused) { pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); } else { -- cgit v1.2.3-70-g09d2 From f08c18e083adfef92946ae1d44b07bb81e727e08 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:41 -0800 Subject: libbpf: don't rely on map->fd as an indicator of map being created With the upcoming switch to preallocated placeholder FDs for maps, switch various getters/setter away from checking map->fd. Use map_is_created() helper that detect whether BPF map can be modified based on map->obj->loaded state, with special provision for maps set up with bpf_map__reuse_fd(). For backwards compatibility, we take map_is_created() into account in bpf_map__fd() getter as well. This way before bpf_object__load() phase bpf_map__fd() will always return -1, just as before the changes in subsequent patches adding stable map->fd placeholders. We also get rid of all internal uses of bpf_map__fd() getter, as it's more oriented for uses external to libbpf. The above map_is_created() check actually interferes with some of the internal uses, if map FD is fetched through bpf_map__fd(). 
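From an application's point of view, the backwards-compatible behavior described above can be sketched like this (hypothetical object file and map name):

	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
	int fd;

	fd = bpf_map__fd(map);	/* -1: object not loaded, map not created yet */

	bpf_object__load(obj);
	fd = bpf_map__fd(map);	/* now a valid FD for the created kernel map */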
Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3b678b617213..6b27edd47c84 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5200,6 +5200,11 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); +static bool map_is_created(const struct bpf_map *map) +{ + return map->obj->loaded || map->reused; +} + static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -5231,7 +5236,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b map->name, err); return err; } - map->inner_map_fd = bpf_map__fd(map->inner_map); + map->inner_map_fd = map->inner_map->fd; } if (map->inner_map_fd >= 0) create_attr.inner_map_fd = map->inner_map_fd; @@ -5314,7 +5319,7 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) continue; targ_map = map->init_slots[i]; - fd = bpf_map__fd(targ_map); + fd = targ_map->fd; if (obj->gen_loader) { bpf_gen__populate_outer_map(obj->gen_loader, @@ -7135,7 +7140,7 @@ retry_load: if (map->libbpf_type != LIBBPF_MAP_RODATA) continue; - if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { + if (bpf_prog_bind_map(ret, map->fd, NULL)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': failed to bind map '%s': %s\n", prog->name, map->real_name, cp); @@ -9601,7 +9606,11 @@ int libbpf_attach_type_by_name(const char *name, int bpf_map__fd(const struct bpf_map *map) { - return map ? 
map->fd : libbpf_err(-EINVAL); + if (!map) + return libbpf_err(-EINVAL); + if (!map_is_created(map)) + return -1; + return map->fd; } static bool map_uses_real_name(const struct bpf_map *map) @@ -9637,7 +9646,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map) int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.type = type; return 0; @@ -9650,7 +9659,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map) int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.map_flags = flags; return 0; @@ -9663,7 +9672,7 @@ __u64 bpf_map__map_extra(const struct bpf_map *map) int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_extra = map_extra; return 0; @@ -9676,7 +9685,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map) int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->numa_node = numa_node; return 0; @@ -9689,7 +9698,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map) int bpf_map__set_key_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.key_size = size; return 0; @@ -9773,7 +9782,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map->obj->loaded || map->reused) return libbpf_err(-EBUSY); if (map->mmaped) { @@ -9814,8 +9823,11 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size) { + if (map->obj->loaded || map->reused) + return libbpf_err(-EBUSY); + if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || - size != map->def.value_size || map->fd >= 0) + size != map->def.value_size) return libbpf_err(-EINVAL); memcpy(map->mmaped, data, size); @@ -9842,7 +9854,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map) int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_ifindex = ifindex; return 0; @@ -9947,7 +9959,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) static int validate_map_op(const struct bpf_map *map, size_t key_sz, size_t value_sz, bool check_value_sz) { - if (map->fd <= 0) + if (!map_is_created(map)) /* map is not yet created */ return -ENOENT; if (map->def.key_size != key_sz) { @@ -12400,7 +12412,7 @@ int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) __u32 zero = 0; int err; - if (!bpf_map__is_struct_ops(map) || map->fd < 0) + if (!bpf_map__is_struct_ops(map) || !map_is_created(map)) return -EINVAL; st_ops_link = container_of(link, struct bpf_link_struct_ops, link); @@ -13304,7 +13316,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->map_cnt; i++) { struct bpf_map *map = *s->maps[i].map; size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); - int prot, map_fd = bpf_map__fd(map); + int prot, map_fd = map->fd; void **mmaped = s->maps[i].mmaped; if (!mmaped) -- cgit v1.2.3-70-g09d2 From dac645b950ea4fc0896fe46a645365cb8d9ab92b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:42 -0800 
Subject: libbpf: use stable map placeholder FDs Move map creation to later during BPF object loading by pre-creating stable placeholder FDs (utilizing memfd_create()). Use dup2() syscall to then atomically make those placeholder FDs point to real kernel BPF map objects. This change allows to delay BPF map creation to after all the BPF program relocations. That, in turn, allows to delay BTF finalization and loading into kernel to after all the relocations as well. We'll take advantage of the latter in subsequent patches to allow libbpf to adjust BTF in a way that helps with BPF global function usage. Clean up a few places where we close map->fd, which now shouldn't happen, because map->fd should be a valid FD regardless of whether map was created or not. Surprisingly and nicely it simplifies a bunch of error handling code. If this change doesn't backfire, I'm tempted to pre-create such stable FDs for other entities (progs, maybe even BTF). We previously did some manipulations to make gen_loader work with fake map FDs, with stable map FDs this hack is not necessary for maps (we still have it for BTF, but I left it as is for now). Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 101 +++++++++++++++++++++++++--------------- tools/lib/bpf/libbpf_internal.h | 14 ++++++ 2 files changed, 77 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6b27edd47c84..a58569b7e4bf 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1503,6 +1503,16 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } +static int create_placeholder_fd(void) +{ + int fd; + + fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + if (fd < 0) + return -errno; + return fd; +} + static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) { struct bpf_map *map; @@ -1515,7 +1525,21 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) map = &obj->maps[obj->nr_maps++]; map->obj = obj; - map->fd = -1; + /* Preallocate map FD without actually creating BPF map just yet. + * These map FD "placeholders" will be reused later without changing + * FD value when map is actually created in the kernel. + * + * This is useful to be able to perform BPF program relocations + * without having to create BPF maps before that step. This allows us + * to finalize and load BTF very late in BPF object's loading phase, + * right before BPF maps have to be created and BPF programs have to + * be loaded. By having these map FD placeholders we can perform all + * the sanitizations, relocations, and any other adjustments before we + * start creating actual BPF kernel objects (BTF, maps, progs). 
+ */ + map->fd = create_placeholder_fd(); + if (map->fd < 0) + return ERR_PTR(map->fd); map->inner_map_fd = -1; map->autocreate = true; @@ -2607,7 +2631,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map->inner_map = calloc(1, sizeof(*map->inner_map)); if (!map->inner_map) return -ENOMEM; - map->inner_map->fd = -1; + map->inner_map->fd = create_placeholder_fd(); + if (map->inner_map->fd < 0) + return map->inner_map->fd; map->inner_map->sec_idx = sec_idx; map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); if (!map->inner_map->name) @@ -4549,14 +4575,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) goto err_free_new_name; } - err = zclose(map->fd); - if (err) { - err = -errno; - goto err_close_new_fd; - } + err = reuse_fd(map->fd, new_fd); + if (err) + goto err_free_new_name; + free(map->name); - map->fd = new_fd; map->name = new_name; map->def.type = info.type; map->def.key_size = info.key_size; @@ -4570,8 +4594,6 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) return 0; -err_close_new_fd: - close(new_fd); err_free_new_name: free(new_name); return libbpf_err(err); @@ -5210,7 +5232,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; const char *map_name = NULL; - int err = 0; + int err = 0, map_fd; if (kernel_supports(obj, FEAT_PROG_NAME)) map_name = map->name; @@ -5269,17 +5291,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b bpf_gen__map_create(obj->gen_loader, def->type, map_name, def->key_size, def->value_size, def->max_entries, &create_attr, is_inner ? -1 : map - obj->maps); - /* Pretend to have valid FD to pass various fd >= 0 checks. - * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + /* We keep pretenting we have valid FD to pass various fd >= 0 + * checks by just keeping original placeholder FDs in place. + * See bpf_object__add_map() comment. + * This placeholder fd will not be used with any syscall and + * will be reset to -1 eventually. */ - map->fd = 0; + map_fd = map->fd; } else { - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - def->max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - if (map->fd < 0 && (create_attr.btf_key_type_id || - create_attr.btf_value_type_id)) { + if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { char *cp, errmsg[STRERR_BUFSIZE]; err = -errno; @@ -5291,13 +5315,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - def->max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - err = map->fd < 0 ? 
-errno : 0; - if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { if (obj->gen_loader) map->inner_map->fd = -1; @@ -5305,7 +5327,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b zfree(&map->inner_map); } - return err; + if (map_fd < 0) + return map_fd; + + /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */ + if (map->fd == map_fd) + return 0; + + /* Keep placeholder FD value but now point it to the BPF map object. + * This way everything that relied on this map's FD (e.g., relocated + * ldimm64 instructions) will stay valid and won't need adjustments. + * map->fd stays valid but now point to what map_fd points to. + */ + return reuse_fd(map->fd, map_fd); } static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -5389,10 +5423,8 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj) continue; err = init_prog_array_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) return err; - } } return 0; } @@ -5483,25 +5515,20 @@ retry: if (bpf_map__is_internal(map)) { err = bpf_object__populate_internal_map(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; - } } if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; - } } } if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { - zclose(map->fd); if (!retried && err == -EEXIST) { retried = true; goto retry; @@ -8075,8 +8102,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); - err = err ? : bpf_object__create_maps(obj); err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__create_maps(obj); err = err ? : bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); err = err ? : bpf_object_prepare_struct_ops(obj); @@ -8085,8 +8112,6 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch /* reset FDs */ if (obj->btf) btf__set_fd(obj->btf, -1); - for (i = 0; i < obj->nr_maps; i++) - obj->maps[i].fd = -1; if (!err) err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b5d334754e5d..27e4e320e1a6 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -555,6 +555,20 @@ static inline int ensure_good_fd(int fd) return fd; } +/* Point *fixed_fd* to the same file that *tmp_fd* points to. + * Regardless of success, *tmp_fd* is closed. + * Whatever *fixed_fd* pointed to is closed silently. + */ +static inline int reuse_fd(int fixed_fd, int tmp_fd) +{ + int err; + + err = dup2(tmp_fd, fixed_fd); + err = err < 0 ? 
-errno : 0; + close(tmp_fd); /* clean up temporary FD */ + return err; +} + /* The following two functions are exposed to bpftool */ int bpf_core_add_cands(struct bpf_core_cand *local_cand, size_t local_essent_len, -- cgit v1.2.3-70-g09d2 From fb03be7c4a27c25696287df4ee06c5aafa31267c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:43 -0800 Subject: libbpf: move exception callbacks assignment logic into relocation step Move the logic of finding and assigning exception callback indices from BTF sanitization step to program relocations step, which seems more logical and will unblock moving BTF loading to after relocation step. Exception callbacks discovery and assignment has no dependency on BTF being loaded into the kernel, it only uses BTF information. It does need to happen before subprogram relocations happen, though. Which is why the split. No functional changes. Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 165 +++++++++++++++++++++++++------------------------ 1 file changed, 85 insertions(+), 80 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a58569b7e4bf..01d45f0c40d0 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3192,86 +3192,6 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } } - if (!kernel_supports(obj, FEAT_BTF_DECL_TAG)) - goto skip_exception_cb; - for (i = 0; i < obj->nr_programs; i++) { - struct bpf_program *prog = &obj->programs[i]; - int j, k, n; - - if (prog_is_subprog(obj, prog)) - continue; - n = btf__type_cnt(obj->btf); - for (j = 1; j < n; j++) { - const char *str = "exception_callback:", *name; - size_t len = strlen(str); - struct btf_type *t; - - t = btf_type_by_id(obj->btf, j); - if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) - continue; - - name = btf__str_by_offset(obj->btf, t->name_off); - if (strncmp(name, str, len)) - continue; - - t = btf_type_by_id(obj->btf, t->type); - if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { - pr_warn("prog '%s': exception_callback: decl tag not applied to the main program\n", - prog->name); - return -EINVAL; - } - if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off))) - continue; - /* Multiple callbacks are specified for the same prog, - * the verifier will eventually return an error for this - * case, hence simply skip appending a subprog. - */ - if (prog->exception_cb_idx >= 0) { - prog->exception_cb_idx = -1; - break; - } - - name += len; - if (str_is_empty(name)) { - pr_warn("prog '%s': exception_callback: decl tag contains empty value\n", - prog->name); - return -EINVAL; - } - - for (k = 0; k < obj->nr_programs; k++) { - struct bpf_program *subprog = &obj->programs[k]; - - if (!prog_is_subprog(obj, subprog)) - continue; - if (strcmp(name, subprog->name)) - continue; - /* Enforce non-hidden, as from verifier point of - * view it expects global functions, whereas the - * mark_btf_static fixes up linkage as static. 
- */ - if (!subprog->sym_global || subprog->mark_btf_static) { - pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", - prog->name, subprog->name); - return -EINVAL; - } - /* Let's see if we already saw a static exception callback with the same name */ - if (prog->exception_cb_idx >= 0) { - pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", - prog->name, subprog->name); - return -EINVAL; - } - prog->exception_cb_idx = k; - break; - } - - if (prog->exception_cb_idx >= 0) - continue; - pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); - return -ENOENT; - } - } -skip_exception_cb: - sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -6661,6 +6581,88 @@ static void bpf_object__sort_relos(struct bpf_object *obj) } } +static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *str = "exception_callback:"; + size_t pfx_len = strlen(str); + int i, j, n; + + if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) + return 0; + + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { + const char *name; + struct btf_type *t; + + t = btf_type_by_id(obj->btf, i); + if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strncmp(name, str, pfx_len) != 0) + continue; + + t = btf_type_by_id(obj->btf, t->type); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + pr_warn("prog '%s': exception_callback: decl tag not applied to the main program\n", + prog->name); + return -EINVAL; + } + if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) + continue; + /* Multiple callbacks are specified for the same prog, + * the verifier will eventually return an error for this + * case, hence simply skip appending a subprog. + */ + if (prog->exception_cb_idx >= 0) { + prog->exception_cb_idx = -1; + break; + } + + name += pfx_len; + if (str_is_empty(name)) { + pr_warn("prog '%s': exception_callback: decl tag contains empty value\n", + prog->name); + return -EINVAL; + } + + for (j = 0; j < obj->nr_programs; j++) { + struct bpf_program *subprog = &obj->programs[j]; + + if (!prog_is_subprog(obj, subprog)) + continue; + if (strcmp(name, subprog->name) != 0) + continue; + /* Enforce non-hidden, as from verifier point of + * view it expects global functions, whereas the + * mark_btf_static fixes up linkage as static. + */ + if (!subprog->sym_global || subprog->mark_btf_static) { + pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", + prog->name, subprog->name); + return -EINVAL; + } + /* Let's see if we already saw a static exception callback with the same name */ + if (prog->exception_cb_idx >= 0) { + pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", + prog->name, subprog->name); + return -EINVAL; + } + prog->exception_cb_idx = j; + break; + } + + if (prog->exception_cb_idx >= 0) + continue; + + pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); + return -ENOENT; + } + + return 0; +} + static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { @@ -6721,6 +6723,9 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } + err = bpf_prog_assign_exc_cb(obj, prog); + if (err) + return err; /* Now, also append exception callback if it has not been done already. 
*/ if (prog->exception_cb_idx >= 0) { struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; -- cgit v1.2.3-70-g09d2 From 1004742d7ff03a088e74133af2401556ac80092b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:44 -0800 Subject: libbpf: move BTF loading step after relocation step With all the preparations in previous patches done we are ready to postpone BTF loading and sanitization step until after all the relocations are performed. Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-7-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 01d45f0c40d0..836986974de3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8104,10 +8104,10 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); - err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__create_maps(obj); err = err ? : bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); -- cgit v1.2.3-70-g09d2 From 2f38fe689470055440bf80fc644920023a643a82 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:45 -0800 Subject: libbpf: implement __arg_ctx fallback logic Out of all special global func arg tag annotations, __arg_ctx is practically is the most immediately useful and most critical to have working across multitude kernel version, if possible. This would allow end users to write much simpler code if __arg_ctx semantics worked for older kernels that don't natively understand btf_decl_tag("arg:ctx") in verifier logic. Luckily, it is possible to ensure __arg_ctx works on old kernels through a bit of extra work done by libbpf, at least in a lot of common cases. To explain the overall idea, we need to go back at how context argument was supported in global funcs before __arg_ctx support was added. This was done based on special struct name checks in kernel. E.g., for BPF_PROG_TYPE_PERF_EVENT the expectation is that argument type `struct bpf_perf_event_data *` mark that argument as PTR_TO_CTX. This is all good as long as global function is used from the same BPF program types only, which is often not the case. If the same subprog has to be called from, say, kprobe and perf_event program types, there is no single definition that would satisfy BPF verifier. Subprog will have context argument either for kprobe (if using bpf_user_pt_regs_t struct name) or perf_event (with bpf_perf_event_data struct name), but not both. This limitation was the reason to add btf_decl_tag("arg:ctx"), making the actual argument type not important, so that user can just define "generic" signature: __noinline int global_subprog(void *ctx __arg_ctx) { ... } I won't belabor how libbpf is implementing subprograms, see a huge comment next to bpf_object_relocate_calls() function. The idea is that each main/entry BPF program gets its own copy of global_subprog's code appended. 
This per-program copy of global subprog code *and* associated func_info .BTF.ext information, pointing to FUNC -> FUNC_PROTO BTF type chain allows libbpf to simulate __arg_ctx behavior transparently, even if the kernel doesn't yet support __arg_ctx annotation natively. The idea is straightforward: each time we append global subprog's code and func_info information, we adjust its FUNC -> FUNC_PROTO type information, if necessary (that is, libbpf can detect the presence of btf_decl_tag("arg:ctx") just like BPF verifier would do it). The rest is just mechanical and somewhat painful BTF manipulation code. It's painful because we need to clone FUNC -> FUNC_PROTO, instead of reusing it, as same FUNC -> FUNC_PROTO chain might be used by another main BPF program within the same BPF object, so we can't just modify it in-place (and cloning BTF types within the same struct btf object is painful due to constant memory invalidation, see comments in code). Uploaded BPF object's BTF information has to work for all BPF programs at the same time. Once we have FUNC -> FUNC_PROTO clones, we make sure that instead of using some `void *ctx` parameter definition, we have an expected `struct bpf_perf_event_data *ctx` definition (as far as BPF verifier and kernel is concerned), which will mark it as context for BPF verifier. Same global subprog relocated and copied into another main BPF program will get different type information according to main program's type. It all works out in the end in a completely transparent way for end user. Libbpf maintains internal program type -> expected context struct name mapping internally. Note, not all BPF program types have named context struct, so this approach won't work for such programs (just like it didn't before __arg_ctx). So native __arg_ctx is still important to have in kernel to have generic context support across all BPF program types. 
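To make the user-facing win concrete, here is a minimal sketch (hypothetical program and subprog names, not code from this patch) of the pattern that now also works on kernels without native arg:ctx support: one generic global subprog shared by entry programs of different types.

	__noinline int shared_subprog(void *ctx __arg_ctx)
	{
		/* libbpf clones and fixes up this subprog's func BTF per calling program type */
		return bpf_get_smp_processor_id();
	}

	SEC("kprobe")
	int from_kprobe(void *ctx) { return shared_subprog(ctx); }

	SEC("perf_event")
	int from_perf_event(void *ctx) { return shared_subprog(ctx); }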
Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-8-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 256 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 252 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 836986974de3..c5a42ac309fd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6181,7 +6181,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, int err; /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't - * supprot func/line info + * support func/line info */ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) return 0; @@ -6663,8 +6663,247 @@ static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *pr return 0; } -static int -bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) +static struct { + enum bpf_prog_type prog_type; + const char *ctx_name; +} global_ctx_map[] = { + { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, + { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, + { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, + { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, + { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, + { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, + { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, + { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, + { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, + { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, + { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, + { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, + { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, + { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, + { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, + { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, + { BPF_PROG_TYPE_XDP, "xdp_md" }, + /* all other program types don't have "named" context structs */ +}; + +static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) +{ + int fn_id, fn_proto_id, ret_type_id, orig_proto_id; + int i, err, arg_cnt, fn_name_off, linkage; + struct btf_type *fn_t, *fn_proto_t, *t; + struct btf_param *p; + + /* caller already validated FUNC -> FUNC_PROTO validity */ + fn_t = btf_type_by_id(btf, orig_fn_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* Note that each btf__add_xxx() operation invalidates + * all btf_type and string pointers, so we need to be + * very careful when cloning BTF types. BTF type + * pointers have to be always refetched. And to avoid + * problems with invalidated string pointers, we + * add empty strings initially, then just fix up + * name_off offsets in place. Offsets are stable for + * existing strings, so that works out. 
+ */ + fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ + linkage = btf_func_linkage(fn_t); + orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ + ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ + arg_cnt = btf_vlen(fn_proto_t); + + /* clone FUNC_PROTO and its params */ + fn_proto_id = btf__add_func_proto(btf, ret_type_id); + if (fn_proto_id < 0) + return -EINVAL; + + for (i = 0; i < arg_cnt; i++) { + int name_off; + + /* copy original parameter data */ + t = btf_type_by_id(btf, orig_proto_id); + p = &btf_params(t)[i]; + name_off = p->name_off; + + err = btf__add_func_param(btf, "", p->type); + if (err) + return err; + + fn_proto_t = btf_type_by_id(btf, fn_proto_id); + p = &btf_params(fn_proto_t)[i]; + p->name_off = name_off; /* use remembered str offset */ + } + + /* clone FUNC now, btf__add_func() enforces non-empty name, so use + * entry program's name as a placeholder, which we replace immediately + * with original name_off + */ + fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); + if (fn_id < 0) + return -EINVAL; + + fn_t = btf_type_by_id(btf, fn_id); + fn_t->name_off = fn_name_off; /* reuse original string */ + + return fn_id; +} + +/* Check if main program or global subprog's function prototype has `arg:ctx` + * argument tags, and, if necessary, substitute correct type to match what BPF + * verifier would expect, taking into account specific program type. This + * allows to support __arg_ctx tag transparently on old kernels that don't yet + * have a native support for it in the verifier, making user's life much + * easier. + */ +static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *ctx_name = NULL, *ctx_tag = "arg:ctx"; + struct bpf_func_info_min *func_rec; + struct btf_type *fn_t, *fn_proto_t; + struct btf *btf = obj->btf; + const struct btf_type *t; + struct btf_param *p; + int ptr_id = 0, struct_id, tag_id, orig_fn_id; + int i, n, arg_idx, arg_cnt, err, rec_idx; + int *orig_ids; + + /* no .BTF.ext, no problem */ + if (!obj->btf_ext || !prog->func_info) + return 0; + + /* some BPF program types just don't have named context structs, so + * this fallback mechanism doesn't work for them + */ + for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { + if (global_ctx_map[i].prog_type != prog->type) + continue; + ctx_name = global_ctx_map[i].ctx_name; + break; + } + if (!ctx_name) + return 0; + + /* remember original func BTF IDs to detect if we already cloned them */ + orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); + if (!orig_ids) + return -ENOMEM; + for (i = 0; i < prog->func_info_cnt; i++) { + func_rec = prog->func_info + prog->func_info_rec_size * i; + orig_ids[i] = func_rec->type_id; + } + + /* go through each DECL_TAG with "arg:ctx" and see if it points to one + * of our subprogs; if yes and subprog is global and needs adjustment, + * clone and adjust FUNC -> FUNC_PROTO combo + */ + for (i = 1, n = btf__type_cnt(btf); i < n; i++) { + /* only DECL_TAG with "arg:ctx" value are interesting */ + t = btf__type_by_id(btf, i); + if (!btf_is_decl_tag(t)) + continue; + if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) + continue; + + /* only global funcs need adjustment, if at all */ + orig_fn_id = t->type; + fn_t = btf_type_by_id(btf, orig_fn_id); + if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) + continue; + + /* sanity check FUNC -> FUNC_PROTO chain, just in case */ + fn_proto_t = btf_type_by_id(btf, fn_t->type); + if (!fn_proto_t || 
!btf_is_func_proto(fn_proto_t)) + continue; + + /* find corresponding func_info record */ + func_rec = NULL; + for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { + if (orig_ids[rec_idx] == t->type) { + func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; + break; + } + } + /* current main program doesn't call into this subprog */ + if (!func_rec) + continue; + + /* some more sanity checking of DECL_TAG */ + arg_cnt = btf_vlen(fn_proto_t); + arg_idx = btf_decl_tag(t)->component_idx; + if (arg_idx < 0 || arg_idx >= arg_cnt) + continue; + + /* check if existing parameter already matches verifier expectations */ + p = &btf_params(fn_proto_t)[arg_idx]; + t = skip_mods_and_typedefs(btf, p->type, NULL); + if (btf_is_ptr(t) && + (t = skip_mods_and_typedefs(btf, t->type, NULL)) && + btf_is_struct(t) && + strcmp(btf__str_by_offset(btf, t->name_off), ctx_name) == 0) { + continue; /* no need for fix up */ + } + + /* clone fn/fn_proto, unless we already did it for another arg */ + if (func_rec->type_id == orig_fn_id) { + int fn_id; + + fn_id = clone_func_btf_info(btf, orig_fn_id, prog); + if (fn_id < 0) { + err = fn_id; + goto err_out; + } + + /* point func_info record to a cloned FUNC type */ + func_rec->type_id = fn_id; + } + + /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; + * we do it just once per main BPF program, as all global + * funcs share the same program type, so need only PTR -> + * STRUCT type chain + */ + if (ptr_id == 0) { + struct_id = btf__add_struct(btf, ctx_name, 0); + ptr_id = btf__add_ptr(btf, struct_id); + if (ptr_id < 0 || struct_id < 0) { + err = -EINVAL; + goto err_out; + } + } + + /* for completeness, clone DECL_TAG and point it to cloned param */ + tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); + if (tag_id < 0) { + err = -EINVAL; + goto err_out; + } + + /* all the BTF manipulations invalidated pointers, refetch them */ + fn_t = btf_type_by_id(btf, func_rec->type_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* fix up type ID pointed to by param */ + p = &btf_params(fn_proto_t)[arg_idx]; + p->type = ptr_id; + } + + free(orig_ids); + return 0; +err_out: + free(orig_ids); + return err; +} + +static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i, j; @@ -6745,19 +6984,28 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) } } } - /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; if (!prog->autoload) continue; + + /* Process data relos for main programs */ err = bpf_object__relocate_data(obj, prog); if (err) { pr_warn("prog '%s': failed to relocate data references: %d\n", prog->name, err); return err; } + + /* Fix up .BTF.ext information, if necessary */ + err = bpf_program_fixup_func_info(obj, prog); + if (err) { + pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", + prog->name, err); + return err; + } } return 0; -- cgit v1.2.3-70-g09d2 From 67fe459144dd629855bd9fb4b12bd9c4f792a8cf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:46 -0800 Subject: selftests/bpf: add arg:ctx cases to test_global_funcs tests Add a few extra cases of global funcs with context arguments. This time rely on "arg:ctx" decl_tag (__arg_ctx macro), but put it next to "classic" cases where context argument has to be of an exact type that BPF verifier expects (e.g., bpf_user_pt_regs_t for kprobe/uprobe). 
Colocating all these cases separately from other global func args that rely on arg:xxx decl tags (in verifier_global_subprogs.c) allows for simpler backwards compatibility testing on old kernels. All the cases in test_global_func_ctx_args.c are supposed to work on older kernels, which was manually validated during development. Acked-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-9-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/progs/test_global_func_ctx_args.c | 49 ++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c index 7faa8eef0598..9a06e5eb1fbe 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c +++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c @@ -102,3 +102,52 @@ int perf_event_ctx(void *ctx) { return perf_event_ctx_subprog(ctx); } + +/* this global subprog can be now called from many types of entry progs, each + * with different context type + */ +__weak int subprog_ctx_tag(void *ctx __arg_ctx) +{ + return bpf_get_stack(ctx, stack, sizeof(stack), 0); +} + +struct my_struct { int x; }; + +__weak int subprog_multi_ctx_tags(void *ctx1 __arg_ctx, + struct my_struct *mem, + void *ctx2 __arg_ctx) +{ + if (!mem) + return 0; + + return bpf_get_stack(ctx1, stack, sizeof(stack), 0) + + mem->x + + bpf_get_stack(ctx2, stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_ctx_raw_tp(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} + +SEC("?perf_event") +__success __log_level(2) +int arg_tag_ctx_perf(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} + +SEC("?kprobe") +__success __log_level(2) +int arg_tag_ctx_kprobe(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} -- cgit v1.2.3-70-g09d2 From 95226f5a36695fd5740e130016d9ed697cfb2bad Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Jan 2024 17:38:47 -0800 Subject: selftests/bpf: add __arg_ctx BTF rewrite test Add a test validating that libbpf uploads BTF and func_info with rewritten type information for arguments of global subprogs that are marked with __arg_ctx tag. 
Suggested-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240104013847.3875810-10-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/test_global_funcs.c | 106 +++++++++++++++++++++ 1 file changed, 106 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index e0879df38639..67d4ef9e62b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -20,6 +20,109 @@ #include "test_global_func17.skel.h" #include "test_global_func_ctx_args.skel.h" +#include "bpf/libbpf_internal.h" +#include "btf_helpers.h" + +static void check_ctx_arg_type(const struct btf *btf, const struct btf_param *p) +{ + const struct btf_type *t; + const char *s; + + t = btf__type_by_id(btf, p->type); + if (!ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_t")) + return; + + s = btf_type_raw_dump(btf, t->type); + if (!ASSERT_HAS_SUBSTR(s, "STRUCT 'bpf_perf_event_data' size=0 vlen=0", + "ctx_struct_t")) + return; +} + +static void subtest_ctx_arg_rewrite(void) +{ + struct test_global_func_ctx_args *skel = NULL; + struct bpf_prog_info info; + char func_info_buf[1024] __attribute__((aligned(8))); + struct bpf_func_info_min *rec; + struct btf *btf = NULL; + __u32 info_len = sizeof(info); + int err, fd, i; + + skel = test_global_func_ctx_args__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.arg_tag_ctx_perf, true); + + err = test_global_func_ctx_args__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + memset(&info, 0, sizeof(info)); + info.func_info = ptr_to_u64(&func_info_buf); + info.nr_func_info = 3; + info.func_info_rec_size = sizeof(struct bpf_func_info_min); + + fd = bpf_program__fd(skel->progs.arg_tag_ctx_perf); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_info")) + goto out; + + if (!ASSERT_EQ(info.nr_func_info, 3, "nr_func_info")) + goto out; + + btf = btf__load_from_kernel_by_id(info.btf_id); + if (!ASSERT_OK_PTR(btf, "obj_kern_btf")) + goto out; + + rec = (struct bpf_func_info_min *)func_info_buf; + for (i = 0; i < info.nr_func_info; i++, rec = (void *)rec + info.func_info_rec_size) { + const struct btf_type *fn_t, *proto_t; + const char *name; + + if (rec->insn_off == 0) + continue; /* main prog, skip */ + + fn_t = btf__type_by_id(btf, rec->type_id); + if (!ASSERT_OK_PTR(fn_t, "fn_type")) + goto out; + if (!ASSERT_EQ(btf_kind(fn_t), BTF_KIND_FUNC, "fn_type_kind")) + goto out; + proto_t = btf__type_by_id(btf, fn_t->type); + if (!ASSERT_OK_PTR(proto_t, "proto_type")) + goto out; + + name = btf__name_by_offset(btf, fn_t->name_off); + if (strcmp(name, "subprog_ctx_tag") == 0) { + /* int subprog_ctx_tag(void *ctx __arg_ctx) */ + if (!ASSERT_EQ(btf_vlen(proto_t), 1, "arg_cnt")) + goto out; + + /* arg 0 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[0]); + } else if (strcmp(name, "subprog_multi_ctx_tags") == 0) { + /* int subprog_multi_ctx_tags(void *ctx1 __arg_ctx, + * struct my_struct *mem, + * void *ctx2 __arg_ctx) + */ + if (!ASSERT_EQ(btf_vlen(proto_t), 3, "arg_cnt")) + goto out; + + /* arg 0 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[0]); + /* arg 2 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[2]); + } else { + ASSERT_FAIL("unexpected 
subprog %s", name); + goto out; + } + } + +out: + btf__free(btf); + test_global_func_ctx_args__destroy(skel); +} + void test_test_global_funcs(void) { RUN_TESTS(test_global_func1); @@ -40,4 +143,7 @@ void test_test_global_funcs(void) RUN_TESTS(test_global_func16); RUN_TESTS(test_global_func17); RUN_TESTS(test_global_func_ctx_args); + + if (test__start_subtest("ctx_arg_rewrite")) + subtest_ctx_arg_rewrite(); } -- cgit v1.2.3-70-g09d2 From 98e20e5e13d2811898921f999288be7151a11954 Mon Sep 17 00:00:00 2001 From: Quentin Deslandes Date: Tue, 26 Dec 2023 14:07:42 +0100 Subject: bpfilter: remove bpfilter bpfilter was supposed to convert iptables filtering rules into BPF programs on the fly, from the kernel, through a usermode helper. The base code for the UMH was introduced in 2018, and couple of attempts (2, 3) tried to introduce the BPF program generate features but were abandoned. bpfilter now sits in a kernel tree unused and unusable, occasionally causing confusion amongst Linux users (4, 5). As bpfilter is now developed in a dedicated repository on GitHub (6), it was suggested a couple of times this year (LSFMM/BPF 2023, LPC 2023) to remove the deprecated kernel part of the project. This is the purpose of this patch. [1]: https://lore.kernel.org/lkml/20180522022230.2492505-1-ast@kernel.org/ [2]: https://lore.kernel.org/bpf/20210829183608.2297877-1-me@ubique.spb.ru/#t [3]: https://lore.kernel.org/lkml/20221224000402.476079-1-qde@naccy.de/ [4]: https://dxuuu.xyz/bpfilter.html [5]: https://github.com/linuxkit/linuxkit/pull/3904 [6]: https://github.com/facebook/bpfilter Signed-off-by: Quentin Deslandes Link: https://lore.kernel.org/r/20231226130745.465988-1-qde@naccy.de Signed-off-by: Alexei Starovoitov --- arch/loongarch/configs/loongson3_defconfig | 1 - include/linux/bpfilter.h | 24 ----- include/uapi/linux/bpfilter.h | 21 ----- net/Kconfig | 2 - net/Makefile | 1 - net/bpfilter/.gitignore | 2 - net/bpfilter/Kconfig | 23 ----- net/bpfilter/Makefile | 20 ----- net/bpfilter/bpfilter_kern.c | 136 ----------------------------- net/bpfilter/bpfilter_umh_blob.S | 7 -- net/bpfilter/main.c | 64 -------------- net/bpfilter/msgfmt.h | 17 ---- net/ipv4/Makefile | 2 - net/ipv4/bpfilter/Makefile | 2 - net/ipv4/bpfilter/sockopt.c | 71 --------------- net/ipv4/ip_sockglue.c | 12 --- tools/bpf/bpftool/feature.c | 4 - tools/testing/selftests/bpf/config.aarch64 | 1 - tools/testing/selftests/bpf/config.s390x | 1 - tools/testing/selftests/bpf/config.x86_64 | 1 - tools/testing/selftests/hid/config | 1 - 21 files changed, 413 deletions(-) delete mode 100644 include/linux/bpfilter.h delete mode 100644 include/uapi/linux/bpfilter.h delete mode 100644 net/bpfilter/.gitignore delete mode 100644 net/bpfilter/Kconfig delete mode 100644 net/bpfilter/Makefile delete mode 100644 net/bpfilter/bpfilter_kern.c delete mode 100644 net/bpfilter/bpfilter_umh_blob.S delete mode 100644 net/bpfilter/main.c delete mode 100644 net/bpfilter/msgfmt.h delete mode 100644 net/ipv4/bpfilter/Makefile delete mode 100644 net/ipv4/bpfilter/sockopt.c (limited to 'tools') diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig index 9c333d133c30..60e331af9839 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -276,7 +276,6 @@ CONFIG_BRIDGE_EBT_T_NAT=m CONFIG_BRIDGE_EBT_ARP=m CONFIG_BRIDGE_EBT_IP=m CONFIG_BRIDGE_EBT_IP6=m -CONFIG_BPFILTER=y CONFIG_IP_SCTP=m CONFIG_RDS=y CONFIG_L2TP=m diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h 
deleted file mode 100644 index 736ded4905e0..000000000000 --- a/include/linux/bpfilter.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BPFILTER_H -#define _LINUX_BPFILTER_H - -#include -#include -#include - -struct sock; -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen); -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen); - -struct bpfilter_umh_ops { - struct umd_info info; - /* since ip_getsockopt() can run in parallel, serialize access to umh */ - struct mutex lock; - int (*sockopt)(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen, bool is_set); - int (*start)(void); -}; -extern struct bpfilter_umh_ops bpfilter_ops; -#endif diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h deleted file mode 100644 index cbc1f5813f50..000000000000 --- a/include/uapi/linux/bpfilter.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_BPFILTER_H -#define _UAPI_LINUX_BPFILTER_H - -#include - -enum { - BPFILTER_IPT_SO_SET_REPLACE = 64, - BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, - BPFILTER_IPT_SET_MAX, -}; - -enum { - BPFILTER_IPT_SO_GET_INFO = 64, - BPFILTER_IPT_SO_GET_ENTRIES = 65, - BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, - BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, - BPFILTER_IPT_GET_MAX, -}; - -#endif /* _UAPI_LINUX_BPFILTER_H */ diff --git a/net/Kconfig b/net/Kconfig index 3ec6bc98fa05..4adc47d0c9c2 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -233,8 +233,6 @@ source "net/bridge/netfilter/Kconfig" endif -source "net/bpfilter/Kconfig" - source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4c4dc535453d..b06b5539e7a6 100644 --- a/net/Makefile +++ b/net/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ obj-y += ipv6/ -obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore deleted file mode 100644 index f34e85ee8204..000000000000 --- a/net/bpfilter/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -bpfilter_umh diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig deleted file mode 100644 index 3d4a21462458..000000000000 --- a/net/bpfilter/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menuconfig BPFILTER - bool "BPF based packet filtering framework (BPFILTER)" - depends on BPF && INET - select USERMODE_DRIVER - help - This builds experimental bpfilter framework that is aiming to - provide netfilter compatible functionality via BPF - -if BPFILTER -config BPFILTER_UMH - tristate "bpfilter kernel module with user mode helper" - depends on CC_CAN_LINK - depends on m || CC_CAN_LINK_STATIC - default m - help - This builds bpfilter kernel module with embedded user mode helper - - Note: To compile this as built-in, your toolchain must support - building static binaries, since rootfs isn't mounted at the time - when __init functions are called and do_execv won't be able to find - the elf interpreter. 
-endif diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile deleted file mode 100644 index cdac82b8c53a..000000000000 --- a/net/bpfilter/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the Linux BPFILTER layer. -# - -userprogs := bpfilter_umh -bpfilter_umh-objs := main.o -userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi - -ifeq ($(CONFIG_BPFILTER_UMH), y) -# builtin bpfilter_umh should be linked with -static -# since rootfs isn't mounted at the time of __init -# function is called and do_execv won't find elf interpreter -userldflags += -static -endif - -$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh - -obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o -bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c deleted file mode 100644 index 97e129e3f31c..000000000000 --- a/net/bpfilter/bpfilter_kern.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include "msgfmt.h" - -extern char bpfilter_umh_start; -extern char bpfilter_umh_end; - -static void shutdown_umh(void) -{ - struct umd_info *info = &bpfilter_ops.info; - struct pid *tgid = info->tgid; - - if (tgid) { - kill_pid(tgid, SIGKILL, 1); - wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - umd_cleanup_helper(info); - } -} - -static void __stop_umh(void) -{ - if (IS_ENABLED(CONFIG_INET)) - shutdown_umh(); -} - -static int bpfilter_send_req(struct mbox_request *req) -{ - struct mbox_reply reply; - loff_t pos = 0; - ssize_t n; - - if (!bpfilter_ops.info.tgid) - return -EFAULT; - pos = 0; - n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req), - &pos); - if (n != sizeof(*req)) { - pr_err("write fail %zd\n", n); - goto stop; - } - pos = 0; - n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply), - &pos); - if (n != sizeof(reply)) { - pr_err("read fail %zd\n", n); - goto stop; - } - return reply.status; -stop: - __stop_umh(); - return -EFAULT; -} - -static int bpfilter_process_sockopt(struct sock *sk, int optname, - sockptr_t optval, unsigned int optlen, - bool is_set) -{ - struct mbox_request req = { - .is_set = is_set, - .pid = current->pid, - .cmd = optname, - .addr = (uintptr_t)optval.user, - .len = optlen, - }; - if (sockptr_is_kernel(optval)) { - pr_err("kernel access not supported\n"); - return -EFAULT; - } - return bpfilter_send_req(&req); -} - -static int start_umh(void) -{ - struct mbox_request req = { .pid = current->pid }; - int err; - - /* fork usermode process */ - err = fork_usermode_driver(&bpfilter_ops.info); - if (err) - return err; - pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid)); - - /* health check that usermode process started correctly */ - if (bpfilter_send_req(&req) != 0) { - shutdown_umh(); - return -EFAULT; - } - - return 0; -} - -static int __init load_umh(void) -{ - int err; - - err = umd_load_blob(&bpfilter_ops.info, - &bpfilter_umh_start, - &bpfilter_umh_end - &bpfilter_umh_start); - if (err) - return err; - - mutex_lock(&bpfilter_ops.lock); - err = start_umh(); - if (!err && IS_ENABLED(CONFIG_INET)) { - bpfilter_ops.sockopt = &bpfilter_process_sockopt; - bpfilter_ops.start = &start_umh; - } - mutex_unlock(&bpfilter_ops.lock); - if (err) - umd_unload_blob(&bpfilter_ops.info); - return err; -} - -static void __exit fini_umh(void) -{ - mutex_lock(&bpfilter_ops.lock); - 
if (IS_ENABLED(CONFIG_INET)) { - shutdown_umh(); - bpfilter_ops.start = NULL; - bpfilter_ops.sockopt = NULL; - } - mutex_unlock(&bpfilter_ops.lock); - - umd_unload_blob(&bpfilter_ops.info); -} -module_init(load_umh); -module_exit(fini_umh); -MODULE_LICENSE("GPL"); diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S deleted file mode 100644 index 40311d10d2f2..000000000000 --- a/net/bpfilter/bpfilter_umh_blob.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .section .init.rodata, "a" - .global bpfilter_umh_start -bpfilter_umh_start: - .incbin "net/bpfilter/bpfilter_umh" - .global bpfilter_umh_end -bpfilter_umh_end: diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c deleted file mode 100644 index 291a92546246..000000000000 --- a/net/bpfilter/main.c +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include "../../include/uapi/linux/bpf.h" -#include -#include "msgfmt.h" - -FILE *debug_f; - -static int handle_get_cmd(struct mbox_request *cmd) -{ - switch (cmd->cmd) { - case 0: - return 0; - default: - break; - } - return -ENOPROTOOPT; -} - -static int handle_set_cmd(struct mbox_request *cmd) -{ - return -ENOPROTOOPT; -} - -static void loop(void) -{ - while (1) { - struct mbox_request req; - struct mbox_reply reply; - int n; - - n = read(0, &req, sizeof(req)); - if (n != sizeof(req)) { - fprintf(debug_f, "invalid request %d\n", n); - return; - } - - reply.status = req.is_set ? - handle_set_cmd(&req) : - handle_get_cmd(&req); - - n = write(1, &reply, sizeof(reply)); - if (n != sizeof(reply)) { - fprintf(debug_f, "reply failed %d\n", n); - return; - } - } -} - -int main(void) -{ - debug_f = fopen("/dev/kmsg", "w"); - setvbuf(debug_f, 0, _IOLBF, 0); - fprintf(debug_f, "<5>Started bpfilter\n"); - loop(); - fclose(debug_f); - return 0; -} diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h deleted file mode 100644 index 98d121c62945..000000000000 --- a/net/bpfilter/msgfmt.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_BPFILTER_MSGFMT_H -#define _NET_BPFILTER_MSGFMT_H - -struct mbox_request { - __u64 addr; - __u32 len; - __u32 is_set; - __u32 cmd; - __u32 pid; -}; - -struct mbox_reply { - __u32 status; -}; - -#endif diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index e144a02a6a61..ec36d2ec059e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -16,8 +16,6 @@ obj-y := route.o inetpeer.o protocol.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ metrics.o netlink.o nexthop.o udp_tunnel_stub.o -obj-$(CONFIG_BPFILTER) += bpfilter/ - obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile deleted file mode 100644 index 00af5305e05a..000000000000 --- a/net/ipv4/bpfilter/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_BPFILTER) += sockopt.o diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c deleted file mode 100644 index 193bcc2acccc..000000000000 --- a/net/ipv4/bpfilter/sockopt.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct bpfilter_umh_ops bpfilter_ops; -EXPORT_SYMBOL_GPL(bpfilter_ops); - -static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, 
- unsigned int optlen, bool is_set) -{ - int err; - mutex_lock(&bpfilter_ops.lock); - if (!bpfilter_ops.sockopt) { - mutex_unlock(&bpfilter_ops.lock); - request_module("bpfilter"); - mutex_lock(&bpfilter_ops.lock); - - if (!bpfilter_ops.sockopt) { - err = -ENOPROTOOPT; - goto out; - } - } - if (bpfilter_ops.info.tgid && - thread_group_exited(bpfilter_ops.info.tgid)) - umd_cleanup_helper(&bpfilter_ops.info); - - if (!bpfilter_ops.info.tgid) { - err = bpfilter_ops.start(); - if (err) - goto out; - } - err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set); -out: - mutex_unlock(&bpfilter_ops.lock); - return err; -} - -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen) -{ - return bpfilter_mbox_request(sk, optname, optval, optlen, true); -} - -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen) -{ - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, - false); -} - -static int __init bpfilter_sockopt_init(void) -{ - mutex_init(&bpfilter_ops.lock); - bpfilter_ops.info.tgid = NULL; - bpfilter_ops.info.driver_name = "bpfilter_umh"; - - return 0; -} -device_initcall(bpfilter_sockopt_init); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 66247e8b429e..7aa9dc0e6760 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -47,8 +47,6 @@ #include #include -#include - /* * SOL_IP control messages. */ @@ -1411,11 +1409,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_SET_REPLACE && - optname < BPFILTER_IPT_SET_MAX) - err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); -#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && @@ -1763,11 +1756,6 @@ int ip_getsockopt(struct sock *sk, int level, err = do_ip_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_GET_INFO && - optname < BPFILTER_IPT_GET_MAX) - err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); -#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index edda4fc2c4d0..708733b0ea06 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -426,10 +426,6 @@ static void probe_kernel_image_config(const char *define_prefix) { "CONFIG_BPF_STREAM_PARSER", }, /* xt_bpf module for passing BPF programs to netfilter */ { "CONFIG_NETFILTER_XT_MATCH_BPF", }, - /* bpfilter back-end for iptables */ - { "CONFIG_BPFILTER", }, - /* bpftilter module with "user mode helper" */ - { "CONFIG_BPFILTER_UMH", }, /* test_bpf module for BPF tests */ { "CONFIG_TEST_BPF", }, diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 29c8635c5722..3720b7611523 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -11,7 +11,6 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_SD=y CONFIG_BONDING=y -CONFIG_BPFILTER=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y 
CONFIG_BPF_PRELOAD_UMD=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index e93330382849..706931a8c2c6 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -9,7 +9,6 @@ CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y -CONFIG_BPFILTER=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_FREEZER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index b946088017f1..5680befae8c6 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -19,7 +19,6 @@ CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y -CONFIG_BPFILTER=y CONFIG_BSD_DISKLABEL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_CFS_BANDWIDTH=y diff --git a/tools/testing/selftests/hid/config b/tools/testing/selftests/hid/config index 4f425178b56f..1758b055f295 100644 --- a/tools/testing/selftests/hid/config +++ b/tools/testing/selftests/hid/config @@ -1,5 +1,4 @@ CONFIG_BPF_EVENTS=y -CONFIG_BPFILTER=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT=y CONFIG_BPF_KPROBE_OVERRIDE=y -- cgit v1.2.3-70-g09d2 From 445aea5afda4759c13dc5c492b309cc1d5c1c486 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 2 Jan 2024 20:30:36 +0100 Subject: selftests/bpf: Double the size of test_loader log Testing long jumps requires having >32k instructions. That many instructions require the verifier log buffer of 2 megabytes. The regular test_progs run doesn't need an increased buffer, since gotol test with 40k instructions doesn't request a log, but test_progs -v will set the verifier log level. Hence to avoid breaking gotol test with -v increase the buffer size. Signed-off-by: Ilya Leoshkevich Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240102193531.3169422-3-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 74ceb7877ae2..f01391021218 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -12,7 +12,7 @@ #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) -#define TEST_LOADER_LOG_BUF_SZ 1048576 +#define TEST_LOADER_LOG_BUF_SZ 2097152 #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" -- cgit v1.2.3-70-g09d2 From 63fac34669e4cc666f943173ed2aa76b8db999f0 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 2 Jan 2024 20:30:37 +0100 Subject: selftests/bpf: Test gotol with large offsets Test gotol with offsets that don't fit into a short (i.e., larger than 32k or smaller than -32k). 
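As a side note (my reading of the BPF instruction encoding, not spelled out in the patch): a plain goto carries its jump target in the 16-bit off field, giving it roughly +/-32k instructions of reach, whereas gotol carries the target in the 32-bit imm field. Padding the test program with ~40000 trivial instructions therefore forces a jump that only gotol can encode. A schematic illustration (labels only, not a loadable program):

    /* goto: BPF_JMP | BPF_JA, target in the 16-bit 'off' field,
     * reachable only within about +/-32k instructions
     */
    goto short_target;

    /* gotol: BPF_JMP32 | BPF_JA, target in the 32-bit 'imm' field,
     * required once the jump must cross ~40000 instructions
     */
    gotol far_target;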
Signed-off-by: Ilya Leoshkevich Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/r/20240102193531.3169422-4-iii@linux.ibm.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/verifier_gotol.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index d1edbcff9a18..05a329ee45ee 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -33,6 +33,25 @@ l3_%=: \ : __clobber_all); } +SEC("socket") +__description("gotol, large_imm") +__success __failure_unpriv __retval(40000) +__naked void gotol_large_imm(void) +{ + asm volatile (" \ + gotol 1f; \ +0: \ + r0 = 0; \ + .rept 40000; \ + r0 += 1; \ + .endr; \ + exit; \ +1: gotol 0b; \ +" : + : + : __clobber_all); +} + #else SEC("socket") -- cgit v1.2.3-70-g09d2 From 5c5371e069e1ffc204dda8b20c609b170b823165 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Wed, 3 Jan 2024 20:05:45 +0100 Subject: selftests/bpf: Add test for recursive attachment of tracing progs Verify the fact that only one fentry prog could be attached to another fentry, building up an attachment chain of limited size. Use existing bpf_testmod as a start of the chain. Acked-by: Jiri Olsa Acked-by: Song Liu Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com> Link: https://lore.kernel.org/r/20240103190559.14750-3-9erthalion6@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/recursive_attach.c | 107 +++++++++++++++++++++ .../testing/selftests/bpf/progs/fentry_recursive.c | 14 +++ .../selftests/bpf/progs/fentry_recursive_target.c | 16 +++ 3 files changed, 137 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/recursive_attach.c create mode 100644 tools/testing/selftests/bpf/progs/fentry_recursive.c create mode 100644 tools/testing/selftests/bpf/progs/fentry_recursive_target.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c new file mode 100644 index 000000000000..4bd0a0e4231e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include +#include "fentry_recursive.skel.h" +#include "fentry_recursive_target.skel.h" +#include +#include "bpf/libbpf_internal.h" + +/* Test recursive attachment of tracing progs with more than one nesting level + * is not possible. Create a chain of attachment, verify that the last prog + * will fail. Depending on the arguments, following cases are tested: + * + * - Recursive loading of tracing progs, without attaching (attach = false, + * detach = false). The chain looks like this: + * load target + * load fentry1 -> target + * load fentry2 -> fentry1 (fail) + * + * - Recursive attach of tracing progs (attach = true, detach = false). The + * chain looks like this: + * load target + * load fentry1 -> target + * attach fentry1 -> target + * load fentry2 -> fentry1 (fail) + * + * - Recursive attach and detach of tracing progs (attach = true, detach = + * true). This validates that attach_tracing_prog flag will be set throughout + * the whole lifecycle of an fentry prog, independently from whether it's + * detached. 
The chain looks like this: + * load target + * load fentry1 -> target + * attach fentry1 -> target + * detach fentry1 + * load fentry2 -> fentry1 (fail) + */ +static void test_recursive_fentry_chain(bool attach, bool detach) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_chain[2] = {}; + struct bpf_program *prog; + int prev_fd, err; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load")) + return; + + /* Create an attachment chain with two fentry progs */ + for (int i = 0; i < 2; i++) { + tracing_chain[i] = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_chain[i], "fentry_recursive__open")) + goto close_prog; + + /* The first prog in the chain is going to be attached to the target + * fentry program, the second one to the previous in the chain. + */ + prog = tracing_chain[i]->progs.recursive_attach; + if (i == 0) { + prev_fd = bpf_program__fd(target_skel->progs.test1); + err = bpf_program__set_attach_target(prog, prev_fd, "test1"); + } else { + prev_fd = bpf_program__fd(tracing_chain[i-1]->progs.recursive_attach); + err = bpf_program__set_attach_target(prog, prev_fd, "recursive_attach"); + } + + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + err = fentry_recursive__load(tracing_chain[i]); + /* The first attach should succeed, the second fail */ + if (i == 0) { + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto close_prog; + + if (attach) { + err = fentry_recursive__attach(tracing_chain[i]); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto close_prog; + } + + if (detach) { + /* Flag attach_tracing_prog should still be set, preventing + * attachment of the following prog. + */ + fentry_recursive__detach(tracing_chain[i]); + } + } else { + if (!ASSERT_ERR(err, "fentry_recursive__load")) + goto close_prog; + } + } + +close_prog: + fentry_recursive_target__destroy(target_skel); + for (int i = 0; i < 2; i++) { + fentry_recursive__destroy(tracing_chain[i]); + } +} + +void test_recursive_fentry(void) +{ + if (test__start_subtest("attach")) + test_recursive_fentry_chain(true, false); + if (test__start_subtest("load")) + test_recursive_fentry_chain(false, false); + if (test__start_subtest("detach")) + test_recursive_fentry_chain(true, true); +} diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive.c b/tools/testing/selftests/bpf/progs/fentry_recursive.c new file mode 100644 index 000000000000..2c9fb5ac42b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_recursive.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* Dummy fentry bpf prog for testing fentry attachment chains */ +SEC("fentry/XXX") +int BPF_PROG(recursive_attach, int a) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive_target.c b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c new file mode 100644 index 000000000000..cb18a8bfffac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* Dummy fentry bpf prog for testing fentry attachment chains. It's going to be + * a start of the chain. 
+ */ +SEC("fentry/bpf_testmod_fentry_test1") +int BPF_PROG(test1, int a) +{ + return 0; +} -- cgit v1.2.3-70-g09d2 From e02feb3f1f47509ec1e07b604bfbeff8c3b4e639 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Wed, 3 Jan 2024 20:05:47 +0100 Subject: selftests/bpf: Test re-attachment fix for bpf_tracing_prog_attach Add a test case to verify the fix for "prog->aux->dst_trampoline and tgt_prog is NULL" branch in bpf_tracing_prog_attach. The sequence of events: 1. load rawtp program 2. load fentry program with rawtp as target_fd 3. create tracing link for fentry program with target_fd = 0 4. repeat 3 Acked-by: Jiri Olsa Acked-by: Song Liu Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com> Link: https://lore.kernel.org/r/20240103190559.14750-5-9erthalion6@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/recursive_attach.c | 44 ++++++++++++++++++++++ .../selftests/bpf/progs/fentry_recursive_target.c | 9 +++++ 2 files changed, 53 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c index 4bd0a0e4231e..8100509e561b 100644 --- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -105,3 +105,47 @@ void test_recursive_fentry(void) if (test__start_subtest("detach")) test_recursive_fentry_chain(true, true); } + +/* Test that a tracing prog reattachment (when we land in + * "prog->aux->dst_trampoline and tgt_prog is NULL" branch in + * bpf_tracing_prog_attach) does not lead to a crash due to missing attach_btf + */ +void test_fentry_attach_btf_presence(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, link_fd, tgt_prog_fd; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load")) + goto close_prog; + + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto close_prog; + + prog = tracing_skel->progs.recursive_attach; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = bpf_program__set_attach_target(prog, tgt_prog_fd, "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto close_prog; + + tgt_prog_fd = bpf_program__fd(tracing_skel->progs.recursive_attach); + link_fd = bpf_link_create(tgt_prog_fd, 0, BPF_TRACE_FENTRY, NULL); + if (!ASSERT_GE(link_fd, 0, "link_fd")) + goto close_prog; + + fentry_recursive__detach(tracing_skel); + + err = fentry_recursive__attach(tracing_skel); + ASSERT_ERR(err, "fentry_recursive__attach"); + +close_prog: + fentry_recursive_target__destroy(target_skel); + fentry_recursive__destroy(tracing_skel); +} diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive_target.c b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c index cb18a8bfffac..267c876d0aba 100644 --- a/tools/testing/selftests/bpf/progs/fentry_recursive_target.c +++ b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c @@ -14,3 +14,12 @@ int BPF_PROG(test1, int a) { return 0; } + +/* Dummy bpf prog for testing attach_btf presence when attaching an fentry + * program. 
+ */ +SEC("raw_tp/sys_enter") +int BPF_PROG(fentry_target, struct pt_regs *regs, long id) +{ + return 0; +} -- cgit v1.2.3-70-g09d2 From 4e321d590cec6053cb3c566413794706035ee638 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Wed, 3 Jan 2024 15:48:35 +0100 Subject: selftests/net: fix GRO coalesce test and add ext header coalesce tests Currently there is no test which checks that IPv6 extension header packets successfully coalesce. This commit adds a test, which verifies two IPv6 packets with HBH extension headers do coalesce, and another test which checks that packets with different extension header data do not coalesce in GRO. I changed the receive socket filter to accept a packet with one extension header. This change exposed a bug in the fragment test -- the old BPF did not accept the fragment packet. I updated correct_num_packets in the fragment test accordingly. Signed-off-by: Richard Gobert Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/69282fed-2415-47e8-b3d3-34939ec3eb56@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 93 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 30024d0ed373..353e1e867fbb 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -71,6 +71,12 @@ #define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MIN_EXTHDR_SIZE 8 +#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" +#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" + +#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) static const char *addr6_src = "fdaa::2"; static const char *addr6_dst = "fdaa::1"; @@ -104,7 +110,7 @@ static void setup_sock_filter(int fd) const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); const int ethproto_off = offsetof(struct ethhdr, h_proto); int optlen = 0; - int ipproto_off; + int ipproto_off, opt_ipproto_off; int next_off; if (proto == PF_INET) @@ -116,14 +122,30 @@ static void setup_sock_filter(int fd) if (strcmp(testname, "ip") == 0) { if (proto == PF_INET) optlen = sizeof(struct ip_timestamp); - else - optlen = sizeof(struct ip6_frag); + else { + BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); + + /* same size for HBH and Fragment extension header types */ + optlen = MIN_EXTHDR_SIZE; + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) + + offsetof(struct ip6_ext, ip6e_nxt); + } } + /* this filter validates the following: + * - packet is IPv4/IPv6 according to the running test. + * - packet is TCP. Also handles the case of one extension header and then TCP. + * - checks the packet tcp dport equals to DPORT. Also handles the case of one + * extension header and then TCP. 
+ */ struct sock_filter filter[] = { BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9), BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), @@ -576,6 +598,39 @@ static void add_ipv4_ts_option(void *buf, void *optpkt) iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); } +static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload) +{ + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset); + struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN); + char *exthdr_payload_start = (char *)(exthdr + 1); + + exthdr->hdrlen = 0; + exthdr->nexthdr = IPPROTO_TCP; + + memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr)); + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph->nexthdr = exthdr_type; + iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE); +} + +static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); +} + /* IPv4 options shouldn't coalesce */ static void send_ip_options(int fd, struct sockaddr_ll *daddr) { @@ -697,7 +752,7 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr) create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); write_packet(fd, buf, bufpkt_len, daddr); } - + sleep(1); create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); memset(extpkt, 0, extpkt_len); @@ -760,6 +815,7 @@ static void check_recv_pkts(int fd, int *correct_payload, vlog("}, Total %d packets\nReceived {", correct_num_pkts); while (1) { + ip_ext_len = 0; pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); if (pkt_size < 0) error(1, errno, "could not receive"); @@ -767,7 +823,7 @@ static void check_recv_pkts(int fd, int *correct_payload, if (iph->version == 4) ip_ext_len = (iph->ihl - 5) * 4; else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) - ip_ext_len = sizeof(struct ip6_frag); + ip_ext_len = MIN_EXTHDR_SIZE; tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); @@ -880,7 +936,21 @@ static void gro_sender(void) sleep(1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (proto == PF_INET6) { + sleep(1); send_fragment6(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with same payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with different payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, 
EXT_PAYLOAD_2); + sleep(1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } } else if (strcmp(testname, "large") == 0) { @@ -997,6 +1067,17 @@ static void gro_receiver(void) */ printf("fragmented ip6 doesn't coalesce: "); correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("ipv6 with ext header does coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("ipv6 with ext header with different payloads doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; check_recv_pkts(rxfd, correct_payload, 2); } } else if (strcmp(testname, "large") == 0) { -- cgit v1.2.3-70-g09d2 From 2114e83381d3289a88378850f43069e79f848083 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Thu, 4 Jan 2024 09:11:09 -0500 Subject: selftests: forwarding: Avoid failures to source net/lib.sh The expression "source ../lib.sh" added to net/forwarding/lib.sh in commit 25ae948b4478 ("selftests/net: add lib.sh") does not work for tests outside net/forwarding which source net/forwarding/lib.sh (1). It also does not work in some cases where only a subset of tests are exported (2). Avoid the problems mentioned above by replacing the faulty expression with a copy of the content from net/lib.sh which is used by files under net/forwarding. A more thorough solution which avoids duplicating content between net/lib.sh and net/forwarding/lib.sh has been posted here: https://lore.kernel.org/netdev/20231222135836.992841-1-bpoirier@nvidia.com/ The approach in the current patch is a stopgap solution to avoid submitting large changes at the eleventh hour of this development cycle. Example of problem 1) tools/testing/selftests/drivers/net/bonding$ ./dev_addr_lists.sh ./net_forwarding_lib.sh: line 41: ../lib.sh: No such file or directory TEST: bonding cleanup mode active-backup [ OK ] TEST: bonding cleanup mode 802.3ad [ OK ] TEST: bonding LACPDU multicast address to slave (from bond down) [ OK ] TEST: bonding LACPDU multicast address to slave (from bond up) [ OK ] An error message is printed but since the test does not use functions from net/lib.sh, the test results are not affected. Example of problem 2) tools/testing/selftests$ make install TARGETS="net/forwarding" tools/testing/selftests$ cd kselftest_install/net/forwarding/ tools/testing/selftests/kselftest_install/net/forwarding$ ./pedit_ip.sh veth{0..3} lib.sh: line 41: ../lib.sh: No such file or directory TEST: ping [ OK ] TEST: ping6 [ OK ] ./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth1 ingress pedit ip src set 198.51.100.1 [FAIL] Expected to get 10 packets, but got . ./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth2 egress pedit ip src set 198.51.100.1 [FAIL] Expected to get 10 packets, but got . ./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth1 ingress pedit ip dst set 198.51.100.1 [FAIL] Expected to get 10 packets, but got . ./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth2 egress pedit ip dst set 198.51.100.1 [FAIL] Expected to get 10 packets, but got . ./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth1 ingress pedit ip6 src set 2001:db8:2::1 [FAIL] Expected to get 10 packets, but got . ./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth2 egress pedit ip6 src set 2001:db8:2::1 [FAIL] Expected to get 10 packets, but got . 
./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth1 ingress pedit ip6 dst set 2001:db8:2::1 [FAIL] Expected to get 10 packets, but got . ./pedit_ip.sh: line 135: busywait: command not found TEST: dev veth2 egress pedit ip6 dst set 2001:db8:2::1 [FAIL] Expected to get 10 packets, but got . In this case, the test results are affected. Fixes: 25ae948b4478 ("selftests/net: add lib.sh") Suggested-by: Ido Schimmel Suggested-by: Petr Machata Reviewed-by: Ido Schimmel Tested-by: Petr Machata Signed-off-by: Benjamin Poirier Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20240104141109.100672-1-bpoirier@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 69ef2a40df21..8a61464ab6eb 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -38,7 +38,32 @@ if [[ -f $relative_path/forwarding.config ]]; then source "$relative_path/forwarding.config" fi -source ../lib.sh +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + ############################################################################## # Sanity checks -- cgit v1.2.3-70-g09d2 From 2ffca83aa39ce70003a792acf3443175fc82a655 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 6 Jan 2024 08:11:28 -0500 Subject: net/sched: Remove ipt action tests Commit ba24ea129126 ("net/sched: Retire ipt action") removed the ipt action but not the testcases. This patch removes the outstanding tdc tests. Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- .../selftests/tc-testing/tc-tests/actions/xt.json | 243 --------------------- 1 file changed, 243 deletions(-) delete mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/xt.json (limited to 'tools') diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json b/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json deleted file mode 100644 index 1a92e8898fec..000000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json +++ /dev/null @@ -1,243 +0,0 @@ -[ - { - "id": "2029", - "name": "Add xt action with log-prefix", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix PONG index 100", - "expExitCode": "0", - "verifyCmd": "$TC action ls action xt", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 100 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action xt" - ] - }, - { - "id": "3562", - "name": "Replace xt action log-prefix", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - [ - "$TC action add action xt -j LOG --log-prefix PONG index 1", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action replace action xt -j LOG --log-prefix WIN index 1", - "expExitCode": "0", - "verifyCmd": "$TC action get action xt index 1", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"WIN\".*index 1 ref", - "matchCount": "1", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "8291", - "name": "Delete xt action with valid index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - [ - "$TC action add action xt -j LOG --log-prefix PONG index 1000", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action delete action xt index 1000", - "expExitCode": "0", - "verifyCmd": "$TC action get action xt index 1000", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 1000 ref", - "matchCount": "0", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "5169", - "name": "Delete xt action with invalid index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - [ - "$TC action add action xt -j LOG --log-prefix PONG index 1000", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action delete action xt index 333", - "expExitCode": "255", - "verifyCmd": "$TC action get action xt index 1000", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 1000 ref", - "matchCount": "1", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "7284", - "name": "List xt actions", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC action flush action xt", - 0, - 1, - 255 - ], - "$TC action add action xt -j LOG --log-prefix PONG index 1001", - "$TC action add action xt -j LOG --log-prefix WIN index 1002", - "$TC action add action xt -j LOG --log-prefix LOSE index 1003" - ], - "cmdUnderTest": "$TC action list action xt", - "expExitCode": "0", - "verifyCmd": "$TC action list action xt", - "matchPattern": "action order [0-9]*: tablename:", - 
"matchCount": "3", - "teardown": [ - "$TC actions flush action xt" - ] - }, - { - "id": "5010", - "name": "Flush xt actions", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - "$TC action add action xt -j LOG --log-prefix PONG index 1001", - "$TC action add action xt -j LOG --log-prefix WIN index 1002", - "$TC action add action xt -j LOG --log-prefix LOSE index 1003" - ], - "cmdUnderTest": "$TC action flush action xt", - "expExitCode": "0", - "verifyCmd": "$TC action list action xt", - "matchPattern": "action order [0-9]*: tablename:", - "matchCount": "0", - "teardown": [ - "$TC actions flush action xt" - ] - }, - { - "id": "8437", - "name": "Add xt action with duplicate index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - "$TC action add action xt -j LOG --log-prefix PONG index 101" - ], - "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix WIN index 101", - "expExitCode": "255", - "verifyCmd": "$TC action get action xt index 101", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 101", - "matchCount": "1", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "2837", - "name": "Add xt action with invalid index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix WIN index 4294967296", - "expExitCode": "255", - "verifyCmd": "$TC action ls action xt", - "matchPattern": "action order [0-9]*:*target LOG level warning prefix \"WIN\"", - "matchCount": "0", - "teardown": [ - "$TC action flush action xt" - ] - } -] -- cgit v1.2.3-70-g09d2