194 files changed, 9208 insertions, 1134 deletions
diff --git a/tools/crypto/ccp/dbc.c b/tools/crypto/ccp/dbc.c
index a807df0f0597..80248d3d3a5a 100644
--- a/tools/crypto/ccp/dbc.c
+++ b/tools/crypto/ccp/dbc.c
@@ -57,7 +57,6 @@ int process_param(int fd, int msg_index, __u8 *signature, int *data)
 		.msg_index = msg_index,
 		.param = *data,
 	};
-	int ret;
 
 	assert(signature);
 	assert(data);
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 2e60e2c212cd..34ffcec264ab 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -52,7 +52,7 @@ $(OUTPUT)hv_fcopy_uio_daemon: $(HV_FCOPY_UIO_DAEMON_IN)
 
 clean:
 	rm -f $(ALL_PROGRAMS)
-	find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+	find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
 
 install: $(ALL_PROGRAMS)
 	install -d -m 755 $(DESTDIR)$(sbindir); \
diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
index 099f2c44dbed..f83698f14da2 100644..100755
--- a/tools/hv/lsvmbus
+++ b/tools/hv/lsvmbus
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
 import os
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 35bcf52dbc65..e05b39e39c3f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2851,7 +2851,7 @@ union bpf_attr {
  * 		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
  * 		  **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
  * 		  **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
- * 		  **TCP_BPF_RTO_MIN**.
+ *		  **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports the following *optname*\ s:
  * 		  **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -7080,6 +7080,7 @@ enum {
 	TCP_BPF_SYN		= 1005, /* Copy the TCP header */
 	TCP_BPF_SYN_IP		= 1006, /* Copy the IP[46] and TCP header */
 	TCP_BPF_SYN_MAC         = 1007, /* Copy the MAC, IP[46], and TCP header */
+	TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
 };
 
 enum {
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 43742ac5b00d..7c308f04e7a0 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -93,6 +93,7 @@ enum {
 	NETDEV_A_PAGE_POOL_INFLIGHT,
 	NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
 	NETDEV_A_PAGE_POOL_DETACH_TIME,
+	NETDEV_A_PAGE_POOL_DMABUF,
 
 	__NETDEV_A_PAGE_POOL_MAX,
 	NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -131,6 +132,7 @@ enum {
 	NETDEV_A_QUEUE_IFINDEX,
 	NETDEV_A_QUEUE_TYPE,
 	NETDEV_A_QUEUE_NAPI_ID,
+	NETDEV_A_QUEUE_DMABUF,
 
 	__NETDEV_A_QUEUE_MAX,
 	NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
@@ -174,6 +176,16 @@ enum {
 };
 
 enum {
+	NETDEV_A_DMABUF_IFINDEX = 1,
+	NETDEV_A_DMABUF_QUEUES,
+	NETDEV_A_DMABUF_FD,
+	NETDEV_A_DMABUF_ID,
+
+	__NETDEV_A_DMABUF_MAX,
+	NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+
+enum {
 	NETDEV_CMD_DEV_GET = 1,
 	NETDEV_CMD_DEV_ADD_NTF,
 	NETDEV_CMD_DEV_DEL_NTF,
@@ -186,6 +198,7 @@ enum {
 	NETDEV_CMD_QUEUE_GET,
 	NETDEV_CMD_NAPI_GET,
 	NETDEV_CMD_QSTATS_GET,
+	NETDEV_CMD_BIND_RX,
 
 	__NETDEV_CMD_MAX,
 	NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore
index c18dd8d83cee..296c4035dbf2 100644
--- a/tools/net/ynl/lib/.gitignore
+++ b/tools/net/ynl/lib/.gitignore
@@ -1 +1,2 @@
 __pycache__/
+*.d
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index fcb18a5a6d70..e16cef160bc2 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -696,14 +696,14 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
 	addr.nl_family = AF_NETLINK;
 	if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
 		__perr(yse, "unable to bind to a socket address");
-		goto err_close_sock;;
+		goto err_close_sock;
 	}
 
 	memset(&addr, 0, sizeof(addr));
 	addrlen = sizeof(addr);
 	if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) {
 		__perr(yse, "unable to read socket address");
-		goto err_close_sock;;
+		goto err_close_sock;
 	}
 	ys->portid = addr.nl_pid;
 	ys->seq = random();
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index d42c1d605969..c22c22bf2cb7 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -388,6 +388,8 @@ class NetlinkProtocol:
 
     def decode(self, ynl, nl_msg, op):
         msg = self._decode(nl_msg)
+        if op is None:
+            op = ynl.rsp_by_value[msg.cmd()]
         fixed_header_size = ynl._struct_size(op.fixed_header)
         msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size)
         return msg
@@ -921,8 +923,7 @@ class YnlFamily(SpecFamily):
                     print("Netlink done while checking for ntf!?")
                     continue
 
-                op = self.rsp_by_value[nl_msg.cmd()]
-                decoded = self.nlproto.decode(self, nl_msg, op)
+                decoded = self.nlproto.decode(self, nl_msg, None)
                 if decoded.cmd() not in self.async_msg_ids:
                     print("Unexpected msg id done while checking for ntf", decoded)
                     continue
@@ -980,7 +981,7 @@ class YnlFamily(SpecFamily):
                     if nl_msg.extack:
                         self._decode_extack(req_msg, op, nl_msg.extack)
                 else:
-                    op = self.rsp_by_value[nl_msg.cmd()]
+                    op = None
                     req_flags = []
 
                 if nl_msg.error:
diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c
index 3e7b29bd55d5..22609d44c89a 100644
--- a/tools/net/ynl/samples/netdev.c
+++ b/tools/net/ynl/samples/netdev.c
@@ -79,7 +79,10 @@ int main(int argc, char **argv)
 		goto err_close;
 
 	printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): ");
-	scanf("%d", &ifindex);
+	if (scanf("%d", &ifindex) != 1) {
+		fprintf(stderr, "Error: unable to parse input\n");
+		goto err_destroy;
+	}
 
 	if (ifindex > 0) {
 		struct netdev_dev_get_req *req;
@@ -119,6 +122,7 @@ int main(int argc, char **argv)
 
 err_close:
 	fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
 	ynl_sock_destroy(ys);
 	return 2;
 }
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 51529fabd517..717530bc9c52 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -2668,13 +2668,15 @@ def main():
         cw.p('#define ' + hdr_prot)
         cw.nl()
 
+    hdr_file=os.path.basename(args.out_file[:-2]) + ".h"
+
     if args.mode == 'kernel':
         cw.p('#include <net/netlink.h>')
         cw.p('#include <net/genetlink.h>')
         cw.nl()
         if not args.header:
             if args.out_file:
-                cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"')
+                cw.p(f'#include "{hdr_file}"')
             cw.nl()
         headers = ['uapi/' + parsed.uapi_header]
         headers += parsed.kernel_family.get('headers', [])
@@ -2686,7 +2688,7 @@ def main():
             if family_contains_bitfield32(parsed):
                 cw.p('#include <linux/netlink.h>')
         else:
-            cw.p(f'#include "{parsed.name}-user.h"')
+            cw.p(f'#include "{hdr_file}"')
             cw.p('#include "ynl.h"')
         headers = [parsed.uapi_header]
     for definition in parsed['definitions']:
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 5c9335fff2d3..9a95871afc95 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -691,7 +691,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
 
 		fprintf(out, "%c%" PRIu64,
 			/* session up time */
-			csv_sep, (curr - daemon->start) / 60);
+			csv_sep, (uint64_t)((curr - daemon->start) / 60));
 
 		fprintf(out, "\n");
 	} else {
@@ -702,7 +702,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
 			fprintf(out, "  lock:    %s/lock\n",
 				daemon->base);
 			fprintf(out, "  up:      %" PRIu64 " minutes\n",
-				(curr - daemon->start) / 60);
+				(uint64_t)((curr - daemon->start) / 60));
 		}
 	}
 
@@ -730,7 +730,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
 
 			fprintf(out, "%c%" PRIu64,
 				/* session up time */
-				csv_sep, (curr - session->start) / 60);
+				csv_sep, (uint64_t)((curr - session->start) / 60));
 
 			fprintf(out, "\n");
 		} else {
@@ -747,7 +747,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
 			fprintf(out, "  ack:     %s/%s\n",
 				session->base, SESSION_ACK);
 			fprintf(out, "  up:      %" PRIu64 " minutes\n",
-				(curr - session->start) / 60);
+				(uint64_t)((curr - session->start) / 60));
 		}
 	}
 
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 40132655ccd1..c76f53a90a7b 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -456,11 +456,13 @@ static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __
 /**
  * Test perf_pmu__match() that's used to search for a PMU given a name passed
  * on the command line. The name that's passed may also be a filename type glob
- * match.
+ * match. If the name does not match, perf_pmu__match() attempts to match the
+ * alias of the PMU, if provided.
  */
 static int test__pmu_match(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_pmu test_pmu;
+	test_pmu.alias_name = NULL;
 
 	test_pmu.name = "pmuname";
 	TEST_ASSERT_EQUAL("Exact match", perf_pmu__match(&test_pmu, "pmuname"),	     true);
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index b4cb3fe5cc25..bc4e92c0c08b 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -286,6 +286,9 @@ static void account_end_timestamp(struct lock_contention *con)
 			goto next;
 
 		for (int i = 0; i < total_cpus; i++) {
+			if (cpu_data[i].lock == 0)
+				continue;
+
 			update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
 					 &cpu_data[i]);
 		}
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 3be882b2e845..31a223eaf8e6 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -20,6 +20,7 @@
 #include "util/env.h"
 #include "util/kvm-stat.h"
 #include "util/kwork.h"
+#include "util/sample.h"
 #include "util/lock-contention.h"
 #include <internal/lib.h>
 #include "../builtin.h"
diff --git a/tools/power/cpupower/bindings/python/.gitignore b/tools/power/cpupower/bindings/python/.gitignore
new file mode 100644
index 000000000000..5c9a1f0212dd
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/.gitignore
@@ -0,0 +1,8 @@
+__pycache__/
+raw_pylibcpupower_wrap.c
+*.o
+*.so
+*.py
+!test_raw_pylibcpupower.py
+# git keeps ignoring this file, use git add -f raw_pylibcpupower.i
+!raw_pylibcpupower.i
diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile
new file mode 100644
index 000000000000..dc09c5b66ead
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Makefile for libcpupower's Python bindings
+#
+# This Makefile expects you have already run the makefile for cpupower to build
+# the .o files in the lib directory for the bindings to be created.
+
+CC := gcc
+HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi)
+HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi)
+
+LIB_DIR := ../../lib
+PY_INCLUDE = $(firstword $(shell python-config --includes))
+OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o)
+
+all: _raw_pylibcpupower.so
+
+_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o
+	$(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
+
+raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c
+	$(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE)
+
+raw_pylibcpupower_wrap.c: raw_pylibcpupower.i
+ifeq ($(HAVE_SWIG),0)
+	$(error "swig was not found. Make sure you have it installed and in the PATH to generate the bindings.")
+else ifeq ($(HAVE_PYCONFIG),0)
+	$(error "python-config was not found. Make sure you have it installed and in the PATH to generate the bindings.")
+endif
+	swig -python raw_pylibcpupower.i
+
+# Will only clean the bindings folder; will not clean the actual cpupower folder
+clean:
+	rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so
diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README
new file mode 100644
index 000000000000..0a4bb2581e8a
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/README
@@ -0,0 +1,59 @@
+This folder contains the necessary files to build the Python bindings for
+libcpupower (aside from the libcpupower object files).
+
+
+requirements
+------------
+
+* You need the object files in the libcpupower directory compiled by
+cpupower's makefile.
+* The SWIG program must be installed.
+* Python's development libraries must be installed.
+
+Please check that your version of SWIG is compatible with the version of Python
+installed on your machine by checking the SWIG changelog on their website.
+https://swig.org/
+
+Note that while SWIG itself is GPL v3+ licensed, the resulting output (the
+bindings code) is permissively licensed, combined with the license of
+libcpupower's .o files. For these bindings that means GPL v2.
+
+Please see https://swig.org/legal.html and the discussion [1] for more details.
+
+[1]
+https://lore.kernel.org/linux-pm/Zqv9BOjxLAgyNP5B@hatbackup/
+
+
+build
+-----
+
+Install SWIG and the Python development files provided by your distribution.
+
+Build the object files for libcpupower by running make in the cpupower
+directory.
+
+Return to the directory this README is in to run:
+
+$ make
+
+
+testing
+-------
+
+Please verify the _raw_pylibcpupower.so and raw_pylibcpupower.py files have
+been created.
+
+To run the test script:
+
+$ python3 test_raw_pylibcpupower.py
+
+
+credits
+-------
+
+Original Bindings Author:
+John B. Wyatt IV
+jwyatt@redhat.com
+sageofredondo@gmail.com
+
+Copyright (C) 2024 Red Hat
diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.i b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i
new file mode 100644
index 000000000000..96556d87a745
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+%module raw_pylibcpupower
+%{
+#include "../../lib/cpupower_intern.h"
+#include "../../lib/acpi_cppc.h"
+#include "../../lib/cpufreq.h"
+#include "../../lib/cpuidle.h"
+#include "../../lib/cpupower.h"
+#include "../../lib/powercap.h"
+%}
+
+/*
+ * cpupower_intern.h
+ */
+
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 4096
+#define SYSFS_PATH_MAX 255
+
+int is_valid_path(const char *path);
+
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
+
+unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen);
+
+/*
+ * acpi_cppc.h
+ */
+
+enum acpi_cppc_value {
+	HIGHEST_PERF,
+	LOWEST_PERF,
+	NOMINAL_PERF,
+	LOWEST_NONLINEAR_PERF,
+	LOWEST_FREQ,
+	NOMINAL_FREQ,
+	REFERENCE_PERF,
+	WRAPAROUND_TIME,
+	MAX_CPPC_VALUE_FILES
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu,
+				 enum acpi_cppc_value which);
+
+/*
+ * cpufreq.h
+ */
+
+struct cpufreq_policy {
+	unsigned long min;
+	unsigned long max;
+	char *governor;
+};
+
+struct cpufreq_available_governors {
+	char *governor;
+	struct cpufreq_available_governors *next;
+	struct cpufreq_available_governors *first;
+};
+
+struct cpufreq_available_frequencies {
+	unsigned long frequency;
+	struct cpufreq_available_frequencies *next;
+	struct cpufreq_available_frequencies *first;
+};
+
+
+struct cpufreq_affected_cpus {
+	unsigned int cpu;
+	struct cpufreq_affected_cpus *next;
+	struct cpufreq_affected_cpus *first;
+};
+
+struct cpufreq_stats {
+	unsigned long frequency;
+	unsigned long long time_in_state;
+	struct cpufreq_stats *next;
+	struct cpufreq_stats *first;
+};
+
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
+
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
+
+#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu)
+
+unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+
+int cpufreq_get_hardware_limits(unsigned int cpu,
+				unsigned long *min,
+				unsigned long *max);
+
+char *cpufreq_get_driver(unsigned int cpu);
+
+void cpufreq_put_driver(char *ptr);
+
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
+
+void cpufreq_put_policy(struct cpufreq_policy *policy);
+
+struct cpufreq_available_governors
+*cpufreq_get_available_governors(unsigned int cpu);
+
+void cpufreq_put_available_governors(
+	struct cpufreq_available_governors *first);
+
+struct cpufreq_available_frequencies
+*cpufreq_get_available_frequencies(unsigned int cpu);
+
+void cpufreq_put_available_frequencies(
+		struct cpufreq_available_frequencies *first);
+
+struct cpufreq_available_frequencies
+*cpufreq_get_boost_frequencies(unsigned int cpu);
+
+void cpufreq_put_boost_frequencies(
+		struct cpufreq_available_frequencies *first);
+
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
+							int cpu);
+
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
+
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
+							int cpu);
+
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
+
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+					unsigned long long *total_time);
+
+void cpufreq_put_stats(struct cpufreq_stats *stats);
+
+unsigned long cpufreq_get_transitions(unsigned int cpu);
+
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
+
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
+
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
+
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
+
+int cpufreq_set_frequency(unsigned int cpu,
+				unsigned long target_frequency);
+
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+						 const char **table,
+						 unsigned int index,
+						 unsigned int size);
+
+/*
+ * cpuidle.h
+ */
+
+int cpuidle_is_state_disabled(unsigned int cpu,
+				       unsigned int idlestate);
+int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
+				   unsigned int disable);
+unsigned long cpuidle_state_latency(unsigned int cpu,
+						unsigned int idlestate);
+unsigned long cpuidle_state_usage(unsigned int cpu,
+					unsigned int idlestate);
+unsigned long long cpuidle_state_time(unsigned int cpu,
+						unsigned int idlestate);
+char *cpuidle_state_name(unsigned int cpu,
+				unsigned int idlestate);
+char *cpuidle_state_desc(unsigned int cpu,
+				unsigned int idlestate);
+unsigned int cpuidle_state_count(unsigned int cpu);
+
+char *cpuidle_get_governor(void);
+
+char *cpuidle_get_driver(void);
+
+/*
+ * cpupower.h
+ */
+
+struct cpupower_topology {
+	/* Amount of CPU cores, packages and threads per core in the system */
+	unsigned int cores;
+	unsigned int pkgs;
+	unsigned int threads; /* per core */
+
+	/* Array gets mallocated with cores entries, holding per core info */
+	struct cpuid_core_info *core_info;
+};
+
+struct cpuid_core_info {
+	int pkg;
+	int core;
+	int cpu;
+
+	/* flags */
+	unsigned int is_online:1;
+};
+
+int get_cpu_topology(struct cpupower_topology *cpu_top);
+
+void cpu_topology_release(struct cpupower_topology cpu_top);
+
+int cpupower_is_cpu_online(unsigned int cpu);
+
+/*
+ * powercap.h
+ */
+
+struct powercap_zone {
+	char name[MAX_LINE_LEN];
+	/*
+	 * sys_name relative to PATH_TO_POWERCAP,
+	 * do not forget the / in between
+	 */
+	char sys_name[SYSFS_PATH_MAX];
+	int tree_depth;
+	struct powercap_zone *parent;
+	struct powercap_zone *children[POWERCAP_MAX_CHILD_ZONES];
+	/* More possible caps or attributes to be added? */
+	uint32_t has_power_uw:1,
+		 has_energy_uj:1;
+
+};
+
+int powercap_walk_zones(struct powercap_zone *zone,
+			int (*f)(struct powercap_zone *zone));
+
+struct powercap_zone *powercap_init_zones(void);
+
+int powercap_get_enabled(int *mode);
+
+int powercap_set_enabled(int mode);
+
+int powercap_get_driver(char *driver, int buflen);
+
+int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode);
+
+int powercap_zone_set_enabled(struct powercap_zone *zone, int mode);
diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py
new file mode 100755
index 000000000000..3d6f62b9556a
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+
+import raw_pylibcpupower as p
+
+# Simple function call
+
+"""
+Get cstate count
+"""
+cpu_cstates_count = p.cpuidle_state_count(0)
+if cpu_cstates_count > -1:
+    print(f"CPU 0 has {cpu_cstates_count} c-states")
+else:
+    print(f"cstate count error: return code: {cpu_cstates_count}")
+
+"""
+Disable cstate (will fail if the above is 0, ex: a virtual machine)
+"""
+cstate_disabled = p.cpuidle_state_disable(0, 0, 1)
+if cpu_cstates_count == 0:
+    print(f"CPU 0 has {cpu_cstates_count} c-states")
+else:
+    print(f"cstate count error: return code: {cpu_cstates_count}")
+
+match cstate_disabled:
+    case 0:
+        print(f"CPU state disabled")
+    case -1:
+        print(f"Idlestate not available")
+    case _:
+        print(f"Not documented")
+
+
+# Pointer example
+
+topo = p.cpupower_topology()
+total_cpus = p.get_cpu_topology(topo)
+if total_cpus > 0:
+    print(f"Number of total cpus: {total_cpus} and number of cores: {topo.cores}")
+else:
+    print(f"Error: could not get cpu topology")
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
index 479c5971aa6d..0ecac009273c 100644
--- a/tools/power/cpupower/lib/cpuidle.c
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -116,6 +116,7 @@ enum idlestate_value {
 	IDLESTATE_USAGE,
 	IDLESTATE_POWER,
 	IDLESTATE_LATENCY,
+	IDLESTATE_RESIDENCY,
 	IDLESTATE_TIME,
 	IDLESTATE_DISABLE,
 	MAX_IDLESTATE_VALUE_FILES
@@ -125,6 +126,7 @@ static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
 	[IDLESTATE_USAGE] = "usage",
 	[IDLESTATE_POWER] = "power",
 	[IDLESTATE_LATENCY] = "latency",
+	[IDLESTATE_RESIDENCY] = "residency",
 	[IDLESTATE_TIME]  = "time",
 	[IDLESTATE_DISABLE]  = "disable",
 };
@@ -254,6 +256,12 @@ unsigned long cpuidle_state_latency(unsigned int cpu,
 	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
 }
 
+unsigned long cpuidle_state_residency(unsigned int cpu,
+					  unsigned int idlestate)
+{
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_RESIDENCY);
+}
+
 unsigned long cpuidle_state_usage(unsigned int cpu,
 					unsigned int idlestate)
 {
diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h
index 2e10fead2e1e..2ab404d40259 100644
--- a/tools/power/cpupower/lib/cpuidle.h
+++ b/tools/power/cpupower/lib/cpuidle.h
@@ -8,6 +8,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
 				   unsigned int disable);
 unsigned long cpuidle_state_latency(unsigned int cpu,
 						unsigned int idlestate);
+unsigned long cpuidle_state_residency(unsigned int cpu,
+						unsigned int idlestate);
 unsigned long cpuidle_state_usage(unsigned int cpu,
 					unsigned int idlestate);
 unsigned long long cpuidle_state_time(unsigned int cpu,
diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c
index a7a59c6bacda..94a0c69e55ef 100644
--- a/tools/power/cpupower/lib/powercap.c
+++ b/tools/power/cpupower/lib/powercap.c
@@ -78,6 +78,14 @@ int powercap_get_enabled(int *mode)
 }
 
 /*
+ * TODO: implement function. Returns dummy 0 for now.
+ */
+int powercap_set_enabled(int mode)
+{
+	return 0;
+}
+
+/*
  * Hardcoded, because rapl is the only powercap implementation
 - * this needs to get more generic if more powercap implementations
  * should show up
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 44126a87fa7a..e0d17f0de3fe 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -64,6 +64,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose)
 
 		printf(_("Latency: %lu\n"),
 		       cpuidle_state_latency(cpu, idlestate));
+		printf(_("Residency: %lu\n"),
+		       cpuidle_state_residency(cpu, idlestate));
 		printf(_("Usage: %lu\n"),
 		       cpuidle_state_usage(cpu, idlestate));
 		printf(_("Duration: %llu\n"),
@@ -115,6 +117,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
 		printf(_("promotion[--] demotion[--] "));
 		printf(_("latency[%03lu] "),
 		       cpuidle_state_latency(cpu, cstate));
+		printf(_("residency[%05lu] "),
+		       cpuidle_state_residency(cpu, cstate));
 		printf(_("usage[%08lu] "),
 		       cpuidle_state_usage(cpu, cstate));
 		printf(_("duration[%020Lu] \n"),
diff --git a/tools/power/pm-graph/.gitignore b/tools/power/pm-graph/.gitignore
new file mode 100644
index 000000000000..37762a8a06d6
--- /dev/null
+++ b/tools/power/pm-graph/.gitignore
@@ -0,0 +1,3 @@
+# sleepgraph.py artifacts
+suspend-[0-9]*-[0-9]*
+suspend-[0-9]*-[0-9]*-x[0-9]*
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index b5310832c19c..aeddbaf2d4c4 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -1,51 +1,86 @@
 # SPDX-License-Identifier: GPL-2.0
-PREFIX		?= /usr
-DESTDIR		?=
+#
+# Copyright (c) 2013, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# Authors:
+#	 Todd Brandt <todd.e.brandt@linux.intel.com>
+
+# Prefix to the directories we're installing to
+DESTDIR ?=
+
+# Directory definitions. These are default and most probably
+# do not need to be changed. Please note that DESTDIR is
+# added in front of any of them
+
+BINDIR ?=	/usr/bin
+MANDIR ?=	/usr/share/man
+LIBDIR ?=	/usr/lib
+
+# Toolchain: what tools do we use, and what options do they need:
+INSTALL = /usr/bin/install
+INSTALL_DATA  = ${INSTALL} -m 644
 
 all:
 	@echo "Nothing to build"
 
 install : uninstall
-	install -d  $(DESTDIR)$(PREFIX)/lib/pm-graph
-	install sleepgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
-	install bootgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
-	install -d  $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/cgskip.txt $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/freeze-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/freeze.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/freeze-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/standby-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/standby.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/standby-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/suspend-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/suspend.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/suspend-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-	install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-
-	install -d  $(DESTDIR)$(PREFIX)/bin
-	ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
-	ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
-
-	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
-	install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
-	install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+	$(INSTALL) -d  $(DESTDIR)$(LIBDIR)/pm-graph
+	$(INSTALL) sleepgraph.py $(DESTDIR)$(LIBDIR)/pm-graph
+	$(INSTALL) bootgraph.py $(DESTDIR)$(LIBDIR)/pm-graph
+	$(INSTALL) -d  $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/cgskip.txt $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/freeze-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/freeze.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/freeze-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/standby-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/standby.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/standby-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/suspend-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/suspend.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/suspend-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+	$(INSTALL_DATA) config/suspend-x2-proc.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+
+	$(INSTALL) -d  $(DESTDIR)$(BINDIR)
+	ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(BINDIR)/bootgraph
+	ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(BINDIR)/sleepgraph
+
+	$(INSTALL) -d  $(DESTDIR)$(MANDIR)/man8
+	$(INSTALL) bootgraph.8 $(DESTDIR)$(MANDIR)/man8
+	$(INSTALL) sleepgraph.8 $(DESTDIR)$(MANDIR)/man8
 
 uninstall :
-	rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
-	rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
+	rm -f $(DESTDIR)$(MANDIR)/man8/bootgraph.8
+	rm -f $(DESTDIR)$(MANDIR)/man8/sleepgraph.8
 
-	rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph
-	rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph
+	rm -f $(DESTDIR)$(BINDIR)/bootgraph
+	rm -f $(DESTDIR)$(BINDIR)/sleepgraph
 
-	rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/config/*
-	if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \
-		rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \
+	rm -f $(DESTDIR)$(LIBDIR)/pm-graph/config/*
+	if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/config ] ; then \
+		rmdir $(DESTDIR)$(LIBDIR)/pm-graph/config; \
 	fi;
-	rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/*
-	if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \
-		rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \
+	rm -f $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__/*
+	if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__ ] ; then \
+		rmdir $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__; \
 	fi;
-	rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/*
-	if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \
-		rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \
+	rm -f $(DESTDIR)$(LIBDIR)/pm-graph/*
+	if [ -d $(DESTDIR)$(LIBDIR)/pm-graph ] ; then \
+		rmdir $(DESTDIR)$(LIBDIR)/pm-graph; \
 	fi;
+
+help:
+	@echo  'Building targets:'
+	@echo  '  all		  - Nothing to build'
+	@echo  '  install	  - Install the program and create necessary directories'
+	@echo  '  uninstall	  - Remove installed files and directories'
+
+.PHONY: all install uninstall help
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bc8fe9e8f7f2..3b7df5477317 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -65,9 +65,11 @@ TARGETS += net/af_unix
 TARGETS += net/forwarding
 TARGETS += net/hsr
 TARGETS += net/mptcp
+TARGETS += net/netfilter
 TARGETS += net/openvswitch
+TARGETS += net/packetdrill
+TARGETS += net/rds
 TARGETS += net/tcp_ao
-TARGETS += net/netfilter
 TARGETS += nsfs
 TARGETS += perf_events
 TARGETS += pidfd
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index d8909b2b535a..f2d6007a2b98 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -156,6 +156,12 @@ static void pmull_sigill(void)
 	asm volatile(".inst 0x0ee0e000" : : : );
 }
 
+static void poe_sigill(void)
+{
+	/* mrs x0, POR_EL0 */
+	asm volatile("mrs x0, S3_3_C10_C2_4" : : : "x0");
+}
+
 static void rng_sigill(void)
 {
 	asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -602,6 +608,14 @@ static const struct hwcap_data {
 		.sigill_fn = pmull_sigill,
 	},
 	{
+		.name = "POE",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_POE,
+		.cpuinfo = "poe",
+		.sigill_fn = poe_sigill,
+		.sigill_reliable = true,
+	},
+	{
 		.name = "RNG",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_RNG,
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index e4fa507cbdd0..b51d21f78cf9 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -163,10 +163,10 @@ static void test_hw_debug(pid_t child, int type, const char *type_name)
 static int do_child(void)
 {
 	if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
-		ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+		ksft_exit_fail_perror("PTRACE_TRACEME");
 
 	if (raise(SIGSTOP))
-		ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+		ksft_exit_fail_perror("raise(SIGSTOP)");
 
 	return EXIT_SUCCESS;
 }
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 1ce5b5eac386..b2f2bfd5c6aa 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -2,6 +2,7 @@
 mangle_*
 fake_sigreturn_*
 fpmr_*
+poe_*
 sme_*
 ssve_*
 sve_*
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
index 8f5febaf1a9a..edb3613513b8 100644
--- a/tools/testing/selftests/arm64/signal/Makefile
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -23,7 +23,7 @@ $(TEST_GEN_PROGS): $(PROGS)
 # Common test-unit targets to build common-layout test-cases executables
 # Needs secondary expansion to properly include the testcase c-file in pre-reqs
 COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \
-	signals.S
+	signals.S sve_helpers.c
 COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h
 
 .SECONDEXPANSION:
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.c b/tools/testing/selftests/arm64/signal/sve_helpers.c
new file mode 100644
index 000000000000..0acc121af306
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <kselftest.h>
+#include <asm/sigcontext.h>
+#include <sys/prctl.h>
+
+#include "sve_helpers.h"
+
+unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls;
+
+/*
+ * Fill vls[] with the vector lengths returned by the SVE (or, when use_sme
+ * is true, SME) set-VL prctl, requesting from SVE_VQ_MAX downwards, and
+ * leave the number of entries in nvls.
+ *
+ * Returns KSFT_FAIL if the prctl fails, KSFT_SKIP if fewer than min_vls
+ * lengths were found, KSFT_PASS otherwise.
+ */
+int sve_fill_vls(bool use_sme, int min_vls)
+{
+	int vq, vl;
+	int pr_set_vl = use_sme ? PR_SME_SET_VL : PR_SVE_SET_VL;
+	int len_mask = use_sme ? PR_SME_VL_LEN_MASK : PR_SVE_VL_LEN_MASK;
+
+	/* Start from an empty table so a repeated call cannot overrun vls[] */
+	nvls = 0;
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(pr_set_vl, vq * 16);
+		if (vl == -1)
+			return KSFT_FAIL;
+
+		vl &= len_mask;
+
+		/*
+		 * Unlike SVE, SME does not require the minimum vector length
+		 * to be implemented, or the VLs to be consecutive, so any call
+		 * to the prctl might return the single implemented VL, which
+		 * might be larger than 16. So to avoid this loop never
+		 * terminating, bail out here when we find a higher VL than
+		 * we asked for.
+		 * See the ARM ARM, DDI 0487K.a, B1.4.2: I_QQRNR and I_NWYBP.
+		 */
+		if (vq < sve_vq_from_vl(vl))
+			break;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	if (nvls < min_vls) {
+		fprintf(stderr, "Only %u VL supported\n", nvls);
+		return KSFT_SKIP;
+	}
+
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h
new file mode 100644
index 000000000000..50948ce471cc
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#ifndef __SVE_HELPERS_H__
+#define __SVE_HELPERS_H__
+
+#include <stdbool.h>
+
+#define VLS_USE_SVE	false
+#define VLS_USE_SME	true
+
+extern unsigned int vls[];
+extern unsigned int nvls;
+
+int sve_fill_vls(bool use_sme, int min_vls);
+
+#endif
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
index ebd5815b54bb..dfd6a2badf9f 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -6,44 +6,28 @@
  * handler, this is not supported and is expected to segfault.
  */
 
+#include <kselftest.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <sys/prctl.h>
 
 #include "test_signals_utils.h"
+#include "sve_helpers.h"
 #include "testcases.h"
 
 struct fake_sigframe sf;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
 
 static bool sme_get_vls(struct tdescr *td)
 {
-	int vq, vl;
+	int res = sve_fill_vls(VLS_USE_SME, 2);
 
-	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
-	 */
-	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SVE_SET_VL, vq * 16);
-		if (vl == -1)
-			return false;
+	if (!res)
+		return true;
 
-		vl &= PR_SME_VL_LEN_MASK;
+	if (res == KSFT_SKIP)
+		td->result = KSFT_SKIP;
 
-		/* Skip missing VLs */
-		vq = sve_vq_from_vl(vl);
-
-		vls[nvls++] = vl;
-	}
-
-	/* We need at least two VLs */
-	if (nvls < 2) {
-		fprintf(stderr, "Only %d VL supported\n", nvls);
-		return false;
-	}
-
-	return true;
+	return false;
 }
 
 static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
@@ -51,30 +35,30 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
 {
 	size_t resv_sz, offset;
 	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
-	struct sve_context *sve;
+	struct za_context *za;
 
 	/* Get a signal context with a SME ZA frame in it */
 	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	resv_sz = GET_SF_RESV_SIZE(sf);
-	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	head = get_header(head, ZA_MAGIC, resv_sz, &offset);
 	if (!head) {
-		fprintf(stderr, "No SVE context\n");
+		fprintf(stderr, "No ZA context\n");
 		return 1;
 	}
 
-	if (head->size != sizeof(struct sve_context)) {
+	if (head->size != sizeof(struct za_context)) {
 		fprintf(stderr, "Register data present, aborting\n");
 		return 1;
 	}
 
-	sve = (struct sve_context *)head;
+	za = (struct za_context *)head;
 
 	/* No changes are supported; init left us at minimum VL so go to max */
 	fprintf(stderr, "Attempting to change VL from %d to %d\n",
-		sve->vl, vls[0]);
-	sve->vl = vls[0];
+		za->vl, vls[0]);
+	za->vl = vls[0];
 
 	fake_sigreturn(&sf, sizeof(sf), 0);
 
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
index e2a452190511..e1ccf8f85a70 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -12,40 +12,22 @@
 #include <sys/prctl.h>
 
 #include "test_signals_utils.h"
+#include "sve_helpers.h"
 #include "testcases.h"
 
 struct fake_sigframe sf;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
 
 static bool sve_get_vls(struct tdescr *td)
 {
-	int vq, vl;
+	int res = sve_fill_vls(VLS_USE_SVE, 2);
 
-	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
-	 */
-	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SVE_SET_VL, vq * 16);
-		if (vl == -1)
-			return false;
+	if (!res)
+		return true;
 
-		vl &= PR_SVE_VL_LEN_MASK;
-
-		/* Skip missing VLs */
-		vq = sve_vq_from_vl(vl);
-
-		vls[nvls++] = vl;
-	}
-
-	/* We need at least two VLs */
-	if (nvls < 2) {
-		fprintf(stderr, "Only %d VL supported\n", nvls);
+	if (res == KSFT_SKIP)
 		td->result = KSFT_SKIP;
-		return false;
-	}
 
-	return true;
+	return false;
 }
 
 static int fake_sigreturn_sve_change_vl(struct tdescr *td,
diff --git a/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
new file mode 100644
index 000000000000..36bd9940ee05
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Arm Limited
+ *
+ * Verify that the POR_EL0 register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+	ucontext_t uc;
+	char buf[1024 * 128];
+} context;
+
+#define SYS_POR_EL0 "S3_3_C10_C2_4"
+
+static uint64_t get_por_el0(void)
+{
+	uint64_t val;
+
+	asm volatile(
+		"mrs	%0, " SYS_POR_EL0 "\n"
+		: "=r"(val)
+		:
+		: );
+
+	return val;
+}
+
+int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+	struct poe_context *poe_ctx;
+	size_t offset;
+	bool in_sigframe;
+	bool have_poe;
+	__u64 orig_poe;
+
+	have_poe = getauxval(AT_HWCAP2) & HWCAP2_POE;
+	if (have_poe)
+		orig_poe = get_por_el0();
+
+	if (!get_current_context(td, &context.uc, sizeof(context)))
+		return 1;
+
+	poe_ctx = (struct poe_context *)
+		get_header(head, POE_MAGIC, td->live_sz, &offset);
+
+	in_sigframe = poe_ctx != NULL;
+
+	fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n",
+		in_sigframe ? "present" : "absent",
+		have_poe ? "with" : "without");
+
+	td->pass = (in_sigframe == have_poe);
+
+	/*
+	 * Check that the value we read back was the one present at
+	 * the time that the signal was triggered.
+	 */
+	if (have_poe && poe_ctx) {
+		if (poe_ctx->por_el0 != orig_poe) {
+			fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n",
+				poe_ctx->por_el0, orig_poe);
+			td->pass = false;
+		}
+	}
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "POR_EL0",
+	.descr = "Validate that POR_EL0 is present as expected",
+	.timeout = 3,
+	.run = poe_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
index 3d37daafcff5..6dbe48cf8b09 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -6,51 +6,31 @@
  * set up as expected.
  */
 
+#include <kselftest.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <sys/prctl.h>
 
 #include "test_signals_utils.h"
+#include "sve_helpers.h"
 #include "testcases.h"
 
 static union {
 	ucontext_t uc;
 	char buf[1024 * 64];
 } context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
 
 static bool sme_get_vls(struct tdescr *td)
 {
-	int vq, vl;
+	int res = sve_fill_vls(VLS_USE_SME, 1);
 
-	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
-	 */
-	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SME_SET_VL, vq * 16);
-		if (vl == -1)
-			return false;
-
-		vl &= PR_SME_VL_LEN_MASK;
-
-		/* Did we find the lowest supported VL? */
-		if (vq < sve_vq_from_vl(vl))
-			break;
+	if (!res)
+		return true;
 
-		/* Skip missing VLs */
-		vq = sve_vq_from_vl(vl);
-
-		vls[nvls++] = vl;
-	}
-
-	/* We need at least one VL */
-	if (nvls < 1) {
-		fprintf(stderr, "Only %d VL supported\n", nvls);
-		return false;
-	}
+	if (res == KSFT_SKIP)
+		td->result = KSFT_SKIP;
 
-	return true;
+	return false;
 }
 
 static void setup_ssve_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
index 9dc5f128bbc0..5557e116e973 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
@@ -6,51 +6,31 @@
  * signal frames is set up as expected when enabled simultaneously.
  */
 
+#include <kselftest.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <sys/prctl.h>
 
 #include "test_signals_utils.h"
+#include "sve_helpers.h"
 #include "testcases.h"
 
 static union {
 	ucontext_t uc;
 	char buf[1024 * 128];
 } context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
 
 static bool sme_get_vls(struct tdescr *td)
 {
-	int vq, vl;
+	int res = sve_fill_vls(VLS_USE_SME, 1);
 
-	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
-	 */
-	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SME_SET_VL, vq * 16);
-		if (vl == -1)
-			return false;
-
-		vl &= PR_SME_VL_LEN_MASK;
-
-		/* Did we find the lowest supported VL? */
-		if (vq < sve_vq_from_vl(vl))
-			break;
+	if (!res)
+		return true;
 
-		/* Skip missing VLs */
-		vq = sve_vq_from_vl(vl);
-
-		vls[nvls++] = vl;
-	}
-
-	/* We need at least one VL */
-	if (nvls < 1) {
-		fprintf(stderr, "Only %d VL supported\n", nvls);
-		return false;
-	}
+	if (res == KSFT_SKIP)
+		td->result = KSFT_SKIP;
 
-	return true;
+	return false;
 }
 
 static void setup_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
index 8b16eabbb769..8143eb1c58c1 100644
--- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -6,47 +6,31 @@
  * expected.
  */
 
+#include <kselftest.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <sys/prctl.h>
 
 #include "test_signals_utils.h"
+#include "sve_helpers.h"
 #include "testcases.h"
 
 static union {
 	ucontext_t uc;
 	char buf[1024 * 64];
 } context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
 
 static bool sve_get_vls(struct tdescr *td)
 {
-	int vq, vl;
+	int res = sve_fill_vls(VLS_USE_SVE, 1);
 
-	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
-	 */
-	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SVE_SET_VL, vq * 16);
-		if (vl == -1)
-			return false;
-
-		vl &= PR_SVE_VL_LEN_MASK;
-
-		/* Skip missing VLs */
-		vq = sve_vq_from_vl(vl);
+	if (!res)
+		return true;
 
-		vls[nvls++] = vl;
-	}
-
-	/* We need at least one VL */
-	if (nvls < 1) {
-		fprintf(stderr, "Only %d VL supported\n", nvls);
-		return false;
-	}
+	if (res == KSFT_SKIP)
+		td->result = KSFT_SKIP;
 
-	return true;
+	return false;
 }
 
 static void setup_sve_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 674b88cc8c39..e6daa94fcd2e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -6,29 +6,6 @@
 
 #include "testcases.h"
 
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
-				size_t resv_sz, size_t *offset)
-{
-	size_t offs = 0;
-	struct _aarch64_ctx *found = NULL;
-
-	if (!head || resv_sz < HDR_SZ)
-		return found;
-
-	while (offs <= resv_sz - HDR_SZ &&
-	       head->magic != magic && head->magic) {
-		offs += head->size;
-		head = GET_RESV_NEXT_HEAD(head);
-	}
-	if (head->magic == magic) {
-		found = head;
-		if (offset)
-			*offset = offs;
-	}
-
-	return found;
-}
-
 bool validate_extra_context(struct extra_context *extra, char **err,
 			    void **extra_data, size_t *extra_size)
 {
@@ -184,6 +161,10 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 			if (head->size != sizeof(struct esr_context))
 				*err = "Bad size for esr_context";
 			break;
+		case POE_MAGIC:
+			if (head->size != sizeof(struct poe_context))
+				*err = "Bad size for poe_context";
+			break;
 		case TPIDR2_MAGIC:
 			if (head->size != sizeof(struct tpidr2_context))
 				*err = "Bad size for tpidr2_context";
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index 7727126347e0..9872b8912714 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -26,6 +26,9 @@
 #define HDR_SZ \
 	sizeof(struct _aarch64_ctx)
 
+#define GET_UC_RESV_HEAD(uc) \
+	(struct _aarch64_ctx *)(&(uc->uc_mcontext.__reserved))
+
 #define GET_SF_RESV_HEAD(sf) \
 	(struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved)
 
@@ -88,8 +91,38 @@ struct fake_sigframe {
 
 bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
 
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
-				size_t resv_sz, size_t *offset);
+/*
+ * Walk the records starting at head, accumulating each record's size into
+ * an offset, until one whose magic equals magic is found, a record with a
+ * zero magic is hit, or the offset leaves the resv_sz bytes being searched.
+ *
+ * Returns the matching record, storing its accumulated offset in *offset
+ * when offset is non-NULL, or NULL if there was no match.
+ */
+static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+				size_t resv_sz, size_t *offset)
+{
+	size_t offs = 0;
+	struct _aarch64_ctx *found = NULL;
+
+	if (!head || resv_sz < HDR_SZ)
+		return found;
+
+	while (offs <= resv_sz - HDR_SZ &&
+	       head->magic != magic && head->magic) {
+		offs += head->size;
+		head = GET_RESV_NEXT_HEAD(head);
+	}
+	/* Only inspect head if the walk stopped inside the searched area */
+	if (offs <= resv_sz - HDR_SZ && head->magic == magic) {
+		found = head;
+		if (offset)
+			*offset = offs;
+	}
+
+	return found;
+}
+
 
 static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head,
 						  size_t resv_sz,
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
index 4d6f94b6178f..ce26e9c2fa5e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
@@ -6,47 +6,31 @@
  * expected.
  */
 
+#include <kselftest.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <sys/prctl.h>
 
 #include "test_signals_utils.h"
+#include "sve_helpers.h"
 #include "testcases.h"
 
 static union {
 	ucontext_t uc;
 	char buf[1024 * 128];
 } context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
 
 static bool sme_get_vls(struct tdescr *td)
 {
-	int vq, vl;
+	int res = sve_fill_vls(VLS_USE_SME, 1);
 
-	/*
-	 * Enumerate up to SME_VQ_MAX vector lengths
-	 */
-	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SME_SET_VL, vq * 16);
-		if (vl == -1)
-			return false;
-
-		vl &= PR_SME_VL_LEN_MASK;
-
-		/* Skip missing VLs */
-		vq = sve_vq_from_vl(vl);
+	if (!res)
+		return true;
 
-		vls[nvls++] = vl;
-	}
-
-	/* We need at least one VL */
-	if (nvls < 1) {
-		fprintf(stderr, "Only %d VL supported\n", nvls);
-		return false;
-	}
+	if (res == KSFT_SKIP)
+		td->result = KSFT_SKIP;
 
-	return true;
+	return false;
 }
 
 static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
index 174ad6656696..b9e13f27f1f9 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -6,51 +6,31 @@
  * expected.
  */
 
+#include <kselftest.h>
 #include <signal.h>
 #include <ucontext.h>
 #include <sys/prctl.h>
 
 #include "test_signals_utils.h"
+#include "sve_helpers.h"
 #include "testcases.h"
 
 static union {
 	ucontext_t uc;
 	char buf[1024 * 128];
 } context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
 
 static bool sme_get_vls(struct tdescr *td)
 {
-	int vq, vl;
+	int res = sve_fill_vls(VLS_USE_SME, 1);
 
-	/*
-	 * Enumerate up to SME_VQ_MAX vector lengths
-	 */
-	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SME_SET_VL, vq * 16);
-		if (vl == -1)
-			return false;
-
-		vl &= PR_SME_VL_LEN_MASK;
-
-		/* Did we find the lowest supported VL? */
-		if (vq < sve_vq_from_vl(vl))
-			break;
+	if (!res)
+		return true;
 
-		/* Skip missing VLs */
-		vq = sve_vq_from_vl(vl);
-
-		vls[nvls++] = vl;
-	}
-
-	/* We need at least one VL */
-	if (nvls < 1) {
-		fprintf(stderr, "Only %d VL supported\n", nvls);
-		return false;
-	}
+	if (res == KSFT_SKIP)
+		td->result = KSFT_SKIP;
 
-	return true;
+	return false;
 }
 
 static void setup_za_regs(void)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
index 11ee801e75e7..6c3b4d4f173a 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -34,6 +34,12 @@ DECLARE_TRACE(bpf_testmod_test_write_bare,
 	TP_ARGS(task, ctx)
 );
 
+/* Used in bpf_testmod_test_read() to test __nullable suffix */
+DECLARE_TRACE(bpf_testmod_test_nullable_bare,
+	TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable),
+	TP_ARGS(ctx__nullable)
+);
+
 #undef BPF_TESTMOD_DECLARE_TRACE
 #ifdef DECLARE_TRACE_WRITABLE
 #define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index fd28c1157bd3..a32771da4293 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -356,6 +356,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 	if (bpf_testmod_loop_test(101) > 100)
 		trace_bpf_testmod_test_read(current, &ctx);
 
+	trace_bpf_testmod_test_nullable_bare(NULL);
+
 	/* Magic number to enable writable tp */
 	if (len == 64) {
 		struct bpf_testmod_test_writable_ctx writable = {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 00965a6e83bb..61de88cf4ad0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3551,6 +3551,40 @@ static struct btf_raw_test raw_tests[] = {
 	BTF_STR_SEC("\0x\0?.foo bar:buz"),
 },
 {
+	.descr = "datasec: name with non-printable first char is not ok",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* VAR x */                                     /* [2] */
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+		BTF_VAR_STATIC,
+		/* DATASEC \7foo */                             /* [3] */
+		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(2, 0, 4),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0x\0\7foo"),
+	.err_str = "Invalid name",
+	.btf_load_err = true,
+},
+{
+	.descr = "datasec: name '\\0' is not ok",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* VAR x */                                     /* [2] */
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+		BTF_VAR_STATIC,
+		/* DATASEC \0 */                                /* [3] */
+		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+		BTF_VAR_SECINFO_ENC(2, 0, 4),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0x\0"),
+	.err_str = "Invalid name",
+	.btf_load_err = true,
+},
+{
 	.descr = "type name '?foo' is not ok",
 	.raw_types = {
 		/* union ?foo; */
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 7cfac53c0d58..b614a5272dfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -9,6 +9,7 @@
 enum test_setup_type {
 	SETUP_SYSCALL_SLEEP,
 	SETUP_SKB_PROG,
+	SETUP_SKB_PROG_TP,
 };
 
 static struct {
@@ -28,6 +29,7 @@ static struct {
 	{"test_dynptr_clone", SETUP_SKB_PROG},
 	{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
 	{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
+	{"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
 };
 
 static void verify_success(const char *prog_name, enum test_setup_type setup_type)
@@ -35,7 +37,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
 	struct dynptr_success *skel;
 	struct bpf_program *prog;
 	struct bpf_link *link;
-       int err;
+	int err;
 
 	skel = dynptr_success__open();
 	if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
@@ -47,7 +49,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
 	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
 		goto cleanup;
 
-       bpf_program__set_autoload(prog, true);
+	bpf_program__set_autoload(prog, true);
 
 	err = dynptr_success__load(skel);
 	if (!ASSERT_OK(err, "dynptr_success__load"))
@@ -87,6 +89,37 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
 
 		break;
 	}
+	case SETUP_SKB_PROG_TP:
+	{
+		struct __sk_buff skb = {};
+		struct bpf_object *obj;
+		int aux_prog_fd;
+
+		/* Just use its test_run to trigger kfree_skb tracepoint */
+		err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+					 &obj, &aux_prog_fd);
+		if (!ASSERT_OK(err, "prog_load sched cls"))
+			goto cleanup;
+
+		LIBBPF_OPTS(bpf_test_run_opts, topts,
+			    .data_in = &pkt_v4,
+			    .data_size_in = sizeof(pkt_v4),
+			    .ctx_in = &skb,
+			    .ctx_size_in = sizeof(skb),
+		);
+
+		link = bpf_program__attach(prog);
+		if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+			goto cleanup;
+
+		err = bpf_prog_test_run_opts(aux_prog_fd, &topts);
+		bpf_link__destroy(link);
+
+		if (!ASSERT_OK(err, "test_run"))
+			goto cleanup;
+
+		break;
+	}
 	}
 
 	ASSERT_EQ(skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 9e5f38739104..6b3078dd5645 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -378,8 +378,8 @@ struct test tests[] = {
 			.iph_inner.ihl = 5,
 			.iph_inner.protocol = IPPROTO_TCP,
 			.iph_inner.tot_len =
-				__bpf_constant_htons(MAGIC_BYTES) -
-				sizeof(struct iphdr),
+				__bpf_constant_htons(MAGIC_BYTES -
+				sizeof(struct iphdr)),
 			.tcp.doff = 5,
 			.tcp.source = 80,
 			.tcp.dest = 8080,
@@ -407,8 +407,8 @@ struct test tests[] = {
 			.iph_inner.ihl = 5,
 			.iph_inner.protocol = IPPROTO_TCP,
 			.iph_inner.tot_len =
-				__bpf_constant_htons(MAGIC_BYTES) -
-				sizeof(struct iphdr),
+				__bpf_constant_htons(MAGIC_BYTES -
+				sizeof(struct iphdr)),
 			.tcp.doff = 5,
 			.tcp.source = 80,
 			.tcp.dest = 8080,
@@ -436,8 +436,8 @@ struct test tests[] = {
 			.iph_inner.ihl = 5,
 			.iph_inner.protocol = IPPROTO_TCP,
 			.iph_inner.tot_len =
-				__bpf_constant_htons(MAGIC_BYTES) -
-				sizeof(struct iphdr),
+				__bpf_constant_htons(MAGIC_BYTES -
+				sizeof(struct iphdr)),
 			.tcp.doff = 5,
 			.tcp.source = 99,
 			.tcp.dest = 9090,
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index 7d4a9b3d3722..e12255121c15 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -154,6 +154,51 @@ err_out:
 	close(sfd);
 }
 
+static void test_nonstandard_opt(int family)
+{
+	struct setget_sockopt__bss *bss = skel->bss;
+	struct bpf_link *getsockopt_link = NULL;
+	int sfd = -1, fd = -1, cfd = -1, flags;
+	socklen_t flagslen = sizeof(flags);
+
+	memset(bss, 0, sizeof(*bss));
+
+	sfd = start_server(family, SOCK_STREAM,
+			   family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+	if (!ASSERT_GE(sfd, 0, "start_server"))
+		return;
+
+	fd = connect_to_fd(sfd, 0);
+	if (!ASSERT_GE(fd, 0, "connect_to_fd_server"))
+		goto err_out;
+
+	/* cgroup/getsockopt prog will intercept getsockopt() below and
+	 * retrieve the tcp socket bpf_sock_ops_cb_flags value for the
+	 * accept()ed socket; this was set earlier in the passive established
+	 * callback for the accept()ed socket via bpf_setsockopt().
+	 */
+	getsockopt_link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd);
+	if (!ASSERT_OK_PTR(getsockopt_link, "getsockopt prog"))
+		goto err_out;
+
+	cfd = accept(sfd, NULL, 0);
+	if (!ASSERT_GE(cfd, 0, "accept"))
+		goto err_out;
+
+	if (!ASSERT_OK(getsockopt(cfd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, &flagslen),
+		       "getsockopt_flags"))
+		goto err_out;
+	ASSERT_EQ(flags & BPF_SOCK_OPS_STATE_CB_FLAG, BPF_SOCK_OPS_STATE_CB_FLAG,
+		  "cb_flags_set");
+err_out:
+	close(sfd);
+	if (fd != -1)
+		close(fd);
+	if (cfd != -1)
+		close(cfd);
+	bpf_link__destroy(getsockopt_link);
+}
+
 void test_setget_sockopt(void)
 {
 	cg_fd = test__join_cgroup(CG_NAME);
@@ -191,6 +236,8 @@ void test_setget_sockopt(void)
 	test_udp(AF_INET);
 	test_ktls(AF_INET6);
 	test_ktls(AF_INET);
+	test_nonstandard_opt(AF_INET);
+	test_nonstandard_opt(AF_INET6);
 
 done:
 	setget_sockopt__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 1337153eb0ad..82bfb266741c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -451,11 +451,11 @@ out:
 #define MAX_EVENTS 10
 static void test_sockmap_skb_verdict_shutdown(void)
 {
+	int n, err, map, verdict, c1 = -1, p1 = -1;
 	struct epoll_event ev, events[MAX_EVENTS];
-	int n, err, map, verdict, s, c1 = -1, p1 = -1;
 	struct test_sockmap_pass_prog *skel;
-	int epollfd;
 	int zero = 0;
+	int epollfd;
 	char b;
 
 	skel = test_sockmap_pass_prog__open_and_load();
@@ -469,10 +469,7 @@ static void test_sockmap_skb_verdict_shutdown(void)
 	if (!ASSERT_OK(err, "bpf_prog_attach"))
 		goto out;
 
-	s = socket_loopback(AF_INET, SOCK_STREAM);
-	if (s < 0)
-		goto out;
-	err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+	err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
 	if (err < 0)
 		goto out;
 
@@ -506,8 +503,8 @@ out:
 
 static void test_sockmap_skb_verdict_fionread(bool pass_prog)
 {
+	int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
 	int expected, zero = 0, sent, recvd, avail;
-	int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
 	struct test_sockmap_pass_prog *pass = NULL;
 	struct test_sockmap_drop_prog *drop = NULL;
 	char buf[256] = "0123456789";
@@ -534,11 +531,8 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
 	if (!ASSERT_OK(err, "bpf_prog_attach"))
 		goto out;
 
-	s = socket_loopback(AF_INET, SOCK_STREAM);
-	if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
-		goto out;
-	err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
-	if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+	err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+	if (!ASSERT_OK(err, "create_socket_pairs()"))
 		goto out;
 
 	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
@@ -570,16 +564,12 @@ out:
 
 static void test_sockmap_skb_verdict_peek_helper(int map)
 {
-	int err, s, c1, p1, zero = 0, sent, recvd, avail;
+	int err, c1, p1, zero = 0, sent, recvd, avail;
 	char snd[256] = "0123456789";
 	char rcv[256] = "0";
 
-	s = socket_loopback(AF_INET, SOCK_STREAM);
-	if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
-		return;
-
-	err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
-	if (!ASSERT_OK(err, "create_pairs(s)"))
+	err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+	if (!ASSERT_OK(err, "create_pair()"))
 		return;
 
 	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index e880f97bc44d..38e35c72bdaa 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -3,6 +3,9 @@
 
 #include <linux/vm_sockets.h>
 
+/* include/linux/net.h */
+#define SOCK_TYPE_MASK 0xf
+
 #define IO_TIMEOUT_SEC 30
 #define MAX_STRERR_LEN 256
 #define MAX_TEST_NAME 80
@@ -14,6 +17,17 @@
 
 #define __always_unused	__attribute__((__unused__))
 
+/* include/linux/cleanup.h */
+#define __get_and_null(p, nullvalue)                                           \
+	({                                                                     \
+		__auto_type __ptr = &(p);                                      \
+		__auto_type __val = *__ptr;                                    \
+		*__ptr = nullvalue;                                            \
+		__val;                                                         \
+	})
+
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
 #define _FAIL(errnum, fmt...)                                                  \
 	({                                                                     \
 		error_at_line(0, (errnum), __func__, __LINE__, fmt);           \
@@ -179,6 +193,14 @@
 		__ret;                                                         \
 	})
 
+static inline void close_fd(int *fd)
+{
+	if (*fd >= 0)
+		xclose(*fd);
+}
+
+#define __close_fd __attribute__((cleanup(close_fd)))
+
 static inline int poll_connect(int fd, unsigned int timeout_sec)
 {
 	struct timeval timeout = { .tv_sec = timeout_sec };
@@ -312,54 +334,6 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
 	return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
 }
 
-static inline int create_pair(int s, int family, int sotype, int *c, int *p)
-{
-	struct sockaddr_storage addr;
-	socklen_t len;
-	int err = 0;
-
-	len = sizeof(addr);
-	err = xgetsockname(s, sockaddr(&addr), &len);
-	if (err)
-		return err;
-
-	*c = xsocket(family, sotype, 0);
-	if (*c < 0)
-		return errno;
-	err = xconnect(*c, sockaddr(&addr), len);
-	if (err) {
-		err = errno;
-		goto close_cli0;
-	}
-
-	*p = xaccept_nonblock(s, NULL, NULL);
-	if (*p < 0) {
-		err = errno;
-		goto close_cli0;
-	}
-	return err;
-close_cli0:
-	close(*c);
-	return err;
-}
-
-static inline int create_socket_pairs(int s, int family, int sotype,
-				      int *c0, int *c1, int *p0, int *p1)
-{
-	int err;
-
-	err = create_pair(s, family, sotype, c0, p0);
-	if (err)
-		return err;
-
-	err = create_pair(s, family, sotype, c1, p1);
-	if (err) {
-		close(*c0);
-		close(*p0);
-	}
-	return err;
-}
-
 static inline int enable_reuseport(int s, int progfd)
 {
 	int err, one = 1;
@@ -412,5 +386,84 @@ static inline int socket_loopback(int family, int sotype)
 	return socket_loopback_reuseport(family, sotype, -1);
 }
 
+static inline int create_pair(int family, int sotype, int *p0, int *p1)
+{
+	__close_fd int s, c = -1, p = -1;
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int err;
+
+	s = socket_loopback(family, sotype);
+	if (s < 0)
+		return s;
+
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		return err;
+
+	c = xsocket(family, sotype, 0);
+	if (c < 0)
+		return c;
+
+	err = connect(c, sockaddr(&addr), len);
+	if (err) {
+		if (errno != EINPROGRESS) {
+			FAIL_ERRNO("connect");
+			return err;
+		}
+
+		err = poll_connect(c, IO_TIMEOUT_SEC);
+		if (err) {
+			FAIL_ERRNO("poll_connect");
+			return err;
+		}
+	}
+
+	switch (sotype & SOCK_TYPE_MASK) {
+	case SOCK_DGRAM:
+		err = xgetsockname(c, sockaddr(&addr), &len);
+		if (err)
+			return err;
+
+		err = xconnect(s, sockaddr(&addr), len);
+		if (err)
+			return err;
+
+		*p0 = take_fd(s);
+		break;
+	case SOCK_STREAM:
+	case SOCK_SEQPACKET:
+		p = xaccept_nonblock(s, NULL, NULL);
+		if (p < 0)
+			return p;
+
+		*p0 = take_fd(p);
+		break;
+	default:
+		FAIL("Unsupported socket type %#x", sotype);
+		return -EOPNOTSUPP;
+	}
+
+	*p1 = take_fd(c);
+	return 0;
+}
+
+static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
+				      int *p0, int *p1)
+{
+	int err;
+
+	err = create_pair(family, sotype, c0, p0);
+	if (err)
+		return err;
+
+	err = create_pair(family, sotype, c1, p1);
+	if (err) {
+		close(*c0);
+		close(*p0);
+	}
+
+	return err;
+}
 
 #endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 9ce0e0e0b7da..da5a6fb03b69 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -677,7 +677,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 			       int verd_mapfd, enum redir_mode mode)
 {
 	const char *log_prefix = redir_mode_str(mode);
-	int s, c0, c1, p0, p1;
+	int c0, c1, p0, p1;
 	unsigned int pass;
 	int err, n;
 	u32 key;
@@ -685,13 +685,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 
 	zero_verdict_count(verd_mapfd);
 
-	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
-	if (s < 0)
-		return;
-
-	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+	err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+				  &p0, &p1);
 	if (err)
-		goto close_srv;
+		return;
 
 	err = add_to_sockmap(sock_mapfd, p0, p1);
 	if (err)
@@ -722,8 +719,6 @@ close:
 	xclose(c1);
 	xclose(p0);
 	xclose(c0);
-close_srv:
-	xclose(s);
 }
 
 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
@@ -909,7 +904,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk
 
 static void redir_partial(int family, int sotype, int sock_map, int parser_map)
 {
-	int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+	int c0 = -1, c1 = -1, p0 = -1, p1 = -1;
 	int err, n, key, value;
 	char buf[] = "abc";
 
@@ -919,13 +914,10 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map)
 	if (err)
 		return;
 
-	s = socket_loopback(family, sotype | SOCK_NONBLOCK);
-	if (s < 0)
-		goto clean_parser_map;
-
-	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+	err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+				  &p0, &p1);
 	if (err)
-		goto close_srv;
+		goto clean_parser_map;
 
 	err = add_to_sockmap(sock_map, p0, p1);
 	if (err)
@@ -944,8 +936,6 @@ close:
 	xclose(p0);
 	xclose(c1);
 	xclose(p1);
-close_srv:
-	xclose(s);
 
 clean_parser_map:
 	key = 0;
@@ -1500,49 +1490,7 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma
 /* Returns two connected loopback vsock sockets */
 static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
 {
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int s, p, c;
-
-	s = socket_loopback(AF_VSOCK, sotype);
-	if (s < 0)
-		return -1;
-
-	c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
-	if (c == -1)
-		goto close_srv;
-
-	if (getsockname(s, sockaddr(&addr), &len) < 0)
-		goto close_cli;
-
-	if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
-		FAIL_ERRNO("connect");
-		goto close_cli;
-	}
-
-	len = sizeof(addr);
-	p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
-	if (p < 0)
-		goto close_cli;
-
-	if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
-		FAIL_ERRNO("poll_connect");
-		goto close_acc;
-	}
-
-	*v0 = p;
-	*v1 = c;
-
-	return 0;
-
-close_acc:
-	close(p);
-close_cli:
-	close(c);
-close_srv:
-	close(s);
-
-	return -1;
+	return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
 }
 
 static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
@@ -1691,44 +1639,7 @@ static void test_reuseport(struct test_sockmap_listen *skel,
 
 static int inet_socketpair(int family, int type, int *s, int *c)
 {
-	struct sockaddr_storage addr;
-	socklen_t len;
-	int p0, c0;
-	int err;
-
-	p0 = socket_loopback(family, type | SOCK_NONBLOCK);
-	if (p0 < 0)
-		return p0;
-
-	len = sizeof(addr);
-	err = xgetsockname(p0, sockaddr(&addr), &len);
-	if (err)
-		goto close_peer0;
-
-	c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
-	if (c0 < 0) {
-		err = c0;
-		goto close_peer0;
-	}
-	err = xconnect(c0, sockaddr(&addr), len);
-	if (err)
-		goto close_cli0;
-	err = xgetsockname(c0, sockaddr(&addr), &len);
-	if (err)
-		goto close_cli0;
-	err = xconnect(p0, sockaddr(&addr), len);
-	if (err)
-		goto close_cli0;
-
-	*s = p0;
-	*c = c0;
-	return 0;
-
-close_cli0:
-	xclose(c0);
-close_peer0:
-	xclose(p0);
-	return err;
+	return create_pair(family, type | SOCK_NONBLOCK, s, c);
 }
 
 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
@@ -1795,11 +1706,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
 	int sfd[2];
 	int err;
 
-	if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+	if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
 		return;
 	c0 = sfd[0], p0 = sfd[1];
 
-	err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+	err = inet_socketpair(family, type, &p1, &c1);
 	if (err)
 		goto close;
 
@@ -1847,7 +1758,7 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
 	int sfd[2];
 	int err;
 
-	err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+	err = inet_socketpair(family, type, &p0, &c0);
 	if (err)
 		return;
 
@@ -1882,7 +1793,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
 	unix_inet_redir_to_connected(family, SOCK_DGRAM,
 				     sock_map, -1, verdict_map,
 				     REDIR_EGRESS, NO_FLAGS);
-	unix_inet_redir_to_connected(family, SOCK_DGRAM,
+	unix_inet_redir_to_connected(family, SOCK_STREAM,
 				     sock_map, -1, verdict_map,
 				     REDIR_EGRESS, NO_FLAGS);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
new file mode 100644
index 000000000000..accc42e01f8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_tp_btf_nullable.skel.h"
+
+void test_tp_btf_nullable(void)
+{
+	if (!env.has_testmod) {
+		test__skip();
+		return;
+	}
+
+	RUN_TESTS(test_tp_btf_nullable);
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index e35bc1eac52a..c3bc186af21e 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -6,6 +6,7 @@
 #include <stdbool.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 #include <linux/if_ether.h>
 #include "bpf_misc.h"
 #include "bpf_kfuncs.h"
@@ -1254,6 +1255,30 @@ int skb_invalid_ctx(void *ctx)
 	return 0;
 }
 
+SEC("fentry/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_dynptr_from_skb(skb, 0, &ptr);
+
+	return 0;
+}
+
+SEC("fexit/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fexit, void *skb)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail */
+	bpf_dynptr_from_skb(skb, 0, &ptr);
+
+	return 0;
+}
+
 /* Reject writes to dynptr slot for uninit arg */
 SEC("?raw_tp")
 __failure __msg("potential write to dynptr at off=-16")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 5985920d162e..bfcc85686cf0 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -5,6 +5,7 @@
 #include <stdbool.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 #include "bpf_misc.h"
 #include "bpf_kfuncs.h"
 #include "errno.h"
@@ -544,3 +545,25 @@ int test_dynptr_skb_strcmp(struct __sk_buff *skb)
 
 	return 1;
 }
+
+SEC("tp_btf/kfree_skb")
+int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
+{
+	__u8 write_data[2] = {1, 2};
+	struct bpf_dynptr ptr;
+	int ret;
+
+	if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+		err = 1;
+		return 1;
+	}
+
+	/* since tp_btf skbs are read only, writes should fail */
+	ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+	if (ret != -EINVAL) {
+		err = 2;
+		return 1;
+	}
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 60518aed1ffc..6dd4318debbf 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -59,6 +59,8 @@ static const struct sockopt_test sol_tcp_tests[] = {
 	{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
 	{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
 	{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
+	{ .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
+	  .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
 	{ .opt = 0, },
 };
 
@@ -353,11 +355,30 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family,
 	return 1;
 }
 
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+	struct bpf_sock *sk = ctx->sk;
+	int *optval = ctx->optval;
+	struct tcp_sock *tp;
+
+	if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS)
+		return 1;
+
+	tp = bpf_core_cast(sk, struct tcp_sock);
+	if (ctx->optval + sizeof(int) <= ctx->optval_end) {
+		*optval = tp->bpf_sock_ops_cb_flags;
+		ctx->retval = 0;
+	}
+	return 1;
+}
+
 SEC("sockops")
 int skops_sockopt(struct bpf_sock_ops *skops)
 {
 	struct bpf_sock *bpf_sk = skops->sk;
 	struct sock *sk;
+	int flags;
 
 	if (!bpf_sk)
 		return 1;
@@ -384,9 +405,8 @@ int skops_sockopt(struct bpf_sock_ops *skops)
 		nr_passive += !(bpf_test_sockopt(skops, sk) ||
 				test_tcp_maxseg(skops, sk) ||
 				test_tcp_saved_syn(skops, sk));
-		bpf_sock_ops_cb_flags_set(skops,
-					  skops->bpf_sock_ops_cb_flags |
-					  BPF_SOCK_OPS_STATE_CB_FLAG);
+		flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG;
+		bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags));
 		break;
 	case BPF_SOCK_OPS_STATE_CB:
 		if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index 44ee0d037f95..eb5cca1fce16 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -486,17 +486,10 @@ static int tcp_validate_cookie(struct tcp_syncookie *ctx)
 		goto err;
 
 	mssind = (cookie & (3 << 6)) >> 6;
-	if (ctx->ipv4) {
-		if (mssind > ARRAY_SIZE(msstab4))
-			goto err;
-
+	if (ctx->ipv4)
 		ctx->attrs.mss = msstab4[mssind];
-	} else {
-		if (mssind > ARRAY_SIZE(msstab6))
-			goto err;
-
+	else
 		ctx->attrs.mss = msstab6[mssind];
-	}
 
 	ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
 	ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
new file mode 100644
index 000000000000..bba3e37f749b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+#include "bpf_misc.h"
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+	return nullable_ctx->len;
+}
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare")
+int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+	if (nullable_ctx)
+		return nullable_ctx->len;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 8144fd145237..1ee0ef114f9d 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -324,6 +324,25 @@ out:
 	return zc_avail;
 }
 
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+	unsigned int max_skb_frags = 0;
+	FILE *file;
+
+	file = fopen(MAX_SKB_FRAGS_PATH, "r");
+	if (!file) {
+		ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+		return 0;
+	}
+
+	if (fscanf(file, "%u", &max_skb_frags) != 1)
+		ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+	fclose(file);
+	return max_skb_frags;
+}
+
 static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
 	{"busy-poll", no_argument, 0, 'b'},
@@ -2244,13 +2263,24 @@ static int testapp_poll_rxq_tmout(struct test_spec *test)
 
 static int testapp_too_many_frags(struct test_spec *test)
 {
-	struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
+	struct pkt *pkts;
 	u32 max_frags, i;
+	int ret;
 
-	if (test->mode == TEST_MODE_ZC)
+	if (test->mode == TEST_MODE_ZC) {
 		max_frags = test->ifobj_tx->xdp_zc_max_segs;
-	else
-		max_frags = XSK_DESC__MAX_SKB_FRAGS;
+	} else {
+		max_frags = get_max_skb_frags();
+		if (!max_frags) {
+			ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
+			max_frags = 17;
+		}
+		max_frags += 1;
+	}
+
+	pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+	if (!pkts)
+		return TEST_FAILURE;
 
 	test->mtu = MAX_ETH_JUMBO_SIZE;
 
@@ -2280,7 +2310,10 @@ static int testapp_too_many_frags(struct test_spec *test)
 	pkts[2 * max_frags + 1].valid = true;
 
 	pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
-	return testapp_validate_traffic(test);
+	ret = testapp_validate_traffic(test);
+
+	free(pkts);
+	return ret;
 }
 
 static int xsk_load_xdp_programs(struct ifobject *ifobj)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 885c948c5d83..e46e823f6a1a 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -55,7 +55,6 @@
 #define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
 #define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
 #define XSK_DESC__INVALID_OPTION (0xffff)
-#define XSK_DESC__MAX_SKB_FRAGS 18
 #define HUGEPAGE_SIZE (2 * 1024 * 1024)
 #define PKT_DUMP_NB_TO_PRINT 16
 #define RUN_ALL_TESTS UINT_MAX
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 12b4eb9d0434..e0d9851fe1c9 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -26,6 +26,10 @@
 #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
 #endif
 
+#ifndef F_CREATED_QUERY
+#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
+#endif
+
 static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
 				  unsigned int flags)
 {
@@ -624,4 +628,39 @@ TEST(close_range_bitmap_corruption)
 	EXPECT_EQ(0, WEXITSTATUS(status));
 }
 
+TEST(fcntl_created)
+{
+	for (int i = 0; i < 101; i++) {
+		int fd;
+		char path[PATH_MAX];
+
+		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+		ASSERT_GE(fd, 0) {
+			if (errno == ENOENT)
+				SKIP(return,
+					   "Skipping test since /dev/null does not exist");
+		}
+
+		/* We didn't create "/dev/null". */
+		EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+		close(fd);
+
+		sprintf(path, "aaaa_%d", i);
+		fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600);
+		ASSERT_GE(fd, 0);
+
+		/* We created "aaaa_%d". */
+		EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1);
+		close(fd);
+
+		fd = open(path, O_RDONLY | O_CLOEXEC);
+		ASSERT_GE(fd, 0);
+
+		/* We're opening it again, so no positive creation check. */
+		EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+		close(fd);
+		unlink(path);
+	}
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index e54f382bcb02..39fb97a8c1df 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,8 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 
-TEST_INCLUDES := $(wildcard lib/py/*.py)
+TEST_INCLUDES := $(wildcard lib/py/*.py) \
+		 ../../net/net_helper.sh \
+		 ../../net/lib.sh \
 
 TEST_PROGS := \
+	netcons_basic.sh \
 	ping.py \
 	queues.py \
 	stats.py \
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
index f6a58ce8a230..a2d8af60876d 100644
--- a/tools/testing/selftests/drivers/net/config
+++ b/tools/testing/selftests/drivers/net/config
@@ -1,2 +1,6 @@
 CONFIG_IPV6=y
 CONFIG_NETDEVSIM=m
+CONFIG_CONFIGFS_FS=y
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index 026d98976c35..05b6fbb3fcdd 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
+import errno
 import time
 import os
 from lib.py import ksft_run, ksft_exit, ksft_pr
@@ -61,7 +62,7 @@ def test_pp_alloc(cfg, netdevnl):
     try:
         stats = get_stats()
     except NlError as e:
-        if e.nl_msg.error == -95:
+        if e.nl_msg.error == -errno.EOPNOTSUPP:
             stats = {}
         else:
             raise
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 011508ca604b..9d7adb3cf33b 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -3,7 +3,7 @@
 
 import datetime
 import random
-from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt
+from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt
 from lib.py import NetDrvEpEnv
 from lib.py import EthtoolFamily, NetdevFamily
 from lib.py import KsftSkipEx
@@ -90,10 +90,10 @@ def _send_traffic_check(cfg, port, name, params):
     ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
     if params.get('noise'):
         ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
-                "traffic on other queues:" + str(cnts))
+                f"traffic on other queues ({name})':" + str(cnts))
     if params.get('empty'):
         ksft_eq(sum(cnts[i] for i in params['empty']), 0,
-                "traffic on inactive queues: " + str(cnts))
+                f"traffic on inactive queues ({name}): " + str(cnts))
 
 
 def test_rss_key_indir(cfg):
@@ -302,6 +302,78 @@ def test_hitless_key_update(cfg):
     ksft_eq(carrier1 - carrier0, 0)
 
 
+def test_rss_context_dump(cfg):
+    """
+    Test dumping RSS contexts. This test mostly exercises the kernel APIs.
+    """
+
+    # Get a random key of the right size
+    data = get_rss(cfg)
+    if 'rss-hash-key' in data:
+        key_data = _rss_key_rand(len(data['rss-hash-key']))
+        key = _rss_key_str(key_data)
+    else:
+        key_data = []
+        key = "ba:ad"
+
+    ids = []
+    try:
+        ids.append(ethtool_create(cfg, "-X", f"context new"))
+        defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+        ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1"))
+        defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+        ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}"))
+        defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+    except CmdExitFailure:
+        if not ids:
+            raise KsftSkipEx("Unable to add any contexts")
+        ksft_pr(f"Added only {len(ids)} out of 3 contexts")
+
+    expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids])
+
+    # Dump all
+    ctxs = cfg.ethnl.rss_get({}, dump=True)
+    tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+    ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+    ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+    ksft_eq(expect_tuples, ctx_tuples)
+
+    # Sanity-check the results
+    for data in ctxs:
+        ksft_ne(set(data['indir']), {0}, "indir table is all zero")
+        ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+
+        # More specific checks
+        if len(ids) > 1 and data.get('context') == ids[1]:
+            ksft_eq(set(data['indir']), {0, 1},
+                    "ctx1 - indir table mismatch")
+        if len(ids) > 2 and data.get('context') == ids[2]:
+            ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch")
+
+    # Ifindex filter
+    ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True)
+    tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+    ctx_tuples = set(tuples)
+    ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump")
+    ksft_eq(expect_tuples, ctx_tuples)
+
+    # Skip ctx 0
+    expect_tuples.remove((cfg.ifname, -1))
+
+    ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True)
+    tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+    ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+    ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+    ksft_eq(expect_tuples, ctx_tuples)
+
+    # And finally both with ifindex and skip main
+    ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True)
+    ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs])
+    ksft_eq(expect_tuples, ctx_tuples)
+
+
 def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
     """
     Test separating traffic into RSS contexts.
@@ -542,7 +614,7 @@ def main() -> None:
         ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
                   test_rss_resize, test_hitless_key_update,
                   test_rss_context, test_rss_context4, test_rss_context32,
-                  test_rss_context_queue_reconfigure,
+                  test_rss_context_dump, test_rss_context_queue_reconfigure,
                   test_rss_context_overlap, test_rss_context_overlap2,
                   test_rss_context_out_of_order, test_rss_context4_create_with_cfg],
                  args=(cfg, ))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index a5e800b8f103..1ea9bb695e94 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -4,6 +4,7 @@ import os
 import time
 from pathlib import Path
 from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import ksft_setup
 from lib.py import cmd, ethtool, ip
 from lib.py import NetNS, NetdevSimDev
 from .remote import Remote
@@ -14,7 +15,7 @@ def _load_env_file(src_path):
 
     src_dir = Path(src_path).parent.resolve()
     if not (src_dir / "net.config").exists():
-        return env
+        return ksft_setup(env)
 
     with open((src_dir / "net.config").as_posix(), 'r') as fp:
         for line in fp.readlines():
@@ -30,7 +31,7 @@ def _load_env_file(src_path):
             if len(pair) != 2:
                 raise Exception("Can't parse configuration line:", full_file)
             env[pair[0]] = pair[1]
-    return env
+    return ksft_setup(env)
 
 
 class NetDrvEnv:
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
new file mode 100755
index 000000000000..06021b2059b7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -0,0 +1,234 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test creates two netdevsim virtual interfaces, assigns one of them (the
+# "destination interface") to a new namespace, and assigns IP addresses to both
+# interfaces.
+#
+# It listens on the destination interface using socat and configures a dynamic
+# target on netconsole, pointing to the destination IP address.
+#
+# Finally, it checks whether the message was received properly on the
+# destination interface.  Note that this test may pollute the kernel log buffer
+# (dmesg) and relies on dynamic configuration and namespaces being configured.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+# Simple script to test dynamic targets in netconsole
+SRCIF="" # to be populated later
+SRCIP=192.168.1.1
+DSTIF="" # to be populated later
+DSTIP=192.168.1.2
+
+PORT="6666"
+MSG="netconsole selftest"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+
+# Used to create and delete namespaces
+source "${SCRIPTDIR}"/../../net/lib.sh
+source "${SCRIPTDIR}"/../../net/net_helper.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+	local NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+
+	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+	udevadm settle 2> /dev/null || true
+
+	local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+	local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+	# These are global variables
+	SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+		-path "$NSIM1"/net -exec basename {} \;)
+	DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+		-path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+	local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+	local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+	local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+	exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+	exec {INITNS_FD}</proc/self/ns/net
+
+	# Bind the dst interface to namespace
+	ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+	# Linking one device to the other one (on the other namespace)
+	if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX"  > $NSIM_DEV_SYS_LINK
+	then
+		echo "linking netdevsim1 with netdevsim2 should succeed"
+		cleanup
+		exit "${ksft_skip}"
+	fi
+}
+
+function configure_ip() {
+	# Configure the IPs for both interfaces
+	ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+	ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+	ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+	ip link set "${SRCIF}" up
+}
+
+function set_network() {
+	# setup_ns function is coming from lib.sh
+	setup_ns NAMESPACE
+
+	# Create both interfaces, and assign the destination to a different
+	# namespace
+	create_ifaces
+
+	# Link both interfaces back to back
+	link_ifaces
+
+	configure_ip
+}
+
+function create_dynamic_target() {
+	DSTMAC=$(ip netns exec "${NAMESPACE}" \
+		 ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+	# Create a dynamic target
+	mkdir "${NETCONS_PATH}"
+
+	echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
+	echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
+	echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
+	echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+
+	echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function cleanup() {
+	local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+	# delete netconsole dynamic reconfiguration
+	echo 0 > "${NETCONS_PATH}"/enabled
+	# Remove the configfs entry
+	rmdir "${NETCONS_PATH}"
+
+	# Delete netdevsim devices
+	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
+	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
+
+	# this is coming from lib.sh
+	cleanup_all_ns
+
+	# Restoring printk configurations
+	echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function listen_port_and_save_to() {
+	local OUTPUT=${1}
+	# Just wait for 2 seconds
+	timeout 2 ip netns exec "${NAMESPACE}" \
+		socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+}
+
+function validate_result() {
+	local TMPFILENAME="$1"
+
+	# Check if the file exists
+	if [ ! -f "$TMPFILENAME" ]; then
+		echo "FAIL: File was not generated." >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+		echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+		cat "${TMPFILENAME}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	# Delete the file once it is validated, otherwise keep it
+	# for debugging purposes
+	rm "${TMPFILENAME}"
+	exit "${ksft_pass}"
+}
+
+function check_for_dependencies() {
+	if [ "$(id -u)" -ne 0 ]; then
+		echo "This test must be run as root" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which socat > /dev/null ; then
+		echo "SKIP: socat(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which ip > /dev/null ; then
+		echo "SKIP: ip(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which udevadm > /dev/null ; then
+		echo "SKIP: udevadm(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+		echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ip link show "${DSTIF}" 2> /dev/null; then
+		echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
+		echo "SKIP: IPs already in use. Skipping it" >&2
+		exit "${ksft_skip}"
+	fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be saved to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Listen for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+# Make sure the message was received in the dst part
+# and exit
+validate_result "${OUTPUT_FILE}"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 820b8e0a22c6..63e3c045a3b2 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -1,10 +1,13 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
+import errno
 from lib.py import ksft_run, ksft_exit, ksft_pr
 from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_disruptive
 from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
 from lib.py import NetDrvEnv
+from lib.py import ip, defer
 
 ethnl = EthtoolFamily()
 netfam = NetdevFamily()
@@ -17,7 +20,7 @@ def check_pause(cfg) -> None:
     try:
         ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
     except NlError as e:
-        if e.error == 95:
+        if e.error == errno.EOPNOTSUPP:
             raise KsftXfailEx("pause not supported by the device")
         raise
 
@@ -32,7 +35,7 @@ def check_fec(cfg) -> None:
     try:
         ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
     except NlError as e:
-        if e.error == 95:
+        if e.error == errno.EOPNOTSUPP:
             raise KsftXfailEx("FEC not supported by the device")
         raise
 
@@ -117,7 +120,7 @@ def qstat_by_ifindex(cfg) -> None:
     # loopback has no stats
     with ksft_raises(NlError) as cm:
         netfam.qstats_get({"ifindex": 1}, dump=True)
-    ksft_eq(cm.exception.nl_msg.error, -95)
+    ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
     ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
 
     # Try to get stats for lowest unused ifindex but not 0
@@ -133,9 +136,31 @@ def qstat_by_ifindex(cfg) -> None:
     ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
 
 
+@ksft_disruptive
+def check_down(cfg) -> None:
+    try:
+        qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+    except NlError as e:
+        if e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("qstats not supported by the device")
+        raise
+
+    ip(f"link set dev {cfg.dev['ifname']} down")
+    defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+    qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+    for k, v in qstat.items():
+        ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
+
+    # exercise per-queue API to make sure that "device down" state
+    # is handled correctly and doesn't crash
+    netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
+
+
 def main() -> None:
     with NetDrvEnv(__file__) as cfg:
-        ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex],
+        ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
+                  check_down],
                  args=(cfg, ))
     ksft_exit()
 
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 6343f4053bd4..4927b9add5ad 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -825,7 +825,7 @@ TEST_F(iommufd_ioas, copy_area)
 {
 	struct iommu_ioas_copy copy_cmd = {
 		.size = sizeof(copy_cmd),
-		.flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+		.flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
 		.dst_ioas_id = self->ioas_id,
 		.src_ioas_id = self->ioas_id,
 		.length = PAGE_SIZE,
@@ -1318,7 +1318,7 @@ TEST_F(iommufd_ioas, copy_sweep)
 {
 	struct iommu_ioas_copy copy_cmd = {
 		.size = sizeof(copy_cmd),
-		.flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+		.flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
 		.src_ioas_id = self->ioas_id,
 		.dst_iova = MOCK_APERTURE_START,
 		.length = MOCK_PAGE_SIZE,
@@ -1608,7 +1608,7 @@ TEST_F(iommufd_mock_domain, user_copy)
 	};
 	struct iommu_ioas_copy copy_cmd = {
 		.size = sizeof(copy_cmd),
-		.flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+		.flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
 		.dst_ioas_id = self->ioas_id,
 		.dst_iova = MOCK_APERTURE_START,
 		.length = BUFFER_SIZE,
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 74954f6a8f94..2c3c58e65a41 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -111,8 +111,11 @@ run_one()
 			stdbuf="/usr/bin/stdbuf --output=L "
 		fi
 		eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}"
-		cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
-		if [ ! -x "$TEST" ]; then
+		if [ -x "$TEST" ]; then
+			cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
+		elif [ -x "./ksft_runner.sh" ]; then
+			cmd="$stdbuf ./ksft_runner.sh ./$BASENAME_TEST"
+		else
 			echo "# Warning: file $TEST is not executable"
 
 			if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ]
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 48d32c5aa3eb..0c4b254ab56b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -152,6 +152,7 @@ TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
 TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
 
 TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
+TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
 TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
@@ -163,6 +164,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
 TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
+TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
 TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
 TEST_GEN_PROGS_aarch64 += arch_timer
 TEST_GEN_PROGS_aarch64 += demand_paging_test
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
new file mode 100644
index 000000000000..a36a7e2db434
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
+ *
+ * The test validates some edge cases related to the arch-timer:
+ * - timers above the max TVAL value.
+ * - timers in the past
+ * - moving counters ahead and behind pending timers.
+ * - reprogramming timers.
+ * - timers fired multiple times.
+ * - masking/unmasking using the timer control mask.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <sys/sysinfo.h>
+
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+static const uint64_t CVAL_MAX = ~0ULL;
+/* tval is a signed 32-bit int. */
+static const int32_t TVAL_MAX = INT32_MAX;
+static const int32_t TVAL_MIN = INT32_MIN;
+
+/* After how much time we say there is no IRQ. */
+static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+
+/* A nice counter value to use as the starting one for most tests. */
+static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+
+/* Number of runs. */
+static const uint32_t NR_TEST_ITERS_DEF = 5;
+
+/* Default wait test time in ms. */
+static const uint32_t WAIT_TEST_MS = 10;
+
+/* Default "long" wait test time in ms. */
+static const uint32_t LONG_WAIT_TEST_MS = 100;
+
+/* Shared with IRQ handler. */
+struct test_vcpu_shared_data {
+	atomic_t handled;
+	atomic_t spurious;
+} shared_data;
+
+struct test_args {
+	/* Virtual or physical timer and counter tests. */
+	enum arch_timer timer;
+	/* Delay used for most timer tests. */
+	uint64_t wait_ms;
+	/* Delay used in the test_long_timer_delays test. */
+	uint64_t long_wait_ms;
+	/* Number of iterations. */
+	int iterations;
+	/* Whether to test the physical timer. */
+	bool test_physical;
+	/* Whether to test the virtual timer. */
+	bool test_virtual;
+};
+
+struct test_args test_args = {
+	.wait_ms = WAIT_TEST_MS,
+	.long_wait_ms = LONG_WAIT_TEST_MS,
+	.iterations = NR_TEST_ITERS_DEF,
+	.test_physical = true,
+	.test_virtual = true,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+enum sync_cmd {
+	SET_COUNTER_VALUE,
+	USERSPACE_USLEEP,
+	USERSPACE_SCHED_YIELD,
+	USERSPACE_MIGRATE_SELF,
+	NO_USERSPACE_CMD,
+};
+
+typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
+static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+
+sleep_method_t sleep_method[] = {
+	sleep_poll,
+	sleep_sched_poll,
+	sleep_migrate,
+	sleep_in_userspace,
+};
+
+typedef void (*irq_wait_method_t)(void);
+
+static void wait_for_non_spurious_irq(void);
+static void wait_poll_for_irq(void);
+static void wait_sched_poll_for_irq(void);
+static void wait_migrate_poll_for_irq(void);
+
+irq_wait_method_t irq_wait_method[] = {
+	wait_for_non_spurious_irq,
+	wait_poll_for_irq,
+	wait_sched_poll_for_irq,
+	wait_migrate_poll_for_irq,
+};
+
+enum timer_view {
+	TIMER_CVAL,
+	TIMER_TVAL,
+};
+
+static void assert_irqs_handled(uint32_t n)
+{
+	int h = atomic_read(&shared_data.handled);
+
+	__GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
+}
+
+static void userspace_cmd(uint64_t cmd)
+{
+	GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
+}
+
+static void userspace_migrate_vcpu(void)
+{
+	userspace_cmd(USERSPACE_MIGRATE_SELF);
+}
+
+static void userspace_sleep(uint64_t usecs)
+{
+	GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
+}
+
+static void set_counter(enum arch_timer timer, uint64_t counter)
+{
+	GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+	unsigned int intid = gic_get_and_ack_irq();
+	enum arch_timer timer;
+	uint64_t cnt, cval;
+	uint32_t ctl;
+	bool timer_condition, istatus;
+
+	if (intid == IAR_SPURIOUS) {
+		atomic_inc(&shared_data.spurious);
+		goto out;
+	}
+
+	if (intid == ptimer_irq)
+		timer = PHYSICAL;
+	else if (intid == vtimer_irq)
+		timer = VIRTUAL;
+	else
+		goto out;
+
+	ctl = timer_get_ctl(timer);
+	cval = timer_get_cval(timer);
+	cnt = timer_get_cntct(timer);
+	timer_condition = cnt >= cval;
+	istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+	GUEST_ASSERT_EQ(timer_condition, istatus);
+
+	/* Disable and mask the timer. */
+	timer_set_ctl(timer, CTL_IMASK);
+
+	atomic_inc(&shared_data.handled);
+
+out:
+	gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+			 uint32_t ctl)
+{
+	atomic_set(&shared_data.handled, 0);
+	atomic_set(&shared_data.spurious, 0);
+	timer_set_cval(timer, cval_cycles);
+	timer_set_ctl(timer, ctl);
+}
+
+/* Reset the IRQ counters, then write TVAL before CTL so a stale value can't fire the timer. */
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, uint32_t ctl)
+{
+	atomic_set(&shared_data.handled, 0);
+	atomic_set(&shared_data.spurious, 0);
+	timer_set_tval(timer, tval_cycles);
+	timer_set_ctl(timer, ctl);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+			 enum timer_view tv)
+{
+	switch (tv) {
+	case TIMER_CVAL:
+		set_cval_irq(timer, xval, ctl);
+		break;
+	case TIMER_TVAL:
+		set_tval_irq(timer, xval, ctl);
+		break;
+	default:
+		GUEST_FAIL("Could not get timer %d", timer);
+	}
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+	int h;
+
+	local_irq_disable();
+
+	for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+		wfi();
+		local_irq_enable();
+		isb(); /* handle IRQ */
+		local_irq_disable();
+	}
+}
+
+/*
+ * Wait for a non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+	int h;
+
+	local_irq_disable();
+
+	h = atomic_read(&shared_data.handled);
+
+	local_irq_enable();
+	while (h == atomic_read(&shared_data.handled)) {
+		if (usp_cmd == NO_USERSPACE_CMD)
+			cpu_relax();
+		else
+			userspace_cmd(usp_cmd);
+	}
+	local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+	poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+	poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+	poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+		       enum sync_cmd usp_cmd)
+{
+	uint64_t cycles = usec_to_cycles(usec);
+	/* Whichever timer we are testing with, sleep with the other. */
+	enum arch_timer sleep_timer = 1 - test_timer;
+	uint64_t start = timer_get_cntct(sleep_timer);
+
+	while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+		if (usp_cmd == NO_USERSPACE_CMD)
+			cpu_relax();
+		else
+			userspace_cmd(usp_cmd);
+	}
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+	guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+	guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+	guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+	userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */
+static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+{
+	set_counter(timer, cnt);
+	timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+			    enum timer_view tv, irq_wait_method_t wm, bool reset_state,
+			    uint64_t reset_cnt)
+{
+	local_irq_disable();
+
+	if (reset_state)
+		reset_timer_state(timer, reset_cnt);
+
+	set_xval_irq(timer, xval, CTL_ENABLE, tv);
+
+	/* This method re-enables IRQs to handle the one we're looking for. */
+	wm();
+
+	assert_irqs_handled(1);
+	local_irq_enable();
+}
+
+/*
+ * The test_timer_* functions will program the timer, wait for it, and assert
+ * the firing of the correct IRQ.
+ *
+ * These functions don't have a timeout and return as soon as they receive an
+ * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
+ * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
+ */
+
+static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+			    irq_wait_method_t wm, bool reset_state,
+			    uint64_t reset_cnt)
+{
+	test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
+}
+
+static void test_timer_tval(enum arch_timer timer, int32_t tval,
+			    irq_wait_method_t wm, bool reset_state,
+			    uint64_t reset_cnt)
+{
+	test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+			reset_cnt);
+}
+
+static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
+				   uint64_t usec, enum timer_view timer_view,
+				   sleep_method_t guest_sleep)
+{
+	local_irq_disable();
+
+	set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
+	guest_sleep(timer, usec);
+
+	local_irq_enable();
+	isb();
+
+	/* Assume success (no IRQ) after waiting usec microseconds */
+	assert_irqs_handled(0);
+}
+
+static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
+			     uint64_t usec, sleep_method_t wm)
+{
+	test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
+}
+
+static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+			     sleep_method_t wm)
+{
+	/* tval will be cast to an int32_t in test_xval_check_no_irq */
+	test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+}
+
+/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
+static void test_timer_control_mask_then_unmask(enum arch_timer timer)
+{
+	reset_timer_state(timer, DEF_CNT);
+	set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+	/* Unmask the timer, and then get an IRQ. */
+	local_irq_disable();
+	timer_set_ctl(timer, CTL_ENABLE);
+	/* This method re-enables IRQs to handle the one we're looking for. */
+	wait_for_non_spurious_irq();
+
+	assert_irqs_handled(1);
+	local_irq_enable();
+}
+
+/* Check that timer control masks actually mask a timer being fired. */
+static void test_timer_control_masks(enum arch_timer timer)
+{
+	reset_timer_state(timer, DEF_CNT);
+
+	/* Local IRQs are not masked at this point. */
+
+	set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+	/* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+	sleep_poll(timer, TIMEOUT_NO_IRQ_US);
+
+	assert_irqs_handled(0);
+	timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_fire_a_timer_multiple_times(enum arch_timer timer,
+					     irq_wait_method_t wm, int num)
+{
+	int i;
+
+	local_irq_disable();
+	reset_timer_state(timer, DEF_CNT);
+
+	set_tval_irq(timer, 0, CTL_ENABLE);
+
+	for (i = 1; i <= num; i++) {
+		/* This method re-enables IRQs to handle the one we're looking for. */
+		wm();
+
+		/* The IRQ handler masked and disabled the timer.
+		 * Enable and unmask it again.
+		 */
+		timer_set_ctl(timer, CTL_ENABLE);
+
+		assert_irqs_handled(i);
+	}
+
+	local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+		test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that we get the timer fired. There is no
+ * timeout for the wait: wm() returns only once an IRQ is handled.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+				     int32_t delta_1_ms, int32_t delta_2_ms)
+{
+	local_irq_disable();
+	reset_timer_state(timer, DEF_CNT);
+
+	/* Program the timer to DEF_CNT + delta_1_ms. */
+	set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+	/* Reprogram the timer to DEF_CNT + delta_2_ms. */
+	timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+	/* This method re-enables IRQs to handle the one we're looking for. */
+	wm();
+
+	/* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+	GUEST_ASSERT(timer_get_cntct(timer) >=
+		     DEF_CNT + msec_to_cycles(delta_2_ms));
+
+	local_irq_enable();
+	assert_irqs_handled(1);
+}
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+	int i;
+	uint64_t base_wait = test_args.wait_ms;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		/*
+		 * Ensure reprogramming works whether going from a
+		 * longer time to a shorter or vice versa.
+		 */
+		test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+					 base_wait);
+		test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+					 2 * base_wait);
+	}
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+	int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+	uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		irq_wait_method_t wm = irq_wait_method[i];
+
+		test_timer_cval(timer, cval, wm, true, DEF_CNT);
+		test_timer_tval(timer, tval, wm, true, DEF_CNT);
+	}
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+	reset_timer_state(timer, DEF_CNT);
+
+	local_irq_disable();
+
+	/* cval in the past */
+	timer_set_cval(timer,
+		       timer_get_cntct(timer) -
+		       msec_to_cycles(test_args.wait_ms));
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+	/* tval in the past */
+	timer_set_tval(timer, -1);
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+	/* tval larger than TVAL_MAX. This requires programming with
+	 * timer_set_cval instead so the value is expressible
+	 */
+	timer_set_cval(timer,
+		       timer_get_cntct(timer) + TVAL_MAX +
+		       msec_to_cycles(test_args.wait_ms));
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+	/*
+	 * tval larger than 2 * TVAL_MAX.
+	 * Twice the TVAL_MAX completely loops around the TVAL.
+	 */
+	timer_set_cval(timer,
+		       timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+		       msec_to_cycles(test_args.wait_ms));
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_tval(timer) <=
+		       msec_to_cycles(test_args.wait_ms));
+
+	/* negative tval that rollovers from 0. */
+	set_counter(timer, msec_to_cycles(1));
+	timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
+	if (use_sched)
+		userspace_migrate_vcpu();
+	GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
+
+	/* tval should keep down-counting from 0 to -1. */
+	timer_set_tval(timer, 0);
+	sleep_poll(timer, 1);
+	GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+	local_irq_enable();
+
+	/* Mask and disable any pending timer. */
+	timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timers_sanity_checks(enum arch_timer timer)
+{
+	timers_sanity_checks(timer, false);
+	/* Check how KVM saves/restores these edge-case values. */
+	timers_sanity_checks(timer, true);
+}
+
+static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
+{
+	local_irq_disable();
+	reset_timer_state(timer, DEF_CNT);
+
+	set_cval_irq(timer,
+		     (uint64_t) TVAL_MAX +
+		     msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
+
+	set_counter(timer, TVAL_MAX);
+
+	/* This method re-enables IRQs to handle the one we're looking for. */
+	wm();
+
+	assert_irqs_handled(1);
+	local_irq_enable();
+}
+
+/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
+static void test_timers_above_tval_max(enum arch_timer timer)
+{
+	uint64_t cval;
+	int i;
+
+	/*
+	 * Test that the system is not implementing cval in terms of
+	 * tval.  If that was the case, setting a cval to "cval = now
+	 * + TVAL_MAX + wait_ms" would wrap to "cval = now +
+	 * wait_ms", and the timer would fire immediately. Test that it
+	 * doesn't.
+	 */
+	for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+		reset_timer_state(timer, DEF_CNT);
+		cval = timer_get_cntct(timer) + TVAL_MAX +
+			msec_to_cycles(test_args.wait_ms);
+		test_cval_no_irq(timer, cval,
+				 msecs_to_usecs(test_args.wait_ms) +
+				 TIMEOUT_NO_IRQ_US, sleep_method[i]);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		/* Get the IRQ by moving the counter forward. */
+		test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
+	}
+}
+
+/*
+ * Template function to be used by the test_move_counter_ahead_* tests.  It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
+				    uint64_t xval, uint64_t cnt_2,
+				    irq_wait_method_t wm, enum timer_view tv)
+{
+	local_irq_disable();
+
+	set_counter(timer, cnt_1);
+	timer_set_ctl(timer, CTL_IMASK);
+
+	set_xval_irq(timer, xval, CTL_ENABLE, tv);
+	set_counter(timer, cnt_2);
+	/* This method re-enables IRQs to handle the one we're looking for. */
+	wm();
+
+	assert_irqs_handled(1);
+	local_irq_enable();
+}
+
+/*
+ * Template function to be used by the test_move_counters_* tests.  It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then checks that no IRQ fires within TIMEOUT_NO_IRQ_US.
+ */
+static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
+					   uint64_t cnt_1, uint64_t xval,
+					   uint64_t cnt_2,
+					   sleep_method_t guest_sleep,
+					   enum timer_view tv)
+{
+	local_irq_disable();
+
+	set_counter(timer, cnt_1);
+	timer_set_ctl(timer, CTL_IMASK);
+
+	set_xval_irq(timer, xval, CTL_ENABLE, tv);
+	set_counter(timer, cnt_2);
+	guest_sleep(timer, TIMEOUT_NO_IRQ_US);
+
+	local_irq_enable();
+	isb();
+
+	/* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+	assert_irqs_handled(0);
+	timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
+				    int32_t tval, uint64_t cnt_2,
+				    irq_wait_method_t wm)
+{
+	test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
+				    uint64_t cval, uint64_t cnt_2,
+				    irq_wait_method_t wm)
+{
+	test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
+}
+
+static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
+					   uint64_t cnt_1, int32_t tval,
+					   uint64_t cnt_2, sleep_method_t wm)
+{
+	test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
+				       TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
+					   uint64_t cnt_1, uint64_t cval,
+					   uint64_t cnt_2, sleep_method_t wm)
+{
+	test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
+				       TIMER_CVAL);
+}
+
+/* Set a timer and then move the counter ahead of it. */
+static void test_move_counters_ahead_of_timers(enum arch_timer timer)
+{
+	int i;
+	int32_t tval;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		irq_wait_method_t wm = irq_wait_method[i];
+
+		test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
+		test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
+
+		/* Move counter ahead of negative tval. */
+		test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
+		test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
+		tval = TVAL_MAX;
+		test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
+					wm);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+		sleep_method_t sm = sleep_method[i];
+
+		test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
+	}
+}
+
+/*
+ * Program a timer, mask it, and then change the tval or counter to cancel it.
+ * Unmask it and check that nothing fires.
+ */
+static void test_move_counters_behind_timers(enum arch_timer timer)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+		sleep_method_t sm = sleep_method[i];
+
+		test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
+					       sm);
+		test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
+	}
+}
+
+static void test_timers_in_the_past(enum arch_timer timer)
+{
+	int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
+	uint64_t cval;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		irq_wait_method_t wm = irq_wait_method[i];
+
+		/* Set a timer wait_ms in the past. */
+		cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
+		test_timer_cval(timer, cval, wm, true, DEF_CNT);
+		test_timer_tval(timer, tval, wm, true, DEF_CNT);
+
+		/* Set a timer to counter=0 (in the past) */
+		test_timer_cval(timer, 0, wm, true, DEF_CNT);
+
+		/* Set a timer for tval=0 (now) */
+		test_timer_tval(timer, 0, wm, true, DEF_CNT);
+
+		/* Set a timer to as far in the past as possible */
+		test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
+	}
+
+	/*
+	 * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
+	 * IRQ as that tval means cval=CVAL_MAX-wait_ms.
+	 */
+	for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+		sleep_method_t sm = sleep_method[i];
+
+		set_counter(timer, msec_to_cycles(test_args.wait_ms));
+		test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
+	}
+}
+
+static void test_long_timer_delays(enum arch_timer timer)
+{
+	int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
+	uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+		irq_wait_method_t wm = irq_wait_method[i];
+
+		test_timer_cval(timer, cval, wm, true, DEF_CNT);
+		test_timer_tval(timer, tval, wm, true, DEF_CNT);
+	}
+}
+
+static void guest_run_iteration(enum arch_timer timer)
+{
+	test_basic_functionality(timer);
+	test_timers_sanity_checks(timer);
+
+	test_timers_above_tval_max(timer);
+	test_timers_in_the_past(timer);
+
+	test_move_counters_ahead_of_timers(timer);
+	test_move_counters_behind_timers(timer);
+	test_reprogram_timers(timer);
+
+	test_timers_fired_multiple_times(timer);
+
+	test_timer_control_mask_then_unmask(timer);
+	test_timer_control_masks(timer);
+}
+
+static void guest_code(enum arch_timer timer)
+{
+	int i;
+
+	local_irq_disable();
+
+	gic_init(GIC_V3, 1);
+
+	timer_set_ctl(VIRTUAL, CTL_IMASK);
+	timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+	gic_irq_enable(vtimer_irq);
+	gic_irq_enable(ptimer_irq);
+	local_irq_enable();
+
+	for (i = 0; i < test_args.iterations; i++) {
+		GUEST_SYNC(i);
+		guest_run_iteration(timer);
+	}
+
+	test_long_timer_delays(timer);
+	GUEST_DONE();
+}
+
+/* Pick the next host CPU (wrapping around) that is set in this thread's affinity mask. */
+static uint32_t next_pcpu(void)
+{
+	uint32_t max = get_nprocs();
+	uint32_t next = sched_getcpu();
+	cpu_set_t cpuset;
+
+	TEST_ASSERT(max > 1, "Need at least two physical cpus");
+	TEST_ASSERT(!sched_getaffinity(0, sizeof(cpuset), &cpuset),
+		    "sched_getaffinity() failed");
+
+	do {
+		next = (next + 1) % CPU_SETSIZE;
+	} while (!CPU_ISSET(next, &cpuset));
+
+	return next;
+}
+
+static void migrate_self(uint32_t new_pcpu)
+{
+	int ret;
+	cpu_set_t cpuset;
+	pthread_t thread;
+
+	thread = pthread_self();
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(new_pcpu, &cpuset);
+
+	pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
+
+	ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+
+	TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
+		    new_pcpu, ret);
+}
+
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+			   enum arch_timer timer)
+{
+	if (timer == PHYSICAL)
+		vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
+	else
+		vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+	enum sync_cmd cmd = uc->args[1];
+	uint64_t val = uc->args[2];
+	enum arch_timer timer = uc->args[3];
+
+	switch (cmd) {
+	case SET_COUNTER_VALUE:
+		kvm_set_cntxct(vcpu, val, timer);
+		break;
+	case USERSPACE_USLEEP:
+		usleep(val);
+		break;
+	case USERSPACE_SCHED_YIELD:
+		sched_yield();
+		break;
+	case USERSPACE_MIGRATE_SELF:
+		migrate_self(next_pcpu());
+		break;
+	default:
+		break;
+	}
+}
+
+static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	/* Start on CPU 0 */
+	migrate_self(0);
+
+	while (true) {
+		vcpu_run(vcpu);
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			handle_sync(vcpu, &uc);
+			break;
+		case UCALL_DONE:
+			goto out;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			goto out;
+		default:
+			TEST_FAIL("Unexpected guest exit\n");
+		}
+	}
+
+ out:
+	return;
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+	vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+			     KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+	vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+			     KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+	sync_global_to_guest(vm, ptimer_irq);
+	sync_global_to_guest(vm, vtimer_irq);
+
+	pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
+			   enum arch_timer timer)
+{
+	*vm = vm_create_with_one_vcpu(vcpu, guest_code);
+	TEST_ASSERT(*vm, "Failed to create the test VM\n");
+
+	vm_init_descriptor_tables(*vm);
+	vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
+				     guest_irq_handler);
+
+	vcpu_init_descriptor_tables(*vcpu);
+	vcpu_args_set(*vcpu, 1, timer);
+
+	test_init_timer_irq(*vm, *vcpu);
+	vgic_v3_setup(*vm, 1, 64);
+	sync_global_to_guest(*vm, test_args);
+}
+
+/* Print usage for every option accepted by parse_args(). */
+static void test_print_help(char *name)
+{
+	pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n",
+		name);
+	pr_info("\t-i: Number of iterations (default: %u)\n", NR_TEST_ITERS_DEF);
+	pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
+	pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
+		LONG_WAIT_TEST_MS);
+	pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
+		WAIT_TEST_MS);
+	pr_info("\t-p: Test physical timer (default: true)\n");
+	pr_info("\t-v: Test virtual timer (default: true)\n");
+	pr_info("\t-h: Print this help message\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
+		switch (opt) {
+		case 'b':
+			test_args.test_physical = true;
+			test_args.test_virtual = true;
+			break;
+		case 'i':
+			test_args.iterations =
+			    atoi_positive("Number of iterations", optarg);
+			break;
+		case 'l':
+			test_args.long_wait_ms =
+			    atoi_positive("Long wait time", optarg);
+			break;
+		case 'p':
+			test_args.test_physical = true;
+			test_args.test_virtual = false;
+			break;
+		case 'v':
+			test_args.test_virtual = true;
+			test_args.test_physical = false;
+			break;
+		case 'w':
+			test_args.wait_ms = atoi_positive("Wait time", optarg);
+			break;
+		case 'h':
+		default:
+			goto err;
+		}
+	}
+
+	return true;
+
+ err:
+	test_print_help(argv[0]);
+	return false;
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	if (!parse_args(argc, argv))
+		exit(KSFT_SKIP);
+
+	if (test_args.test_virtual) {
+		test_vm_create(&vm, &vcpu, VIRTUAL);
+		test_run(vm, vcpu);
+		kvm_vm_free(vm);
+	}
+
+	if (test_args.test_physical) {
+		test_vm_create(&vm, &vcpu, PHYSICAL);
+		test_run(vm, vcpu);
+		kvm_vm_free(vm);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 4abebde78187..d43fb3f49050 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -40,6 +40,18 @@ static struct feature_id_reg feat_id_regs[] = {
 		ARM64_SYS_REG(3, 0, 0, 7, 3),	/* ID_AA64MMFR3_EL1 */
 		8,
 		1
+	},
+	{
+		ARM64_SYS_REG(3, 0, 10, 2, 4),	/* POR_EL1 */
+		ARM64_SYS_REG(3, 0, 0, 7, 3),	/* ID_AA64MMFR3_EL1 */
+		16,
+		1
+	},
+	{
+		ARM64_SYS_REG(3, 3, 10, 2, 4),	/* POR_EL0 */
+		ARM64_SYS_REG(3, 0, 0, 7, 3),	/* ID_AA64MMFR3_EL1 */
+		16,
+		1
 	}
 };
 
@@ -468,6 +480,7 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 0, 10, 2, 0),	/* MAIR_EL1 */
 	ARM64_SYS_REG(3, 0, 10, 2, 2),	/* PIRE0_EL1 */
 	ARM64_SYS_REG(3, 0, 10, 2, 3),	/* PIR_EL1 */
+	ARM64_SYS_REG(3, 0, 10, 2, 4),	/* POR_EL1 */
 	ARM64_SYS_REG(3, 0, 10, 3, 0),	/* AMAIR_EL1 */
 	ARM64_SYS_REG(3, 0, 12, 0, 0),	/* VBAR_EL1 */
 	ARM64_SYS_REG(3, 0, 12, 1, 1),	/* DISR_EL1 */
@@ -475,6 +488,7 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 0, 13, 0, 4),	/* TPIDR_EL1 */
 	ARM64_SYS_REG(3, 0, 14, 1, 0),	/* CNTKCTL_EL1 */
 	ARM64_SYS_REG(3, 2, 0, 0, 0),	/* CSSELR_EL1 */
+	ARM64_SYS_REG(3, 3, 10, 2, 4),	/* POR_EL0 */
 	ARM64_SYS_REG(3, 3, 13, 0, 2),	/* TPIDR_EL0 */
 	ARM64_SYS_REG(3, 3, 13, 0, 3),	/* TPIDRRO_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 0, 1),	/* CNTPCT_EL0 */
diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
new file mode 100644
index 000000000000..943d65fc6b0b
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3 system, not configuring GICv3 correctly
+// results in all of the sysregs generating an UNDEF exception.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool handled;
+
+#define __check_sr_read(r)					\
+	({							\
+		uint64_t val;					\
+								\
+		handled = false;				\
+		dsb(sy);					\
+		val = read_sysreg_s(SYS_ ## r);			\
+		val;						\
+	})
+
+#define __check_sr_write(r)					\
+	do {							\
+		handled = false;				\
+		dsb(sy);					\
+		write_sysreg_s(0, SYS_ ## r);			\
+		isb();						\
+	} while(0)
+
+/* Fatal checks: assert in the guest that the access set 'handled' */
+#define check_sr_read(r)					\
+	do {							\
+		__check_sr_read(r);				\
+		__GUEST_ASSERT(handled, #r " no read trap");	\
+	} while(0)
+
+#define check_sr_write(r)					\
+	do {							\
+		__check_sr_write(r);				\
+		__GUEST_ASSERT(handled, #r " no write trap");	\
+	} while(0)
+
+#define check_sr_rw(r)				\
+	do {					\
+		check_sr_read(r);		\
+		check_sr_write(r);		\
+	} while(0)
+
+static void guest_code(void)
+{
+	uint64_t val;
+
+	/*
+	 * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+	 * hidden the feature at runtime without any other userspace action.
+	 */
+	__GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+				 read_sysreg(id_aa64pfr0_el1)) == 0,
+		       "GICv3 wrongly advertised");
+
+	/*
+	 * Access all GICv3 registers, and fail if we don't get an UNDEF.
+	 * Note that we happily access all the APxRn registers without
+	 * checking their existence, as all we want to see is a failure.
+	 */
+	check_sr_rw(ICC_PMR_EL1);
+	check_sr_read(ICC_IAR0_EL1);
+	check_sr_write(ICC_EOIR0_EL1);
+	check_sr_rw(ICC_HPPIR0_EL1);
+	check_sr_rw(ICC_BPR0_EL1);
+	check_sr_rw(ICC_AP0R0_EL1);
+	check_sr_rw(ICC_AP0R1_EL1);
+	check_sr_rw(ICC_AP0R2_EL1);
+	check_sr_rw(ICC_AP0R3_EL1);
+	check_sr_rw(ICC_AP1R0_EL1);
+	check_sr_rw(ICC_AP1R1_EL1);
+	check_sr_rw(ICC_AP1R2_EL1);
+	check_sr_rw(ICC_AP1R3_EL1);
+	check_sr_write(ICC_DIR_EL1);
+	check_sr_read(ICC_RPR_EL1);
+	check_sr_write(ICC_SGI1R_EL1);
+	check_sr_write(ICC_ASGI1R_EL1);
+	check_sr_write(ICC_SGI0R_EL1);
+	check_sr_read(ICC_IAR1_EL1);
+	check_sr_write(ICC_EOIR1_EL1);
+	check_sr_rw(ICC_HPPIR1_EL1);
+	check_sr_rw(ICC_BPR1_EL1);
+	check_sr_rw(ICC_CTLR_EL1);
+	check_sr_rw(ICC_IGRPEN0_EL1);
+	check_sr_rw(ICC_IGRPEN1_EL1);
+
+	/*
+	 * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+	 * be RAO/WI. Engage in non-fatal accesses, starting with a
+	 * write of 0 to try and disable SRE, and let's see if it
+	 * sticks.
+	 */
+	__check_sr_write(ICC_SRE_EL1);
+	if (!handled)	/* a write that did not trap is only reported */
+		GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+	val = __check_sr_read(ICC_SRE_EL1);
+	if (!handled) {	/* an untrapped read must show bit 0 still set */
+		__GUEST_ASSERT((val & BIT(0)),
+			       "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+		GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+	}
+
+	GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+	/* Success, we've gracefully exploded! Record it for the check_sr_* macros. */
+	handled = true;
+	regs->pc += 4;	/* advance the saved PC by one 4-byte instruction */
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	do {
+		vcpu_run(vcpu);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:	/* report the guest-side assertion */
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		case UCALL_PRINTF:	/* relay the guest's message to host stdout */
+			printf("%s", uc.buffer);
+			break;
+		case UCALL_DONE:
+			break;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	} while (uc.cmd != UCALL_DONE);	/* re-enter the guest until it reports done */
+}
+
+static void test_guest_no_gicv3(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	/* Create a VM without a GICv3: no vGIC is set up before the vCPU runs */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_UNKNOWN, guest_undef_handler);	/* marks 'handled' */
+
+	test_run_vcpu(vcpu);
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	uint64_t pfr0;
+
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);	/* probe VM, no guest code */
+	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0);
+	__TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+		       "GICv3 not supported.");	/* requirement fails when the GIC field is 0 */
+	kvm_vm_free(vm);	/* the probe VM was only needed for the register read */
+
+	test_guest_no_gicv3();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
index d20981663831..2a3fe7914b72 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -126,6 +126,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+	REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index a51dbd2a5f84..f4ac28d53747 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -269,13 +269,12 @@ static void guest_inject(struct test_args *args,
 	KVM_INJECT_MULTI(cmd, first_intid, num);
 
 	while (irq_handled < num) {
-		asm volatile("wfi\n"
-			     "msr daifclr, #2\n"
-			     /* handle IRQ */
-			     "msr daifset, #2\n"
-			     : : : "memory");
+		wfi();
+		local_irq_enable();
+		isb(); /* handle IRQ */
+		local_irq_disable();
 	}
-	asm volatile("msr daifclr, #2" : : : "memory");
+	local_irq_enable();
 
 	GUEST_ASSERT_EQ(irq_handled, num);
 	for (i = first_intid; i < num + first_intid; i++)
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
index b3e97525cb55..bf461de34785 100644
--- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
@@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
 	return 0;
 }
 
-static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
+static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
 {
 	switch (timer) {
 	case VIRTUAL:
@@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
 	isb();
 }
 
+static inline int32_t timer_get_tval(enum arch_timer timer)
+{
+	isb();
+	switch (timer) {
+	case VIRTUAL:
+		return read_sysreg(cntv_tval_el0);	/* converted to int32_t on return */
+	case PHYSICAL:
+		return read_sysreg(cntp_tval_el0);	/* converted to int32_t on return */
+	default:
+		GUEST_FAIL("Could not get timer %d\n", timer);
+	}
+
+	/* We should not reach here: both valid timers return from the switch */
+	return 0;
+}
+
 static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
 {
 	switch (timer) {
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 9b20a355d81a..de977d131082 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -243,4 +243,7 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
 	       uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
 	       uint64_t arg6, struct arm_smccc_res *res);
 
+/* Execute a Wait For Interrupt instruction. */
+void wfi(void);
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 0ac7cc89f38c..fe4dc3693112 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -639,3 +639,9 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
 	sparsebit_set_num(vm->vpages_valid, 0,
 			  (1ULL << vm->va_bits) >> vm->page_shift);
 }
+
+/* Out-of-line helper that executes a single WFI (Wait For Interrupt) instruction. */
+void wfi(void)
+{
+	asm volatile("wfi");
+}
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 65c9c058458d..bd13257bfdfe 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -139,11 +139,8 @@ load_lp $MOD_REPLACE replace=1
 grep 'live patched' /proc/cmdline > /dev/kmsg
 grep 'live patched' /proc/meminfo > /dev/kmsg
 
-mods=(/sys/kernel/livepatch/*)
-nmods=${#mods[@]}
-if [ "$nmods" -ne 1 ]; then
-	die "Expecting only one moduled listed, found $nmods"
-fi
+loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' ||
+        die "Expecting only one moduled listed, found $nmods"
 
 # These modules were disabled by the atomic replace
 for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
index 06d24d4679a6..1cc8a977c711 100644
--- a/tools/testing/selftests/lsm/lsm_list_modules_test.c
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -128,6 +128,9 @@ TEST(correct_lsm_list_modules)
 		case LSM_ID_EVM:
 			name = "evm";
 			break;
+		case LSM_ID_IPE:
+			name = "ipe";
+			break;
 		default:
 			name = "INVALID";
 			break;
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index cfad627e8d94..5f2ca591c956 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -106,7 +106,7 @@ TEST_GEN_FILES += $(BINARIES_64)
 endif
 else
 
-ifneq (,$(findstring $(ARCH),powerpc))
+ifneq (,$(filter $(ARCH),arm64 powerpc))
 TEST_GEN_FILES += protection_keys
 endif
 
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index a818f010de47..bfcea5cf9a48 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -81,17 +81,6 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 	return sret;
 }
 
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
-	unsigned long flags, unsigned long fd, unsigned long offset)
-{
-	void *sret;
-
-	errno = 0;
-	sret = (void *) syscall(__NR_mmap, addr, len, prot,
-		flags, fd, offset);
-	return sret;
-}
-
 static int sys_munmap(void *ptr, size_t size)
 {
 	int sret;
@@ -172,7 +161,7 @@ static void setup_single_address(int size, void **ptrOut)
 {
 	void *ptr;
 
-	ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 	*ptrOut = ptr;
 }
 
@@ -181,7 +170,7 @@ static void setup_single_address_rw(int size, void **ptrOut)
 	void *ptr;
 	unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
 
-	ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
 	*ptrOut = ptr;
 }
 
@@ -205,7 +194,7 @@ bool seal_support(void)
 	void *ptr;
 	unsigned long page_size = getpagesize();
 
-	ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 	if (ptr == (void *) -1)
 		return false;
 
@@ -481,8 +470,8 @@ static void test_seal_zero_address(void)
 	int prot;
 
 	/* use mmap to change protection. */
-	ptr = sys_mmap(0, size, PROT_NONE,
-			MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+	ptr = mmap(0, size, PROT_NONE,
+		   MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
 	FAIL_TEST_IF_FALSE(ptr == 0);
 
 	size = get_vma_size(ptr, &prot);
@@ -1209,8 +1198,8 @@ static void test_seal_mmap_overwrite_prot(bool seal)
 	}
 
 	/* use mmap to change protection. */
-	ret2 = sys_mmap(ptr, size, PROT_NONE,
-			MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+	ret2 = mmap(ptr, size, PROT_NONE,
+		    MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
 	if (seal) {
 		FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
 		FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1240,8 +1229,8 @@ static void test_seal_mmap_expand(bool seal)
 	}
 
 	/* use mmap to expand. */
-	ret2 = sys_mmap(ptr, size, PROT_READ,
-			MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+	ret2 = mmap(ptr, size, PROT_READ,
+		    MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
 	if (seal) {
 		FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
 		FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1268,8 +1257,8 @@ static void test_seal_mmap_shrink(bool seal)
 	}
 
 	/* use mmap to shrink. */
-	ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
-			MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+	ret2 = mmap(ptr, 8 * page_size, PROT_READ,
+		    MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
 	if (seal) {
 		FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
 		FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1650,7 +1639,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal)
 	ret = fallocate(fd, 0, 0, size);
 	FAIL_TEST_IF_FALSE(!ret);
 
-	ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+	ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0);
 	FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
 
 	if (seal) {
@@ -1680,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal)
 	int ret;
 	unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
 
-	ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+	ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0);
 	FAIL_TEST_IF_FALSE(ptr != (void *)-1);
 
 	if (seal) {
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
new file mode 100644
index 000000000000..580e1b0bb38e
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _PKEYS_ARM64_H
+#define _PKEYS_ARM64_H
+
+#include "vm_util.h"
+/* for signal frame parsing */
+#include "../arm64/signal/testcases/testcases.h"
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key	288
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc		289
+# define SYS_pkey_free		290
+#endif
+#define MCONTEXT_IP(mc)		mc.pc
+#define MCONTEXT_TRAPNO(mc)	-1
+
+#define PKEY_MASK		0xf
+
+#define POE_NONE		0x0
+#define POE_X			0x2
+#define POE_RX			0x3
+#define POE_RWX			0x7
+
+#define NR_PKEYS		8
+#define NR_RESERVED_PKEYS	1 /* pkey-0 */
+
+#define PKEY_ALLOW_ALL		0x77777777
+
+#define PKEY_BITS_PER_PKEY	4
+#define PAGE_SIZE		sysconf(_SC_PAGESIZE)
+#undef HPAGE_SIZE
+#define HPAGE_SIZE		default_huge_page_size()
+
+/* 4-byte instructions * 16384 = 64K page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+static inline u64 __read_pkey_reg(void)
+{
+	u64 pkey_reg = 0;
+
+	// Read POR_EL0 through its generic S3_3_c10_c2_4 encoding
+	asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg));
+
+	return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+	u64 por = pkey_reg;
+
+	dprintf4("%s() changing %016llx to %016llx\n",
+			 __func__, __read_pkey_reg(), pkey_reg);
+
+	// Write POR_EL0 (generic encoding); an ISB follows in the same asm statement
+	asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :);
+
+	dprintf4("%s() pkey register after changing %016llx to %016llx\n",
+			__func__, __read_pkey_reg(), pkey_reg);
+}
+
+static inline int cpu_has_pkeys(void)
+{
+	/* No simple way to determine this, so support is reported unconditionally */
+	return 1;
+}
+
+static inline u32 pkey_bit_position(int pkey)
+{
+	return pkey * PKEY_BITS_PER_PKEY;	/* bit offset of this pkey's field */
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+	return NR_RESERVED_PKEYS;	/* only pkey-0 is counted as reserved */
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+}
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+	return PTR_ERR_ENOTSUP;	/* always reports "not supported"; arguments unused */
+}
+
+#define set_pkey_bits	set_pkey_bits	/* marks this arch-specific version as defined */
+static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+	u32 shift = pkey_bit_position(pkey);
+	u64 new_val = POE_RWX;	/* used when neither disable flag is set */
+
+	/* mask out bits from pkey in old value */
+	reg &= ~((u64)PKEY_MASK << shift);
+
+	if (flags & PKEY_DISABLE_ACCESS)	/* checked first, so it wins over DISABLE_WRITE */
+		new_val = POE_X;
+	else if (flags & PKEY_DISABLE_WRITE)
+		new_val = POE_RX;
+
+	/* OR in new bits for pkey */
+	reg |= new_val << shift;
+
+	return reg;	/* the updated value; nothing is written to the register here */
+}
+
+#define get_pkey_bits	get_pkey_bits	/* marks this arch-specific version as defined */
+static inline u64 get_pkey_bits(u64 reg, int pkey)
+{
+	u32 shift = pkey_bit_position(pkey);
+	/*
+	 * shift down the relevant bits to the lowest four, then
+	 * mask off all the other higher bits
+	 */
+	u32 perm = (reg >> shift) & PKEY_MASK;
+
+	if (perm == POE_X)	/* inverse of the mapping in set_pkey_bits() */
+		return PKEY_DISABLE_ACCESS;
+	if (perm == POE_RX)
+		return PKEY_DISABLE_WRITE;
+	return 0;	/* every other encoding yields no PKEY_DISABLE_* flag */
+}
+
+static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
+{
+	struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt);
+	struct poe_context *poe_ctx =
+		(struct poe_context *) get_header(ctx, POE_MAGIC,
+						sizeof(uctxt->uc_mcontext), NULL);
+	if (poe_ctx)	/* nothing is written when no POE_MAGIC record is found */
+		poe_ctx->por_el0 = pkey;	/* overwrite por_el0 in the signal context record */
+}
+
+#endif /* _PKEYS_ARM64_H */
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 1af3156a9db8..15608350fc01 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -91,12 +91,17 @@ void record_pkey_malloc(void *ptr, long size, int prot);
 #include "pkey-x86.h"
 #elif defined(__powerpc64__) /* arch */
 #include "pkey-powerpc.h"
+#elif defined(__aarch64__) /* arch */
+#include "pkey-arm64.h"
 #else /* arch */
 #error Architecture not supported
 #endif /* arch */
 
+#ifndef PKEY_MASK
 #define PKEY_MASK	(PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
 
+#ifndef set_pkey_bits
 static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
 {
 	u32 shift = pkey_bit_position(pkey);
@@ -106,7 +111,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
 	reg |= (flags & PKEY_MASK) << shift;
 	return reg;
 }
+#endif
 
+#ifndef get_pkey_bits
 static inline u64 get_pkey_bits(u64 reg, int pkey)
 {
 	u32 shift = pkey_bit_position(pkey);
@@ -116,6 +123,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey)
 	 */
 	return ((reg >> shift) & PKEY_MASK);
 }
+#endif
 
 extern u64 shadow_pkey_reg;
 
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index ae5df26104e5..3d0c0bdae5bc 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -8,7 +8,10 @@
 # define SYS_pkey_free		385
 #endif
 #define REG_IP_IDX		PT_NIP
+#define MCONTEXT_IP(mc)		mc.gp_regs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc)	mc.gp_regs[REG_TRAPNO]
 #define REG_TRAPNO		PT_TRAP
+#define MCONTEXT_FPREGS
 #define gregs			gp_regs
 #define fpregs			fp_regs
 #define si_pkey_offset		0x20
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 814758e109c0..5f28e26a2511 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -15,6 +15,10 @@
 
 #endif
 
+#define MCONTEXT_IP(mc)		mc.gregs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc)	mc.gregs[REG_TRAPNO]
+#define MCONTEXT_FPREGS
+
 #ifndef PKEY_DISABLE_ACCESS
 # define PKEY_DISABLE_ACCESS	0x1
 #endif
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index eaa6d1fc5328..0789981b72b9 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -147,7 +147,7 @@ void abort_hooks(void)
  * will then fault, which makes sure that the fault code handles
  * execute-only memory properly.
  */
-#ifdef __powerpc64__
+#if defined(__powerpc64__) || defined(__aarch64__)
 /* This way, both 4K and 64K alignment are maintained */
 __attribute__((__aligned__(65536)))
 #else
@@ -212,7 +212,6 @@ void pkey_disable_set(int pkey, int flags)
 	unsigned long syscall_flags = 0;
 	int ret;
 	int pkey_rights;
-	u64 orig_pkey_reg = read_pkey_reg();
 
 	dprintf1("START->%s(%d, 0x%x)\n", __func__,
 		pkey, flags);
@@ -242,8 +241,6 @@ void pkey_disable_set(int pkey, int flags)
 
 	dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
 		__func__, pkey, read_pkey_reg());
-	if (flags)
-		pkey_assert(read_pkey_reg() >= orig_pkey_reg);
 	dprintf1("END<---%s(%d, 0x%x)\n", __func__,
 		pkey, flags);
 }
@@ -253,7 +250,6 @@ void pkey_disable_clear(int pkey, int flags)
 	unsigned long syscall_flags = 0;
 	int ret;
 	int pkey_rights = hw_pkey_get(pkey, syscall_flags);
-	u64 orig_pkey_reg = read_pkey_reg();
 
 	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
 
@@ -273,8 +269,6 @@ void pkey_disable_clear(int pkey, int flags)
 
 	dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
 			pkey, read_pkey_reg());
-	if (flags)
-		assert(read_pkey_reg() <= orig_pkey_reg);
 }
 
 void pkey_write_allow(int pkey)
@@ -314,7 +308,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	ucontext_t *uctxt = vucontext;
 	int trapno;
 	unsigned long ip;
+#ifdef MCONTEXT_FPREGS
 	char *fpregs;
+#endif
 #if defined(__i386__) || defined(__x86_64__) /* arch */
 	u32 *pkey_reg_ptr;
 	int pkey_reg_offset;
@@ -328,9 +324,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 			__func__, __LINE__,
 			__read_pkey_reg(), shadow_pkey_reg);
 
-	trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
-	ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+	trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext);
+	ip = MCONTEXT_IP(uctxt->uc_mcontext);
+#ifdef MCONTEXT_FPREGS
 	fpregs = (char *) uctxt->uc_mcontext.fpregs;
+#endif
 
 	dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
 			__func__, trapno, ip, si_code_str(si->si_code),
@@ -359,7 +357,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 #endif /* arch */
 
 	dprintf1("siginfo: %p\n", si);
+#ifdef MCONTEXT_FPREGS
 	dprintf1(" fpregs: %p\n", fpregs);
+#endif
 
 	if ((si->si_code == SEGV_MAPERR) ||
 	    (si->si_code == SEGV_ACCERR) ||
@@ -389,6 +389,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 #elif defined(__powerpc64__) /* arch */
 	/* restore access and let the faulting instruction continue */
 	pkey_access_allow(siginfo_pkey);
+#elif defined(__aarch64__)
+	aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL);
 #endif /* arch */
 	pkey_faults++;
 	dprintf1("<<<<==================================================\n");
@@ -902,7 +904,9 @@ void expected_pkey_fault(int pkey)
 	 * test program continue.  We now have to restore it.
 	 */
 	if (__read_pkey_reg() != 0)
-#else /* arch */
+#elif defined(__aarch64__)
+	if (__read_pkey_reg() != PKEY_ALLOW_ALL)
+#else
 	if (__read_pkey_reg() != shadow_pkey_reg)
 #endif /* arch */
 		pkey_assert(0);
@@ -1492,6 +1496,11 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
 	lots_o_noops_around_write(&scratch);
 	do_not_expect_pkey_fault("executing on PROT_EXEC memory");
 	expect_fault_on_read_execonly_key(p1, pkey);
+
+	// Reset back to PROT_EXEC | PROT_READ for architectures that support
+	// non-PKEY execute-only permissions.
+	ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey);
+	pkey_assert(!ret);
 }
 
 void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1665,6 +1674,84 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
 }
 #endif
 
+#if defined(__aarch64__)
+void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+{
+	pid_t child;
+	int status, ret;
+	struct iovec iov;
+	u64 trace_pkey;
+	/* Arbitrary POR_EL0 value: pkey 2 = X, pkey 1 = none, pkey 0 = RWX */
+	u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) |
+			(POE_NONE << PKEY_BITS_PER_PKEY) |
+			POE_RWX;
+
+	child = fork();
+	pkey_assert(child >= 0);
+	dprintf3("[%d] fork() ret: %d\n", getpid(), child);
+	if (!child) {
+		ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+		/* Stop and allow the tracer to modify POR_EL0 directly */
+		raise(SIGSTOP);
+
+		/*
+		 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+		 * checking
+		 */
+		if (__read_pkey_reg() != new_pkey)
+			exit(1);
+
+		raise(SIGSTOP);
+
+		exit(0);
+	}
+
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+	iov.iov_base = &trace_pkey;
+	iov.iov_len = sizeof(trace_pkey);	/* regset buffer is exactly one u64 */
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+	pkey_assert(trace_pkey == read_pkey_reg());
+
+	trace_pkey = new_pkey;
+
+	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+
+	/* Test that the modification is visible in ptrace before any execution */
+	memset(&trace_pkey, 0, sizeof(trace_pkey));
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+	pkey_assert(trace_pkey == new_pkey);
+
+	/* Execute the tracee */
+	ret = ptrace(PTRACE_CONT, child, 0, 0);
+	pkey_assert(ret == 0);
+
+	/* Test that the tracee saw the POR_EL0 value change (it exits with 1 otherwise) */
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+	/* Test that the modification is visible in ptrace after execution */
+	memset(&trace_pkey, 0, sizeof(trace_pkey));
+	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+	pkey_assert(ret == 0);
+	pkey_assert(trace_pkey == new_pkey);
+
+	ret = ptrace(PTRACE_CONT, child, 0, 0);
+	pkey_assert(ret == 0);
+	pkey_assert(child == waitpid(child, &status, 0));
+	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+	pkey_assert(WIFEXITED(status));
+	pkey_assert(WEXITSTATUS(status) == 0);
+}
+#endif
+
 void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
 {
 	int size = PAGE_SIZE;
@@ -1700,7 +1787,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
 	test_pkey_syscalls_bad_args,
 	test_pkey_alloc_exhaust,
 	test_pkey_alloc_free_attach_pkey0,
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
 	test_ptrace_modifies_pkru,
 #endif
 };
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
index 7aa1366063e4..d9f8ba8d5050 100644
--- a/tools/testing/selftests/mm/seal_elf.c
+++ b/tools/testing/selftests/mm/seal_elf.c
@@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len)
 	return sret;
 }
 
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
-	unsigned long flags, unsigned long fd, unsigned long offset)
-{
-	void *sret;
-
-	errno = 0;
-	sret = (void *) syscall(__NR_mmap, addr, len, prot,
-		flags, fd, offset);
-	return sret;
-}
-
 static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
 {
 	int sret;
@@ -56,7 +45,7 @@ static bool seal_support(void)
 	void *ptr;
 	unsigned long page_size = getpagesize();
 
-	ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 	if (ptr == (void *) -1)
 		return false;
 
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 666ab7d9390b..1c04c780db66 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -17,6 +17,7 @@ ipv6_flowlabel
 ipv6_flowlabel_mgr
 log.txt
 msg_zerocopy
+ncdevmem
 nettest
 psock_fanout
 psock_snd
@@ -34,6 +35,7 @@ scm_pidfd
 scm_rights
 sk_bind_sendto_listen
 sk_connect_zero_addr
+sk_so_peek_off
 socket
 so_incoming_cpu
 so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8eaffd7a641c..649f1fe0dc46 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh
 TEST_PROGS += rps_default_mask.sh
 TEST_PROGS += big_tcp.sh
 TEST_PROGS += netns-sysctl.sh
-TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
@@ -80,12 +80,14 @@ TEST_PROGS += io_uring_zerocopy_tx.sh
 TEST_GEN_FILES += bind_bhash
 TEST_GEN_PROGS += sk_bind_sendto_listen
 TEST_GEN_PROGS += sk_connect_zero_addr
+TEST_GEN_PROGS += sk_so_peek_off
 TEST_PROGS += test_ingress_egress_chaining.sh
 TEST_GEN_PROGS += so_incoming_cpu
 TEST_PROGS += sctp_vrf.sh
 TEST_GEN_FILES += sctp_hello
 TEST_GEN_FILES += ip_local_port_range
-TEST_GEN_FILES += bind_wildcard
+TEST_GEN_PROGS += bind_wildcard
+TEST_GEN_PROGS += bind_timewait
 TEST_PROGS += test_vxlan_mdb.sh
 TEST_PROGS += test_bridge_neigh_suppress.sh
 TEST_PROGS += test_vxlan_nolocalbypass.sh
@@ -95,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh
 TEST_PROGS += vlan_hw_filter.sh
 TEST_PROGS += bpf_offload.py
 
+# YNL files, must be before "include ..lib.mk"
+EXTRA_CLEAN += $(OUTPUT)/libynl.a
+YNL_GEN_FILES := ncdevmem
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+
 TEST_FILES := settings
 TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
 
@@ -104,6 +111,10 @@ TEST_INCLUDES := forwarding/lib.sh
 
 include ../lib.mk
 
+# YNL build
+YNL_GENS := netdev
+include ynl.mk
+
 $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 535eb2c3d7d1..3ed3882a93b8 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -525,6 +525,29 @@ TEST_F(msg_oob, ex_oob_drop_2)
 	}
 }
 
+TEST_F(msg_oob, ex_oob_oob)
+{
+	sendpair("x", 1, MSG_OOB);	/* first OOB byte */
+	epollpair(true);
+	siocatmarkpair(true);
+
+	recvpair("x", 1, 1, MSG_OOB);
+	epollpair(false);
+	siocatmarkpair(true);
+
+	sendpair("y", 1, MSG_OOB);	/* second OOB byte, sent after the first was read */
+	epollpair(true);
+	siocatmarkpair(true);
+
+	recvpair("", -EAGAIN, 1, 0);	/* NOTE(review): 2nd arg looks like the expected result - confirm */
+	epollpair(false);
+	siocatmarkpair(false);
+
+	recvpair("", -EINVAL, 1, MSG_OOB);
+	epollpair(false);
+	siocatmarkpair(false);
+}
+
 TEST_F(msg_oob, ex_oob_ahead_break)
 {
 	sendpair("hello", 5, MSG_OOB);
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 386ebd829df5..899dbad0104b 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -4304,14 +4304,7 @@ elif [ "$TESTS" = "ipv6" ]; then
 	TESTS="$TESTS_IPV6"
 fi
 
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
-	PATH=$PWD:$PATH
-	if ! which nettest >/dev/null; then
-		echo "'nettest' command not found; skipping tests"
-		exit $ksft_skip
-	fi
-fi
+check_gen_prog "nettest"
 
 declare -i nfail=0
 declare -i nsuccess=0
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index ac0b2c6a5761..77c83d9508d3 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -78,7 +78,12 @@ log_test()
 	else
 		ret=1
 		nfail=$((nfail+1))
-		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [[ $rc -eq $ksft_skip ]]; then
+			printf "TEST: %-60s  [SKIP]\n" "${msg}"
+		else
+			printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		fi
+
 		if [ "$VERBOSE" = "1" ]; then
 			echo "    rc=$rc, expected $expected"
 		fi
@@ -923,6 +928,29 @@ ipv6_grp_fcnal()
 
 	ipv6_grp_refs
 	log_test $? 0 "Nexthop group replace refcounts"
+
+	#
+	# 16-bit weights.
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+	run_cmd "$IP nexthop add id 66 dev veth1"
+
+	run_cmd "$IP nexthop add id 103 group 62,1000"
+	if [[ $? == 0 ]]; then
+		local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535"
+		run_cmd "$IP nexthop replace $GRP"
+		check_nexthop "id 103" "$GRP"
+		rc=$?
+	else
+		rc=$ksft_skip
+	fi
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	log_test $rc 0 "16-bit weights"
 }
 
 ipv6_res_grp_fcnal()
@@ -987,6 +1015,31 @@ ipv6_res_grp_fcnal()
 	check_nexthop_bucket "list id 102" \
 		"id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
 	log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+
+	#
+	# 16-bit weights.
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+	run_cmd "$IP nexthop add id 66 dev veth1"
+
+	run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32"
+	if [[ $? == 0 ]]; then
+		local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(:
+			  )type resilient buckets 32 idle_timer 0 $(:
+			  )unbalanced_timer 0"
+		run_cmd "$IP nexthop replace $GRP"
+		check_nexthop "id 103" "$GRP unbalanced_time 0"
+		rc=$?
+	else
+		rc=$ksft_skip
+	fi
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	log_test $rc 0 "16-bit weights"
 }
 
 ipv6_fcnal_runtime()
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 7c01f58a20de..1d58b3b87465 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -35,18 +35,13 @@ log_test()
 	local expected=$2
 	local msg="$3"
 
-	$IP rule show | grep -q l3mdev
-	if [ $? -eq 0 ]; then
-		msg="$msg (VRF)"
-	fi
-
 	if [ ${rc} -eq ${expected} ]; then
 		nsuccess=$((nsuccess+1))
-		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
 	else
 		ret=1
 		nfail=$((nfail+1))
-		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 			echo
 			echo "hit enter to continue, 'q' to quit"
@@ -56,39 +51,6 @@ log_test()
 	fi
 }
 
-log_section()
-{
-	echo
-	echo "######################################################################"
-	echo "TEST SECTION: $*"
-	echo "######################################################################"
-}
-
-check_nettest()
-{
-	if which nettest > /dev/null 2>&1; then
-		return 0
-	fi
-
-	# Add the selftest directory to PATH if not already done
-	if [ "${SELFTEST_PATH}" = "" ]; then
-		SELFTEST_PATH="$(dirname $0)"
-		PATH="${PATH}:${SELFTEST_PATH}"
-
-		# Now retry with the new path
-		if which nettest > /dev/null 2>&1; then
-			return 0
-		fi
-
-		if [ "${ret}" -eq 0 ]; then
-			ret="${ksft_skip}"
-		fi
-		echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
-	fi
-
-	return 1
-}
-
 setup()
 {
 	set -e
@@ -187,12 +149,17 @@ fib_rule6_test_match_n_redirect()
 {
 	local match="$1"
 	local getmatch="$2"
-	local description="$3"
+	local getnomatch="$3"
+	local description="$4"
+	local nomatch_description="$5"
 
 	$IP -6 rule add $match table $RTABLE
 	$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
 	log_test $? 0 "rule6 check: $description"
 
+	$IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE"
+	log_test $? 1 "rule6 check: $nomatch_description"
+
 	fib_rule6_del_by_pref "$match"
 	log_test $? 0 "rule6 del by pref: $description"
 }
@@ -213,18 +180,27 @@ fib_rule6_test_reject()
 
 fib_rule6_test()
 {
+	local ext_name=$1; shift
+	local getnomatch
 	local getmatch
 	local match
 	local cnt
 
+	echo
+	echo "IPv6 FIB rule tests $ext_name"
+
 	# setup the fib rule redirect route
 	$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
 
 	match="oif $DEV"
-	fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+	getnomatch="oif lo"
+	fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+		"oif redirect to table" "oif no redirect to table"
 
 	match="from $SRC_IP6 iif $DEV"
-	fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+	getnomatch="from $SRC_IP6 iif lo"
+	fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+		"iif redirect to table" "iif no redirect to table"
 
 	# Reject dsfield (tos) options which have ECN bits set
 	for cnt in $(seq 1 3); do
@@ -238,44 +214,89 @@ fib_rule6_test()
 		# Using option 'tos' instead of 'dsfield' as old iproute2
 		# versions don't support 'dsfield' in ip rule show.
 		getmatch="tos $cnt"
+		getnomatch="tos 0x20"
 		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
-						"$getmatch redirect to table"
+			"$getnomatch" "$getmatch redirect to table" \
+			"$getnomatch no redirect to table"
+	done
+
+	# Re-test TOS matching, but with input routes since they are handled
+	# differently from output routes.
+	match="tos 0x10"
+	for cnt in "0x10" "0x11" "0x12" "0x13"; do
+		getmatch="tos $cnt"
+		getnomatch="tos 0x20"
+		fib_rule6_test_match_n_redirect "$match" \
+			"from $SRC_IP6 iif $DEV $getmatch" \
+			"from $SRC_IP6 iif $DEV $getnomatch" \
+			"iif $getmatch redirect to table" \
+			"iif $getnomatch no redirect to table"
 	done
 
 	match="fwmark 0x64"
 	getmatch="mark 0x64"
-	fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+	getnomatch="mark 0x63"
+	fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+		"fwmark redirect to table" "fwmark no redirect to table"
 
 	fib_check_iproute_support "uidrange" "uid"
 	if [ $? -eq 0 ]; then
 		match="uidrange 100-100"
 		getmatch="uid 100"
-		fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+		getnomatch="uid 101"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "uid redirect to table" \
+			"uid no redirect to table"
 	fi
 
 	fib_check_iproute_support "sport" "sport"
 	if [ $? -eq 0 ]; then
 		match="sport 666 dport 777"
-		fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+		getnomatch="sport 667 dport 778"
+		fib_rule6_test_match_n_redirect "$match" "$match" \
+			"$getnomatch" "sport and dport redirect to table" \
+			"sport and dport no redirect to table"
 	fi
 
 	fib_check_iproute_support "ipproto" "ipproto"
 	if [ $? -eq 0 ]; then
 		match="ipproto tcp"
-		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+		getnomatch="ipproto udp"
+		fib_rule6_test_match_n_redirect "$match" "$match" \
+			"$getnomatch" "ipproto tcp match" "ipproto udp no match"
 	fi
 
 	fib_check_iproute_support "ipproto" "ipproto"
 	if [ $? -eq 0 ]; then
 		match="ipproto ipv6-icmp"
-		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
+		getnomatch="ipproto tcp"
+		fib_rule6_test_match_n_redirect "$match" "$match" \
+			"$getnomatch" "ipproto ipv6-icmp match" \
+			"ipproto tcp no match"
+	fi
+
+	fib_check_iproute_support "dscp" "tos"
+	if [ $? -eq 0 ]; then
+		match="dscp 0x3f"
+		getmatch="tos 0xfc"
+		getnomatch="tos 0xf4"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "dscp redirect to table" \
+			"dscp no redirect to table"
+
+		match="dscp 0x3f"
+		getmatch="from $SRC_IP6 iif $DEV tos 0xfc"
+		getnomatch="from $SRC_IP6 iif $DEV tos 0xf4"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "iif dscp redirect to table" \
+			"iif dscp no redirect to table"
 	fi
 }
 
 fib_rule6_vrf_test()
 {
 	setup_vrf
-	fib_rule6_test
+	fib_rule6_test "- with VRF"
 	cleanup_vrf
 }
 
@@ -285,10 +306,8 @@ fib_rule6_connect_test()
 {
 	local dsfield
 
-	if ! check_nettest; then
-		echo "SKIP: Could not run test without nettest tool"
-		return
-	fi
+	echo
+	echo "IPv6 FIB rule connect tests"
 
 	setup_peer
 	$IP -6 rule add dsfield 0x04 table $RTABLE_PEER
@@ -306,7 +325,45 @@ fib_rule6_connect_test()
 		log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
 	done
 
+	# Check that UDP and TCP connections fail when using a DS Field that
+	# does not match the previously configured FIB rule.
+	nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \
+		-Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11
+	log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)"
+
+	nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \
+		-l 2001:db8::1:11 -r 2001:db8::1:11
+	log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)"
+
 	$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+
+	ip rule help 2>&1 | grep -q dscp
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 iprule too old, missing dscp match"
+		cleanup_peer
+		return
+	fi
+
+	$IP -6 rule add dscp 0x3f table $RTABLE_PEER
+
+	nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \
+		-l 2001:db8::1:11 -r 2001:db8::1:11
+	log_test $? 0 "rule6 dscp udp connect"
+
+	nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \
+		-l 2001:db8::1:11 -r 2001:db8::1:11
+	log_test $? 0 "rule6 dscp tcp connect"
+
+	nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \
+		-l 2001:db8::1:11 -r 2001:db8::1:11
+	log_test $? 1 "rule6 dscp udp no connect"
+
+	nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+		-l 2001:db8::1:11 -r 2001:db8::1:11
+	log_test $? 1 "rule6 dscp tcp no connect"
+
+	$IP -6 rule del dscp 0x3f table $RTABLE_PEER
+
 	cleanup_peer
 }
 
@@ -326,12 +383,17 @@ fib_rule4_test_match_n_redirect()
 {
 	local match="$1"
 	local getmatch="$2"
-	local description="$3"
+	local getnomatch="$3"
+	local description="$4"
+	local nomatch_description="$5"
 
 	$IP rule add $match table $RTABLE
 	$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
 	log_test $? 0 "rule4 check: $description"
 
+	$IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE"
+	log_test $? 1 "rule4 check: $nomatch_description"
+
 	fib_rule4_del_by_pref "$match"
 	log_test $? 0 "rule4 del by pref: $description"
 }
@@ -352,23 +414,31 @@ fib_rule4_test_reject()
 
 fib_rule4_test()
 {
+	local ext_name=$1; shift
+	local getnomatch
 	local getmatch
 	local match
 	local cnt
 
+	echo
+	echo "IPv4 FIB rule tests $ext_name"
+
 	# setup the fib rule redirect route
 	$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
 
 	match="oif $DEV"
-	fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+	getnomatch="oif lo"
+	fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+		"oif redirect to table" "oif no redirect to table"
 
-	# need enable forwarding and disable rp_filter temporarily as all the
-	# addresses are in the same subnet and egress device == ingress device.
+	# Enable forwarding and disable rp_filter as all the addresses are in
+	# the same subnet and egress device == ingress device.
 	ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
 	ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
 	match="from $SRC_IP iif $DEV"
-	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
-	ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0
+	getnomatch="from $SRC_IP iif lo"
+	fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+		"iif redirect to table" "iif no redirect to table"
 
 	# Reject dsfield (tos) options which have ECN bits set
 	for cnt in $(seq 1 3); do
@@ -382,44 +452,90 @@ fib_rule4_test()
 		# Using option 'tos' instead of 'dsfield' as old iproute2
 		# versions don't support 'dsfield' in ip rule show.
 		getmatch="tos $cnt"
+		getnomatch="tos 0x20"
 		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
-						"$getmatch redirect to table"
+			"$getnomatch" "$getmatch redirect to table" \
+			"$getnomatch no redirect to table"
+	done
+
+	# Re-test TOS matching, but with input routes since they are handled
+	# differently from output routes.
+	match="tos 0x10"
+	for cnt in "0x10" "0x11" "0x12" "0x13"; do
+		getmatch="tos $cnt"
+		getnomatch="tos 0x20"
+		fib_rule4_test_match_n_redirect "$match" \
+			"from $SRC_IP iif $DEV $getmatch" \
+			"from $SRC_IP iif $DEV $getnomatch" \
+			"iif $getmatch redirect to table" \
+			"iif $getnomatch no redirect to table"
 	done
 
 	match="fwmark 0x64"
 	getmatch="mark 0x64"
-	fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+	getnomatch="mark 0x63"
+	fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+		"fwmark redirect to table" "fwmark no redirect to table"
 
 	fib_check_iproute_support "uidrange" "uid"
 	if [ $? -eq 0 ]; then
 		match="uidrange 100-100"
 		getmatch="uid 100"
-		fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+		getnomatch="uid 101"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "uid redirect to table" \
+			"uid no redirect to table"
 	fi
 
 	fib_check_iproute_support "sport" "sport"
 	if [ $? -eq 0 ]; then
 		match="sport 666 dport 777"
-		fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+		getnomatch="sport 667 dport 778"
+		fib_rule4_test_match_n_redirect "$match" "$match" \
+			"$getnomatch" "sport and dport redirect to table" \
+			"sport and dport no redirect to table"
 	fi
 
 	fib_check_iproute_support "ipproto" "ipproto"
 	if [ $? -eq 0 ]; then
 		match="ipproto tcp"
-		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+		getnomatch="ipproto udp"
+		fib_rule4_test_match_n_redirect "$match" "$match" \
+			"$getnomatch" "ipproto tcp match" \
+			"ipproto udp no match"
 	fi
 
 	fib_check_iproute_support "ipproto" "ipproto"
 	if [ $? -eq 0 ]; then
 		match="ipproto icmp"
-		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+		getnomatch="ipproto tcp"
+		fib_rule4_test_match_n_redirect "$match" "$match" \
+			"$getnomatch" "ipproto icmp match" \
+			"ipproto tcp no match"
+	fi
+
+	fib_check_iproute_support "dscp" "tos"
+	if [ $? -eq 0 ]; then
+		match="dscp 0x3f"
+		getmatch="tos 0xfc"
+		getnomatch="tos 0xf4"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "dscp redirect to table" \
+			"dscp no redirect to table"
+
+		match="dscp 0x3f"
+		getmatch="from $SRC_IP iif $DEV tos 0xfc"
+		getnomatch="from $SRC_IP iif $DEV tos 0xf4"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "iif dscp redirect to table" \
+			"iif dscp no redirect to table"
 	fi
 }
 
 fib_rule4_vrf_test()
 {
 	setup_vrf
-	fib_rule4_test
+	fib_rule4_test "- with VRF"
 	cleanup_vrf
 }
 
@@ -429,10 +545,8 @@ fib_rule4_connect_test()
 {
 	local dsfield
 
-	if ! check_nettest; then
-		echo "SKIP: Could not run test without nettest tool"
-		return
-	fi
+	echo
+	echo "IPv4 FIB rule connect tests"
 
 	setup_peer
 	$IP -4 rule add dsfield 0x04 table $RTABLE_PEER
@@ -450,16 +564,46 @@ fib_rule4_connect_test()
 		log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
 	done
 
+	# Check that UDP and TCP connections fail when using a DS Field that
+	# does not match the previously configured FIB rule.
+	nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \
+		-l 198.51.100.11 -r 198.51.100.11
+	log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)"
+
+	nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \
+		-l 198.51.100.11 -r 198.51.100.11
+	log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)"
+
 	$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
-	cleanup_peer
-}
 
-run_fibrule_tests()
-{
-	log_section "IPv4 fib rule"
-	fib_rule4_test
-	log_section "IPv6 fib rule"
-	fib_rule6_test
+	ip rule help 2>&1 | grep -q dscp
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 iprule too old, missing dscp match"
+		cleanup_peer
+		return
+	fi
+
+	$IP -4 rule add dscp 0x3f table $RTABLE_PEER
+
+	nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \
+		-l 198.51.100.11 -r 198.51.100.11
+	log_test $? 0 "rule4 dscp udp connect"
+
+	nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \
+		-l 198.51.100.11 -r 198.51.100.11
+	log_test $? 0 "rule4 dscp tcp connect"
+
+	nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \
+		-l 198.51.100.11 -r 198.51.100.11
+	log_test $? 1 "rule4 dscp udp no connect"
+
+	nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+		-l 198.51.100.11 -r 198.51.100.11
+	log_test $? 1 "rule4 dscp tcp no connect"
+
+	$IP -4 rule del dscp 0x3f table $RTABLE_PEER
+
+	cleanup_peer
 }
 ################################################################################
 # usage
@@ -495,6 +639,8 @@ if [ ! -x "$(command -v ip)" ]; then
 	exit $ksft_skip
 fi
 
+check_gen_prog "nettest"
+
 # start clean
 cleanup &> /dev/null
 setup
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index 7fdb6a9ca543..a652429bfd53 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -6,7 +6,7 @@ to easily create and test complex environments.
 
 Unfortunately, these namespaces can not be used with actual switching
 ASICs, as their ports can not be migrated to other network namespaces
-(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+(dev->netns_local) and most of them probably do not support the
 L1-separation provided by namespaces.
 
 However, a similar kind of flexibility can be achieved by using VRFs and
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 1783c10215e5..7d531f7091e6 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -224,10 +224,10 @@ send_dst_ipv6()
 send_flowlabel()
 {
 	# Generate 16384 echo requests, each with a random flow label.
-	for _ in $(seq 1 16384); do
-		ip vrf exec v$h1 \
-			$PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
-	done
+	ip vrf exec v$h1 bash -c \
+		"for _ in {1..16384}; do \
+			$PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+		done"
 }
 
 send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index 9788bd0f6e8b..dda11a4a9450 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -319,10 +319,10 @@ send_dst_ipv6()
 send_flowlabel()
 {
 	# Generate 16384 echo requests, each with a random flow label.
-	for _ in $(seq 1 16384); do
-		ip vrf exec v$h1 \
-			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
-	done
+	ip vrf exec v$h1 bash -c \
+		"for _ in {1..16384}; do \
+			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+		done"
 }
 
 send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index 2ab9eaaa5532..e28b4a079e52 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -321,10 +321,10 @@ send_dst_ipv6()
 send_flowlabel()
 {
 	# Generate 16384 echo requests, each with a random flow label.
-	for _ in $(seq 1 16384); do
-		ip vrf exec v$h1 \
-			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
-	done
+	ip vrf exec v$h1 bash -c \
+		"for _ in {1..16384}; do \
+			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+		done"
 }
 
 send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 718d04a4f72d..c992e385159c 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -514,6 +514,13 @@ xfail_on_slow()
 	fi
 }
 
+omit_on_slow()
+{
+	if [[ $KSFT_MACHINE_SLOW != yes ]]; then
+		"$@"
+	fi
+}
+
 xfail_on_veth()
 {
 	local dev=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index 648868f74604..c35548767756 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -571,6 +571,10 @@ vlan_over_vlan_aware_bridge()
 cleanup()
 {
 	pre_cleanup
+
+	ip link set $h2 down
+	ip link set $h1 down
+
 	vrf_cleanup
 }
 
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
index af3b398d13f0..9e677aa64a06 100755
--- a/tools/testing/selftests/net/forwarding/no_forwarding.sh
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -233,6 +233,9 @@ cleanup()
 {
 	pre_cleanup
 
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
 	h2_destroy
 	h1_destroy
 
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 2ba44247c60a..a7d8399c8d4f 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
 	multipath_test
+	multipath16_test
 	ping_ipv4_blackhole
 	ping_ipv6_blackhole
 	nh_stats_test_v4
@@ -226,9 +227,11 @@ routing_nh_obj()
 
 multipath4_test()
 {
-	local desc="$1"
-	local weight_rp12=$2
-	local weight_rp13=$3
+	local desc=$1; shift
+	local weight_rp12=$1; shift
+	local weight_rp13=$1; shift
+	local ports=${1-sp=1024,dp=0-32768}; shift
+
 	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
 	local packets_rp12 packets_rp13
 
@@ -242,7 +245,8 @@ multipath4_test()
 	t0_rp13=$(link_stats_tx_packets_get $rp13)
 
 	ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
-		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+		-d $MZ_DELAY -t udp "$ports"
+	sleep 1
 
 	t1_rp12=$(link_stats_tx_packets_get $rp12)
 	t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
 
 multipath6_test()
 {
-	local desc="$1"
-	local weight_rp12=$2
-	local weight_rp13=$3
+	local desc=$1; shift
+	local weight_rp12=$1; shift
+	local weight_rp13=$1; shift
+	local ports=${1-sp=1024,dp=0-32768}; shift
+
 	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
 	local packets_rp12 packets_rp13
 
@@ -275,7 +281,8 @@ multipath6_test()
 	t0_rp13=$(link_stats_tx_packets_get $rp13)
 
 	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+		-d $MZ_DELAY -t udp "$ports"
+	sleep 1
 
 	t1_rp12=$(link_stats_tx_packets_get $rp12)
 	t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -313,6 +320,23 @@ multipath_test()
 	multipath6_test "Weighted MP 11:45" 11 45
 }
 
+multipath16_test()
+{
+	check_nhgw16 104 || return
+
+	log_info "Running 16-bit IPv4 multipath tests"
+	multipath4_test "65535:65535" 65535 65535
+	multipath4_test "128:512" 128 512
+	omit_on_slow \
+		multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+	log_info "Running 16-bit IPv6 multipath tests"
+	multipath6_test "65535:65535" 65535 65535
+	multipath6_test "128:512" 128 512
+	omit_on_slow \
+		multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+}
+
 ping_ipv4_blackhole()
 {
 	RET=0
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
index 2903294d8bca..507b2852dabe 100644
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -117,3 +117,16 @@ __nh_stats_test_v6()
 			       $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
 	sysctl_restore net.ipv6.fib_multipath_hash_policy
 }
+
+check_nhgw16()
+{
+	local nhid=$1; shift
+
+	ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null
+	if (( $? )); then
+		log_test_skip "16-bit multipath tests" \
+			      "iproute2 or the kernel do not support 16-bit next hop weights"
+		return 1
+	fi
+	ip nexthop del id 9999 ||:
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index cd9e346436fc..88ddae05b39d 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
 	multipath_test
+	multipath16_test
 	nh_stats_test_v4
 	nh_stats_test_v6
 "
@@ -228,9 +229,11 @@ routing_nh_obj()
 
 multipath4_test()
 {
-	local desc="$1"
-	local weight_rp12=$2
-	local weight_rp13=$3
+	local desc=$1; shift
+	local weight_rp12=$1; shift
+	local weight_rp13=$1; shift
+	local ports=${1-sp=1024,dp=0-32768}; shift
+
 	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
 	local packets_rp12 packets_rp13
 
@@ -243,7 +246,8 @@ multipath4_test()
 	t0_rp13=$(link_stats_tx_packets_get $rp13)
 
 	ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
-		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+		-d $MZ_DELAY -t udp "$ports"
+	sleep 1
 
 	t1_rp12=$(link_stats_tx_packets_get $rp12)
 	t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
 
 multipath6_l4_test()
 {
-	local desc="$1"
-	local weight_rp12=$2
-	local weight_rp13=$3
+	local desc=$1; shift
+	local weight_rp12=$1; shift
+	local weight_rp13=$1; shift
+	local ports=${1-sp=1024,dp=0-32768}; shift
+
 	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
 	local packets_rp12 packets_rp13
 
@@ -273,7 +279,8 @@ multipath6_l4_test()
 	t0_rp13=$(link_stats_tx_packets_get $rp13)
 
 	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-		-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+		-d $MZ_DELAY -t udp "$ports"
+	sleep 1
 
 	t1_rp12=$(link_stats_tx_packets_get $rp12)
 	t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -371,6 +378,41 @@ multipath_test()
 	ip nexthop replace id 106 group 104,1/105,1 type resilient
 }
 
+multipath16_test()
+{
+	check_nhgw16 104 || return
+
+	log_info "Running 16-bit IPv4 multipath tests"
+	ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+
+	ip nexthop replace id 103 group 101,65535/102,65535 type resilient
+	multipath4_test "65535:65535" 65535 65535
+
+	ip nexthop replace id 103 group 101,128/102,512 type resilient
+	multipath4_test "128:512" 128 512
+
+	ip nexthop replace id 103 group 101,255/102,65535 type resilient
+	omit_on_slow \
+		multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+	ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+	log_info "Running 16-bit IPv6 L4 hash multipath tests"
+	ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+	ip nexthop replace id 106 group 104,65535/105,65535 type resilient
+	multipath6_l4_test "65535:65535" 65535 65535
+
+	ip nexthop replace id 106 group 104,128/105,512 type resilient
+	multipath6_l4_test "128:512" 128 512
+
+	ip nexthop replace id 106 group 104,255/105,65535 type resilient
+	omit_on_slow \
+		multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+	ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
 nh_stats_test_v4()
 {
 	__nh_stats_test_v4 resilient
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index e2be354167a1..46f365b557b7 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -180,6 +180,7 @@ multipath4_test()
 
        ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
 	       -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+       sleep 1
 
        t1_rp12=$(link_stats_tx_packets_get $rp12)
        t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -217,6 +218,7 @@ multipath6_test()
 
        $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
 	       -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+       sleep 1
 
        t1_rp12=$(link_stats_tx_packets_get $rp12)
        t1_rp13=$(link_stats_tx_packets_get $rp13)
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 589629636502..ea89e558672d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -4,7 +4,8 @@
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
 	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
 	gact_trap_test mirred_egress_to_ingress_test \
-	mirred_egress_to_ingress_tcp_test"
+	mirred_egress_to_ingress_tcp_test \
+	ingress_2nd_vlan_push egress_2nd_vlan_push"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test()
 	log_test "mirred_egress_to_ingress_tcp ($tcflags)"
 }
 
+ingress_2nd_vlan_push()
+{
+	tc filter add dev $swp1 ingress pref 20 chain 0 handle 20 flower \
+		$tcflags num_of_vlans 1 \
+		action vlan push id 100 protocol 0x8100 action goto chain 5
+	tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \
+		$tcflags num_of_vlans 2 \
+		cvlan_ethtype 0x800 action pass
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -Q 10 -q
+
+	tc_check_packets "dev $swp1 ingress" 30 1
+	check_err $? "No double-vlan packets received"
+
+	tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower
+	tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower
+
+	log_test "ingress_2nd_vlan_push ($tcflags)"
+}
+
+egress_2nd_vlan_push()
+{
+	tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \
+		$tcflags num_of_vlans 0 \
+		action vlan push id 10 protocol 0x8100 \
+		pipe action vlan push id 100 protocol 0x8100 action goto chain 5
+	tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \
+		$tcflags num_of_vlans 2 \
+		cvlan_ethtype 0x800 action pass
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h1 egress" 30 1
+	check_err $? "No double-vlan packets received"
+
+	tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower
+	tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower
+
+	log_test "egress_2nd_vlan_push ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 8ee4489238ca..be8707bfb46e 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -125,6 +125,21 @@ slowwait_for_counter()
 	slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
 }
 
+# Exit with $ksft_skip unless tool $1 (built as part of selftests) is found
+# in $PATH; $PWD is prepended to $PATH when the tool is not already reachable.
+check_gen_prog()
+{
+	local prog_name=$1; shift
+
+	if ! command -v "$prog_name" >/dev/null 2>&1; then
+		PATH=$PWD:$PATH
+		if ! command -v "$prog_name" >/dev/null 2>&1; then
+			echo "'$prog_name' command not found; skipping tests"
+			exit $ksft_skip
+		fi
+	fi
+}
+
 remove_ns_list()
 {
 	local item=$1
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index b9f3fc3c3426..e0a34e5e8dd5 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
 {
 	struct iphdr *iph = nh;
 	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+	uint16_t ip_len;
 
 	if (len < sizeof(*iph) || iph->protocol != proto)
 		return -1;
 
+	ip_len = ntohs(iph->tot_len);
+	if (ip_len > len || ip_len < sizeof(*iph))
+		return -1;
+
+	len = ip_len;
 	iph_addr_p = &iph->saddr;
 	if (proto == IPPROTO_TCP)
 		return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
@@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len)
 {
 	struct ipv6hdr *ip6h = nh;
 	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+	uint16_t ip_len;
 
 	if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
 		return -1;
 
+	ip_len = ntohs(ip6h->payload_len);
+	if (ip_len > len - sizeof(*ip6h))
+		return -1;
+
+	len = ip_len;
 	iph_addr_p = &ip6h->saddr;
 
 	if (proto == IPPROTO_TCP)
-		return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+		return recv_verify_packet_tcp(ip6h + 1, len);
 	else
-		return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+		return recv_verify_packet_udp(ip6h + 1, len);
 }
 
 /* return whether auxdata includes TP_STATUS_CSUM_VALID */
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index f26c20df9db4..477ae76de93d 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 import builtins
+import functools
 import inspect
 import sys
 import time
@@ -10,6 +11,7 @@ from .utils import global_defer_queue
 
 KSFT_RESULT = None
 KSFT_RESULT_ALL = True
+KSFT_DISRUPTIVE = True
 
 
 class KsftFailEx(Exception):
@@ -32,8 +34,18 @@ def _fail(*args):
     global KSFT_RESULT
     KSFT_RESULT = False
 
-    frame = inspect.stack()[2]
-    ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+    stack = inspect.stack()
+    started = False
+    for frame in reversed(stack[2:]):
+        # Start printing from the test case function
+        if not started:
+            if frame.function == 'ksft_run':
+                started = True
+            continue
+
+        ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) +
+                ", in " + frame.function + ":")
+        ksft_pr("Check|     " + frame.code_context[0].strip())
     ksft_pr(*args)
 
 
@@ -43,6 +55,12 @@ def ksft_eq(a, b, comment=""):
         _fail("Check failed", a, "!=", b, comment)
 
 
+def ksft_ne(a, b, comment=""):
+    """Record a check failure when a == b (inverse of ksft_eq)."""
+    if a == b:
+        _fail("Check failed", a, "==", b, comment)
+
+
 def ksft_true(a, comment=""):
     if not a:
         _fail("Check failed", a, "does not eval to True", comment)
@@ -127,6 +145,44 @@ def ksft_flush_defer():
             KSFT_RESULT = False
 
 
+def ksft_disruptive(func):
+    """
+    Decorator that marks the test as disruptive (e.g. a test
+    that can bring the interface down). Disruptive tests can be skipped
+    by setting the DISRUPTIVE=False environment variable.
+    """
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if not KSFT_DISRUPTIVE:
+            raise KsftSkipEx("marked as disruptive")
+        return func(*args, **kwargs)
+    return wrapper
+
+
+def ksft_setup(env):
+    """
+    Set up test framework global state from `env`; returns `env` unmodified.
+    """
+    # Booleans: yes/true/no/false (any case) or an integer literal (0 is False).
+    def get_bool(env, name):
+        value = env.get(name, "").lower()
+        if value in ["yes", "true"]:
+            return True
+        if value in ["no", "false"]:
+            return False
+        try:
+            return bool(int(value))
+        except ValueError:
+            raise Exception(f"failed to parse {name}")
+
+    if "DISRUPTIVE" in env:
+        global KSFT_DISRUPTIVE
+        KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE")
+
+    return env
+
+
 def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
     cases = cases or []
 
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 7b936a926859..5d796622e730 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,6 +11,8 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
 
 TEST_FILES := mptcp_lib.sh settings
 
+TEST_INCLUDES := ../lib.sh ../net_helper.sh
+
 EXTRA_CLEAN := *.pcap
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 776d43a6922d..2bd0c1eb70c5 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -284,7 +284,7 @@ echo "b" | \
 			./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
 				127.0.0.1 >/dev/null &
 wait_connected $ns 10000
-chk_msk_nr 2 "after MPC handshake "
+chk_msk_nr 2 "after MPC handshake"
 chk_last_time_info 10000
 chk_msk_remote_key_nr 2 "....chk remote_key"
 chk_msk_fallback_nr 0 "....chk no fallback"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b77fb7065bfb..57325d57e4c6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -345,9 +345,11 @@ do_transfer()
 
 	local addr_port
 	addr_port=$(printf "%s:%d" ${connect_addr} ${port})
-	local result_msg
-	result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
-	mptcp_lib_print_title "${result_msg}"
+	local pretty_title
+	pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
+	mptcp_lib_print_title "${pretty_title}"
+
+	local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}"
 
 	if $capture; then
 		local capuser
@@ -431,7 +433,6 @@ do_transfer()
 
 	local duration
 	duration=$((stop-start))
-	result_msg+=" # time=${duration}ms"
 	printf "(duration %05sms) " "${duration}"
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
 		mptcp_lib_pr_fail "client exit code $retc, server $rets"
@@ -444,7 +445,7 @@ do_transfer()
 
 		echo
 		cat "$capout"
-		mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+		mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
 		return 1
 	fi
 
@@ -544,12 +545,12 @@ do_transfer()
 
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
 		mptcp_lib_pr_ok "${extra:1}"
-		mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+		mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}"
 	else
 		if [ -n "${extra}" ]; then
 			mptcp_lib_print_warn "${extra:1}"
 		fi
-		mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+		mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
 	fi
 
 	cat "$capout"
@@ -848,6 +849,8 @@ stop_if_error()
 make_file "$cin" "client"
 make_file "$sin" "server"
 
+mptcp_lib_subtests_last_ts_reset
+
 check_mptcp_disabled
 
 stop_if_error "The kernel configuration is not valid for MPTCP"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 89e553e0e0c2..e8d0a01b4144 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -61,6 +61,16 @@ unset sflags
 unset fastclose
 unset fullmesh
 unset speed
+unset join_csum_ns1
+unset join_csum_ns2
+unset join_fail_nr
+unset join_rst_nr
+unset join_infi_nr
+unset join_corrupted_pkts
+unset join_syn_tx
+unset join_create_err
+unset join_bind_err
+unset join_connect_err
 
 # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
 #				  (ip6 && (ip6[74] & 0xf0) == 0x30)'"
@@ -196,6 +206,22 @@ print_skip()
 	mptcp_lib_pr_skip "${@}"
 }
 
+# $1: check name; $2: rc (${KSFT_PASS}: ok, ${KSFT_SKIP}: skip, else: fail)
+print_results()
+{
+	local check="${1}"
+	local rc=${2}
+
+	print_check "${check}"
+	if [ "${rc}" = "${KSFT_PASS}" ]; then
+		print_ok
+	elif [ "${rc}" = "${KSFT_SKIP}" ]; then
+		print_skip
+	else
+		fail_test "see above"
+	fi
+}
+
 # [ $1: fail msg ]
 mark_as_skipped()
 {
@@ -337,7 +363,7 @@ reset_with_checksum()
 	local ns1_enable=$1
 	local ns2_enable=$2
 
-	reset "checksum test ${1} ${2}" || return 1
+	reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1
 
 	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
 	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
@@ -420,12 +446,17 @@ reset_with_fail()
 	fi
 }
 
+start_events() # run mptcp_lib_events for ns1 and ns2 (split out of reset_with_events)
+{
+	mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+	mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+}
+
 reset_with_events()
 {
 	reset "${1}" || return 1
 
-	mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
-	mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+	start_events
 }
 
 reset_with_tcp_filter()
@@ -834,7 +865,7 @@ chk_cestab_nr()
 	local cestab=$2
 	local count
 
-	print_check "cestab $cestab"
+	print_check "currently established: $cestab"
 	count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1110,28 +1141,29 @@ chk_csum_nr()
 		csum_ns2=${csum_ns2:1}
 	fi
 
-	print_check "sum"
+	print_check "checksum server"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
-	if [ "$count" != "$csum_ns1" ]; then
+	if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then
 		extra_msg+=" ns1=$count"
 	fi
 	if [ -z "$count" ]; then
 		print_skip
 	elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
-	   { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
+	     { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
 		fail_test "got $count data checksum error[s] expected $csum_ns1"
 	else
 		print_ok
 	fi
-	print_check "csum"
+
+	print_check "checksum client"
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
-	if [ "$count" != "$csum_ns2" ]; then
+	if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then
 		extra_msg+=" ns2=$count"
 	fi
 	if [ -z "$count" ]; then
 		print_skip
 	elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
-	   { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
+	     { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
 		fail_test "got $count data checksum error[s] expected $csum_ns2"
 	else
 		print_ok
@@ -1148,6 +1180,8 @@ chk_fail_nr()
 	local count
 	local ns_tx=$ns1
 	local ns_rx=$ns2
+	local tx="server"
+	local rx="client"
 	local extra_msg=""
 	local allow_tx_lost=0
 	local allow_rx_lost=0
@@ -1155,7 +1189,8 @@ chk_fail_nr()
 	if [[ $ns_invert = "invert" ]]; then
 		ns_tx=$ns2
 		ns_rx=$ns1
-		extra_msg="invert"
+		tx="client"
+		rx="server"
 	fi
 
 	if [[ "${fail_tx}" = "-"* ]]; then
@@ -1167,29 +1202,29 @@ chk_fail_nr()
 		fail_rx=${fail_rx:1}
 	fi
 
-	print_check "ftx"
+	print_check "fail tx ${tx}"
 	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
-	if [ "$count" != "$fail_tx" ]; then
-		extra_msg+=",tx=$count"
+	if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then
+		extra_msg+=" tx=$count"
 	fi
 	if [ -z "$count" ]; then
 		print_skip
 	elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
-	   { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
+	     { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
 		fail_test "got $count MP_FAIL[s] TX expected $fail_tx"
 	else
 		print_ok
 	fi
 
-	print_check "failrx"
+	print_check "fail rx ${rx}"
 	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
-	if [ "$count" != "$fail_rx" ]; then
-		extra_msg+=",rx=$count"
+	if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then
+		extra_msg+=" rx=$count"
 	fi
 	if [ -z "$count" ]; then
 		print_skip
 	elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
-	   { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
+	     { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
 		fail_test "got $count MP_FAIL[s] RX expected $fail_rx"
 	else
 		print_ok
@@ -1206,37 +1241,35 @@ chk_fclose_nr()
 	local count
 	local ns_tx=$ns2
 	local ns_rx=$ns1
-	local extra_msg=""
+	local tx="client"
+	local rx="server"
 
 	if [[ $ns_invert = "invert" ]]; then
 		ns_tx=$ns1
 		ns_rx=$ns2
-		extra_msg="invert"
+		tx="server"
+		rx="client"
 	fi
 
-	print_check "ctx"
+	print_check "fast close tx ${tx}"
 	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$fclose_tx" ]; then
-		extra_msg+=",tx=$count"
 		fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
 	else
 		print_ok
 	fi
 
-	print_check "fclzrx"
+	print_check "fast close rx ${rx}"
 	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$fclose_rx" ]; then
-		extra_msg+=",rx=$count"
 		fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
 	else
 		print_ok
 	fi
-
-	print_info "$extra_msg"
 }
 
 chk_rst_nr()
@@ -1247,15 +1280,17 @@ chk_rst_nr()
 	local count
 	local ns_tx=$ns1
 	local ns_rx=$ns2
-	local extra_msg=""
+	local tx="server"
+	local rx="client"
 
 	if [[ $ns_invert = "invert" ]]; then
 		ns_tx=$ns2
 		ns_rx=$ns1
-		extra_msg="invert"
+		tx="client"
+		rx="server"
 	fi
 
-	print_check "rtx"
+	print_check "reset tx ${tx}"
 	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1267,7 +1302,7 @@ chk_rst_nr()
 		print_ok
 	fi
 
-	print_check "rstrx"
+	print_check "reset rx ${rx}"
 	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1278,8 +1313,6 @@ chk_rst_nr()
 	else
 		print_ok
 	fi
-
-	print_info "$extra_msg"
 }
 
 chk_infi_nr()
@@ -1288,7 +1321,7 @@ chk_infi_nr()
 	local infi_rx=$2
 	local count
 
-	print_check "itx"
+	print_check "infi tx client"
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1298,7 +1331,7 @@ chk_infi_nr()
 		print_ok
 	fi
 
-	print_check "infirx"
+	print_check "infi rx server"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1309,17 +1342,66 @@ chk_infi_nr()
 	fi
 }
 
+chk_join_tx_nr() # check the MP_JOIN Tx counters of ns2; no positional args
+{
+	local syn_tx=${join_syn_tx:-0}		# expected values come from the
+	local create=${join_create_err:-0}	# join_* variables, default 0
+	local bind=${join_bind_err:-0}
+	local connect=${join_connect_err:-0}
+	local rc=${KSFT_PASS}
+	local count
+	# A missing counter only turns PASS into SKIP: it must not hide a failure.
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx")
+	if [ -z "$count" ]; then
+		[ "${rc}" != "${KSFT_PASS}" ] || rc=${KSFT_SKIP}
+	elif [ "$count" != "$syn_tx" ]; then
+		rc=${KSFT_FAIL}
+		print_check "syn tx"
+		fail_test "got $count JOIN[s] syn tx expected $syn_tx"
+	fi
+
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
+	if [ -z "$count" ]; then
+		[ "${rc}" != "${KSFT_PASS}" ] || rc=${KSFT_SKIP}
+	elif [ "$count" != "$create" ]; then
+		rc=${KSFT_FAIL}
+		print_check "syn tx create socket error"
+		fail_test "got $count JOIN[s] syn tx create socket error expected $create"
+	fi
+
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
+	if [ -z "$count" ]; then
+		[ "${rc}" != "${KSFT_PASS}" ] || rc=${KSFT_SKIP}
+	elif [ "$count" != "$bind" ]; then
+		rc=${KSFT_FAIL}
+		print_check "syn tx bind error"
+		fail_test "got $count JOIN[s] syn tx bind error expected $bind"
+	fi
+
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
+	if [ -z "$count" ]; then
+		[ "${rc}" != "${KSFT_PASS}" ] || rc=${KSFT_SKIP}
+	elif [ "$count" != "$connect" ]; then
+		rc=${KSFT_FAIL}
+		print_check "syn tx connect error"
+		fail_test "got $count JOIN[s] syn tx connect error expected $connect"
+	fi
+
+	print_results "join Tx" ${rc}
+}
+
 chk_join_nr()
 {
 	local syn_nr=$1
 	local syn_ack_nr=$2
 	local ack_nr=$3
-	local csum_ns1=${4:-0}
-	local csum_ns2=${5:-0}
-	local fail_nr=${6:-0}
-	local rst_nr=${7:-0}
-	local infi_nr=${8:-0}
-	local corrupted_pkts=${9:-0}
+	local csum_ns1=${join_csum_ns1:-0}
+	local csum_ns2=${join_csum_ns2:-0}
+	local fail_nr=${join_fail_nr:-0}
+	local rst_nr=${join_rst_nr:-0}
+	local infi_nr=${join_infi_nr:-0}
+	local corrupted_pkts=${join_corrupted_pkts:-0}
+	local rc=${KSFT_PASS}
 	local count
 	local with_cookie
 
@@ -1327,43 +1409,44 @@ chk_join_nr()
 		print_info "${corrupted_pkts} corrupted pkts"
 	fi
 
-	print_check "syn"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
 	if [ -z "$count" ]; then
-		print_skip
+		rc=${KSFT_SKIP}
 	elif [ "$count" != "$syn_nr" ]; then
-		fail_test "got $count JOIN[s] syn expected $syn_nr"
-	else
-		print_ok
+		rc=${KSFT_FAIL}
+		print_check "syn rx"
+		fail_test "got $count JOIN[s] syn rx expected $syn_nr"
 	fi
 
-	print_check "synack"
 	with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
 	if [ -z "$count" ]; then
-		print_skip
+		rc=${KSFT_SKIP}
 	elif [ "$count" != "$syn_ack_nr" ]; then
 		# simult connections exceeding the limit with cookie enabled could go up to
 		# synack validation as the conn limit can be enforced reliably only after
 		# the subflow creation
-		if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
-			print_ok
-		else
-			fail_test "got $count JOIN[s] synack expected $syn_ack_nr"
+		if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then
+			rc=${KSFT_FAIL}
+			print_check "synack rx"
+			fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr"
 		fi
-	else
-		print_ok
 	fi
 
-	print_check "ack"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
 	if [ -z "$count" ]; then
-		print_skip
+		rc=${KSFT_SKIP}
 	elif [ "$count" != "$ack_nr" ]; then
-		fail_test "got $count JOIN[s] ack expected $ack_nr"
-	else
-		print_ok
+		rc=${KSFT_FAIL}
+		print_check "ack rx"
+		fail_test "got $count JOIN[s] ack rx expected $ack_nr"
 	fi
+
+	print_results "join Rx" ${rc}
+
+	join_syn_tx="${join_syn_tx:-${syn_nr}}" \
+		chk_join_tx_nr
+
 	if $validate_checksum; then
 		chk_csum_nr $csum_ns1 $csum_ns2
 		chk_fail_nr $fail_nr $fail_nr
@@ -1424,19 +1507,21 @@ chk_add_nr()
 	local mis_ack_nr=0
 	local ns_tx=$ns1
 	local ns_rx=$ns2
-	local extra_msg=""
+	local tx=""
+	local rx=""
 	local count
 	local timeout
 
 	if [[ $ns_invert = "invert" ]]; then
 		ns_tx=$ns2
 		ns_rx=$ns1
-		extra_msg="invert"
+		tx=" client"
+		rx=" server"
 	fi
 
 	timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)
 
-	print_check "add"
+	print_check "add addr rx${rx}"
 	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1448,7 +1533,7 @@ chk_add_nr()
 		print_ok
 	fi
 
-	print_check "echo"
+	print_check "add addr echo rx${tx}"
 	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1459,7 +1544,7 @@ chk_add_nr()
 	fi
 
 	if [ $port_nr -gt 0 ]; then
-		print_check "pt"
+		print_check "add addr rx with port${rx}"
 		count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd")
 		if [ -z "$count" ]; then
 			print_skip
@@ -1469,7 +1554,7 @@ chk_add_nr()
 			print_ok
 		fi
 
-		print_check "syn"
+		print_check "syn rx port${tx}"
 		count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx")
 		if [ -z "$count" ]; then
 			print_skip
@@ -1480,7 +1565,7 @@ chk_add_nr()
 			print_ok
 		fi
 
-		print_check "synack"
+		print_check "synack rx port${rx}"
 		count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx")
 		if [ -z "$count" ]; then
 			print_skip
@@ -1491,7 +1576,7 @@ chk_add_nr()
 			print_ok
 		fi
 
-		print_check "ack"
+		print_check "ack rx port${tx}"
 		count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx")
 		if [ -z "$count" ]; then
 			print_skip
@@ -1502,7 +1587,7 @@ chk_add_nr()
 			print_ok
 		fi
 
-		print_check "syn"
+		print_check "syn rx port mismatch${tx}"
 		count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx")
 		if [ -z "$count" ]; then
 			print_skip
@@ -1513,7 +1598,7 @@ chk_add_nr()
 			print_ok
 		fi
 
-		print_check "ack"
+		print_check "ack rx port mismatch${tx}"
 		count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx")
 		if [ -z "$count" ]; then
 			print_skip
@@ -1524,8 +1609,6 @@ chk_add_nr()
 			print_ok
 		fi
 	fi
-
-	print_info "$extra_msg"
 }
 
 chk_add_tx_nr()
@@ -1537,7 +1620,7 @@ chk_add_tx_nr()
 
 	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
 
-	print_check "add TX"
+	print_check "add addr tx"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1549,7 +1632,7 @@ chk_add_tx_nr()
 		print_ok
 	fi
 
-	print_check "echo TX"
+	print_check "add addr echo tx"
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1569,6 +1652,8 @@ chk_rm_nr()
 	local count
 	local addr_ns=$ns1
 	local subflow_ns=$ns2
+	local addr="server"
+	local subflow="client"
 	local extra_msg=""
 
 	shift 2
@@ -1578,16 +1663,14 @@ chk_rm_nr()
 		shift
 	done
 
-	if [ -z $invert ]; then
-		addr_ns=$ns1
-		subflow_ns=$ns2
-	elif [ $invert = "true" ]; then
+	if [ "$invert" = "true" ]; then
 		addr_ns=$ns2
 		subflow_ns=$ns1
-		extra_msg="invert"
+		addr="client"
+		subflow="server"
 	fi
 
-	print_check "rm"
+	print_check "rm addr rx ${addr}"
 	count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1597,7 +1680,7 @@ chk_rm_nr()
 		print_ok
 	fi
 
-	print_check "rmsf"
+	print_check "rm subflow ${subflow}"
 	count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1611,7 +1694,7 @@ chk_rm_nr()
 		count=$((count + cnt))
 		if [ "$count" != "$rm_subflow_nr" ]; then
 			suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
-			extra_msg+=" simult"
+			extra_msg="simult"
 		fi
 		if [ $count -ge "$rm_subflow_nr" ] && \
 		   [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
@@ -1632,7 +1715,7 @@ chk_rm_tx_nr()
 {
 	local rm_addr_tx_nr=$1
 
-	print_check "rm TX"
+	print_check "rm addr tx client"
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1651,7 +1734,7 @@ chk_prio_nr()
 	local mpj_syn_ack=$4
 	local count
 
-	print_check "ptx"
+	print_check "mp_prio tx server"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1661,7 +1744,7 @@ chk_prio_nr()
 		print_ok
 	fi
 
-	print_check "prx"
+	print_check "mp_prio rx client"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
 	if [ -z "$count" ]; then
 		print_skip
@@ -1904,9 +1987,11 @@ subflows_error_tests()
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow
 		speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0
+		join_bind_err=1 \
+			chk_join_nr 0 0 0
 	fi
 
 	# multiple subflows, with subflow creation error
@@ -1918,7 +2003,8 @@ subflows_error_tests()
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 1
+		join_syn_tx=2 \
+			chk_join_nr 1 1 1
 	fi
 
 	# multiple subflows, with subflow timeout on MPJ
@@ -1930,7 +2016,8 @@ subflows_error_tests()
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 1
+		join_syn_tx=2 \
+			chk_join_nr 1 1 1
 	fi
 
 	# multiple subflows, check that the endpoint corresponding to
@@ -1951,7 +2038,8 @@ subflows_error_tests()
 
 		# additional subflow could be created only if the PM select
 		# the later endpoint, skipping the already used one
-		chk_join_nr 1 1 1
+		join_syn_tx=2 \
+			chk_join_nr 1 1 1
 	fi
 }
 
@@ -2037,7 +2125,8 @@ signal_address_tests()
 		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
 		pm_nl_set_limits $ns2 3 3
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 1
+		join_syn_tx=3 \
+			chk_join_nr 1 1 1
 		chk_add_nr 3 3
 	fi
 
@@ -2205,7 +2294,8 @@ add_addr_timeout_tests()
 		pm_nl_set_limits $ns2 2 2
 		speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 1
+		join_syn_tx=2 \
+			chk_join_nr 1 1 1
 		chk_add_nr 8 0
 	fi
 }
@@ -2305,7 +2395,8 @@ remove_tests()
 		pm_nl_set_limits $ns2 2 2
 		addr_nr_ns1=-3 speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 1
+		join_syn_tx=2 join_connect_err=1 \
+			chk_join_nr 1 1 1
 		chk_add_nr 3 3
 		chk_rm_nr 3 1 invert
 		chk_rst_nr 0 0
@@ -2370,7 +2461,8 @@ remove_tests()
 		pm_nl_set_limits $ns2 3 3
 		addr_nr_ns1=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 1
+		join_syn_tx=3 \
+			chk_join_nr 1 1 1
 		chk_add_nr 3 3
 		chk_rm_nr 3 1 invert
 		chk_rst_nr 0 0
@@ -2940,37 +3032,16 @@ syncookies_tests()
 
 checksum_tests()
 {
-	# checksum test 0 0
-	if reset_with_checksum 0 0; then
-		pm_nl_set_limits $ns1 0 1
-		pm_nl_set_limits $ns2 0 1
-		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0
-	fi
-
-	# checksum test 1 1
-	if reset_with_checksum 1 1; then
-		pm_nl_set_limits $ns1 0 1
-		pm_nl_set_limits $ns2 0 1
-		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0
-	fi
-
-	# checksum test 0 1
-	if reset_with_checksum 0 1; then
-		pm_nl_set_limits $ns1 0 1
-		pm_nl_set_limits $ns2 0 1
-		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0
-	fi
-
-	# checksum test 1 0
-	if reset_with_checksum 1 0; then
-		pm_nl_set_limits $ns1 0 1
-		pm_nl_set_limits $ns2 0 1
-		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0
-	fi
+	local checksum_enable
+	for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do
+		# checksum test 0 0, 1 1, 0 1, 1 0
+		if reset_with_checksum ${checksum_enable}; then
+			pm_nl_set_limits $ns1 0 1
+			pm_nl_set_limits $ns2 0 1
+			run_tests $ns1 $ns2 10.0.1.1
+			chk_join_nr 0 0 0
+		fi
+	done
 }
 
 deny_join_id0_tests()
@@ -3059,7 +3130,9 @@ fullmesh_tests()
 		pm_nl_set_limits $ns1 1 3
 		pm_nl_set_limits $ns2 1 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+		if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+			pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+		fi
 		fullmesh=1 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
@@ -3159,7 +3232,8 @@ fastclose_tests()
 		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=1024 fastclose=server \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0 0 0 0 1
+		join_rst_nr=1 \
+			chk_join_nr 0 0 0
 		chk_fclose_nr 1 1 invert
 		chk_rst_nr 1 1
 	fi
@@ -3178,7 +3252,10 @@ fail_tests()
 		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=128 \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+		join_csum_ns1=+1 join_csum_ns2=+0 \
+			join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
+			join_corrupted_pkts="$(pedit_action_pkts)" \
+			chk_join_nr 0 0 0
 		chk_fail_nr 1 -1 invert
 	fi
 
@@ -3191,7 +3268,10 @@ fail_tests()
 		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
 		test_linkfail=1024 \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+		join_csum_ns1=1 join_csum_ns2=0 \
+			join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \
+			join_corrupted_pkts="$(pedit_action_pkts)" \
+			chk_join_nr 1 1 1
 	fi
 }
 
@@ -3333,6 +3413,36 @@ userspace_pm_chk_get_addr()
 	fi
 }
 
+# $1: ns ("ns1" or "ns2") ; $2: name of an MPTCP_LIB_EVENT_* variable ; $3: expected count
+chk_evt_nr()
+{
+	local ns=${1}
+	local evt_name="${2}"
+	local exp="${3}"
+
+	local evts="${evts_ns1}"	# events file to search; ns1's unless $1 is "ns2"
+	local evt="${!evt_name}"	# indirect expansion: value of the variable named by $2
+	local count
+
+	evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_
+	[ "${ns}" == "ns2" ] && evts="${evts_ns2}"
+
+	print_check "event ${ns} ${evt_name} (${exp})"
+
+	if [[ "${evt_name}" = "LISTENER_"* ]] &&
+	   ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+		print_skip "event not supported"
+		return
+	fi
+
+	count=$(grep -cw "type:${evt}" "${evts}") # lines with "type:<evt>" as a whole word
+	if [ "${count}" != "${exp}" ]; then
+		fail_test "got ${count} events, expected ${exp}"
+	else
+		print_ok
+	fi
+}
+
 userspace_tests()
 {
 	# userspace pm type prevents add_addr
@@ -3413,8 +3523,8 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns1
 		pm_nl_set_limits $ns2 2 2
-		speed=5 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns1
 		userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3429,14 +3539,12 @@ userspace_tests()
 			"signal"
 		userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
 		userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
-		userspace_pm_rm_addr $ns1 10
 		userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
 		userspace_pm_chk_dump_addr "${ns1}" \
-			"id 20 flags signal 10.0.3.1" "after rm_addr 10"
+			"id 20 flags signal 10.0.3.1" "after rm_sf 10"
 		userspace_pm_rm_addr $ns1 20
-		userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
 		userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
-		chk_rm_nr 2 2 invert
+		chk_rm_nr 1 1 invert
 		chk_mptcp_info subflows 0 subflows 0
 		chk_subflows_total 1 1
 		kill_events_pids
@@ -3448,8 +3556,8 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		speed=5 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns2
 		userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3460,12 +3568,11 @@ userspace_tests()
 			"id 20 flags subflow 10.0.3.2" \
 			"subflow"
 		userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
-		userspace_pm_rm_addr $ns2 20
 		userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
 		userspace_pm_chk_dump_addr "${ns2}" \
 			"" \
-			"after rm_addr 20"
-		chk_rm_nr 1 1
+			"after rm_sf 20"
+		chk_rm_nr 0 1
 		chk_mptcp_info subflows 0 subflows 0
 		chk_subflows_total 1 1
 		kill_events_pids
@@ -3477,8 +3584,8 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		speed=5 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns2
 		chk_mptcp_info subflows 0 subflows 0
@@ -3498,8 +3605,8 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		speed=5 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns2
 		userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3522,8 +3629,8 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns1
 		pm_nl_set_limits $ns2 1 1
-		speed=5 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns1
 		userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3553,8 +3660,8 @@ endpoint_tests()
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-		speed=slow \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
 		wait_mpj $ns1
@@ -3575,27 +3682,29 @@ endpoint_tests()
 
 	if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
 	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
-		pm_nl_set_limits $ns1 0 2
-		pm_nl_set_limits $ns2 0 2
+		start_events
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_set_limits $ns2 0 3
+		pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
-		test_linkfail=4 speed=20 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ test_linkfail=4 speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
 		wait_mpj $ns2
 		pm_nl_check_endpoint "creation" \
 			$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
-		chk_subflow_nr "before delete" 2
+		chk_subflow_nr "before delete id 2" 2
 		chk_mptcp_info subflows 1 subflows 1
 
 		pm_nl_del_endpoint $ns2 2 10.0.2.2
 		sleep 0.5
-		chk_subflow_nr "after delete" 1
+		chk_subflow_nr "after delete id 2" 1
 		chk_mptcp_info subflows 0 subflows 0
 
 		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
 		wait_mpj $ns2
-		chk_subflow_nr "after re-add" 2
+		chk_subflow_nr "after re-add id 2" 2
 		chk_mptcp_info subflows 1 subflows 1
 
 		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
@@ -3610,22 +3719,53 @@ endpoint_tests()
 		chk_subflow_nr "after no reject" 3
 		chk_mptcp_info subflows 2 subflows 2
 
+		local i
+		for i in $(seq 3); do
+			pm_nl_del_endpoint $ns2 1 10.0.1.2
+			sleep 0.5
+			chk_subflow_nr "after delete id 0 ($i)" 2
+			chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
+
+			pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
+			wait_mpj $ns2
+			chk_subflow_nr "after re-add id 0 ($i)" 3
+			chk_mptcp_info subflows 3 subflows 3
+		done
+
 		mptcp_lib_kill_wait $tests_pid
 
-		chk_join_nr 3 3 3
-		chk_rm_nr 1 1
+		kill_events_pids
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4
+
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab
+
+		join_syn_tx=7 \
+			chk_join_nr 6 6 6
+		chk_rm_nr 4 4
 	fi
 
 	# remove and re-add
-	if reset "delete re-add signal" &&
+	if reset_with_events "delete re-add signal" &&
 	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
-		pm_nl_set_limits $ns1 0 2
-		pm_nl_set_limits $ns2 2 2
+		pm_nl_set_limits $ns1 0 3
+		pm_nl_set_limits $ns2 3 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
 		# broadcast IP: no packet for this address will be received on ns1
 		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
-		test_linkfail=4 speed=20 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
+		{ test_linkfail=4 speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
 		wait_mpj $ns2
@@ -3645,11 +3785,48 @@ endpoint_tests()
 		wait_mpj $ns2
 		chk_subflow_nr "after re-add" 3
 		chk_mptcp_info subflows 2 subflows 2
+
+		pm_nl_del_endpoint $ns1 42 10.0.1.1
+		sleep 0.5
+		chk_subflow_nr "after delete ID 0" 2
+		chk_mptcp_info subflows 2 subflows 2
+
+		pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
+		wait_mpj $ns2
+		chk_subflow_nr "after re-add ID 0" 3
+		chk_mptcp_info subflows 3 subflows 3
+
+		pm_nl_del_endpoint $ns1 99 10.0.1.1
+		sleep 0.5
+		chk_subflow_nr "after re-delete ID 0" 2
+		chk_mptcp_info subflows 2 subflows 2
+
+		pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
+		wait_mpj $ns2
+		chk_subflow_nr "after re-re-add ID 0" 3
+		chk_mptcp_info subflows 3 subflows 3
 		mptcp_lib_kill_wait $tests_pid
 
-		chk_join_nr 3 3 3
-		chk_add_nr 4 4
-		chk_rm_nr 2 1 invert
+		kill_events_pids
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+		chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+		chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+		join_connect_err=1 \
+			chk_join_nr 5 5 5
+		chk_add_nr 6 6
+		chk_rm_nr 4 3 invert
 	fi
 
 	# flush and re-add
@@ -3660,8 +3837,8 @@ endpoint_tests()
 		# broadcast IP: no packet for this address will be received on ns1
 		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
 		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
-		test_linkfail=4 speed=20 \
-			run_tests $ns1 $ns2 10.0.1.1 &
+		{ test_linkfail=4 speed=20 \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
 		wait_attempt_fail $ns2
@@ -3678,7 +3855,8 @@ endpoint_tests()
 		wait_mpj $ns2
 		mptcp_lib_kill_wait $tests_pid
 
-		chk_join_nr 2 2 2
+		join_syn_tx=3 join_connect_err=1 \
+			chk_join_nr 2 2 2
 		chk_add_nr 2 2
 		chk_rm_nr 1 0 invert
 	fi
@@ -3783,9 +3961,11 @@ if [ ${#tests[@]} -eq 0 ]; then
 	tests=("${all_tests_names[@]}")
 fi
 
+mptcp_lib_subtests_last_ts_reset
 for subtests in "${tests[@]}"; do
 	"${subtests}"
 done
+append_prev_results
 
 if [ ${ret} -ne 0 ]; then
 	echo
@@ -3796,7 +3976,6 @@ if [ ${ret} -ne 0 ]; then
 	echo
 fi
 
-append_prev_results
 mptcp_lib_result_print_all_tap
 
 exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 438280e68434..975d4d4c862a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -12,10 +12,14 @@ readonly KSFT_SKIP=4
 readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
 
 # These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_CREATED=1           # MPTCP_EVENT_CREATED
+declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2       # MPTCP_EVENT_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_CLOSED=3            # MPTCP_EVENT_CLOSED
 declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6         # MPTCP_EVENT_ANNOUNCED
 declare -rx MPTCP_LIB_EVENT_REMOVED=7           # MPTCP_EVENT_REMOVED
 declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10  # MPTCP_EVENT_SUB_ESTABLISHED
 declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11       # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13     # MPTCP_EVENT_SUB_PRIORITY
 declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
 declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16  # MPTCP_EVENT_LISTENER_CLOSED
 
@@ -25,6 +29,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
 MPTCP_LIB_SUBTESTS=()
 MPTCP_LIB_SUBTESTS_DUPLICATED=0
 MPTCP_LIB_SUBTEST_FLAKY=0
+MPTCP_LIB_SUBTESTS_LAST_TS_MS=
 MPTCP_LIB_TEST_COUNTER=0
 MPTCP_LIB_TEST_FORMAT="%02u %-50s"
 MPTCP_LIB_IP_MPTCP=0
@@ -201,6 +206,11 @@ mptcp_lib_kversion_ge() {
 	mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
 }
 
+mptcp_lib_subtests_last_ts_reset() {
+	MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)"
+}
+mptcp_lib_subtests_last_ts_reset
+
 __mptcp_lib_result_check_duplicated() {
 	local subtest
 
@@ -215,13 +225,22 @@ __mptcp_lib_result_check_duplicated() {
 
 __mptcp_lib_result_add() {
 	local result="${1}"
+	local time="time="
+	local ts_prev_ms
 	shift
 
 	local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
 
 	__mptcp_lib_result_check_duplicated "${*}"
 
-	MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
+	# not to add two '#'
+	[[ "${*}" != *"#"* ]] && time="# ${time}"
+
+	ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}"
+	mptcp_lib_subtests_last_ts_reset
+	time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms"
+
+	MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}")
 }
 
 # $1: test name
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 68899a303a1a..5e8d5b83e2d0 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -349,6 +349,7 @@ init
 make_file "$cin" "client" 1
 make_file "$sin" "server" 1
 trap cleanup EXIT
+mptcp_lib_subtests_last_ts_reset
 
 run_tests $ns1 $ns2 10.0.1.1
 run_tests $ns1 $ns2 dead:beef:1::1
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 2757378b1b13..2e6648a2b2c0 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -137,6 +137,8 @@ check()
 	fi
 }
 
+mptcp_lib_subtests_last_ts_reset
+
 check "show_endpoints" "" "defaults addr list"
 
 default_limits="$(get_limits)"
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7ad5a59adff2..994a556f46c1 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -19,12 +19,6 @@
 
 #include "linux/mptcp.h"
 
-#ifndef MPTCP_PM_NAME
-#define MPTCP_PM_NAME		"mptcp_pm"
-#endif
-#ifndef MPTCP_PM_EVENTS
-#define MPTCP_PM_EVENTS		"mptcp_pm_events"
-#endif
 #ifndef IPPROTO_MPTCP
 #define IPPROTO_MPTCP 262
 #endif
@@ -116,7 +110,7 @@ static int capture_events(int fd, int event_group)
 
 	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 		       &event_group, sizeof(event_group)) < 0)
-		error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+		error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group");
 
 	do {
 		FD_ZERO(&rfds);
@@ -288,7 +282,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
 					if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
 						*events_mcast_grp = *(__u32 *)RTA_DATA(grp);
 					else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
-						 !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+						 !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME))
 						got_events_grp = 1;
 
 					grp = RTA_NEXT(grp, grp_len);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f74e1c3c126d..8fa77c8e9b65 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -286,6 +286,7 @@ while getopts "bcdhi" option;do
 done
 
 setup
+mptcp_lib_subtests_last_ts_reset
 run_test 10 10 0 0 "balanced bwidth"
 run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
 
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 9cb05978269d..3651f73451cf 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -150,6 +150,7 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
 server_evts=$(mktemp)
 mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
 sleep 0.5
+mptcp_lib_subtests_last_ts_reset
 
 print_title "Init"
 print_test "Created network namespaces ns1, ns2"
diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
new file mode 100644
index 000000000000..64d6805381c5
--- /dev/null
+++ b/tools/testing/selftests/net/ncdevmem.c
@@ -0,0 +1,570 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <linux/uio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#define __iovec_defined
+#include <fcntl.h>
+#include <malloc.h>
+#include <error.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include <linux/memfd.h>
+#include <linux/dma-buf.h>
+#include <linux/udmabuf.h>
+#include <libmnl/libmnl.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/netdev.h>
+#include <time.h>
+#include <net/if.h>
+
+#include "netdev-user.h"
+#include <ynl.h>
+
+#define PAGE_SHIFT 12
+#define TEST_PREFIX "ncdevmem"
+#define NUM_PAGES 16000
+
+#ifndef MSG_SOCK_DEVMEM
+#define MSG_SOCK_DEVMEM 0x2000000
+#endif
+
+/*
+ * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
+ * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
+ *
+ * Usage:
+ *
+ *	On server:
+ *	ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7
+ *
+ *	On client:
+ *	yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
+ *		tr \\n \\0 | \
+ *		head -c 5G | \
+ *		nc <server IP> 5201 -p 5201
+ *
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ */
+
+static char *server_ip = "192.168.1.4";
+static char *client_ip = "192.168.1.2";
+static char *port = "5201";
+static size_t do_validation;
+static int start_queue = 8;
+static int num_queues = 8;
+static char *ifname = "eth1";
+static unsigned int ifindex;
+static unsigned int dmabuf_id;
+
+/* Dump @size bytes starting at @ptr to stdout as space-separated hex pairs. */
+void print_bytes(void *ptr, size_t size)
+{
+	unsigned char *p = ptr;
+
+	for (size_t i = 0; i < size; i++)
+		printf("%02hhX ", p[i]);
+	printf("\n");
+}
+
+/* Write @size raw bytes from @ptr to stdout (zero bytes included), then '\n'. */
+void print_nonzero_bytes(void *ptr, size_t size)
+{
+	const unsigned char *bytes = ptr;
+
+	for (unsigned int idx = 0; idx < size; idx++)
+		putchar(bytes[idx]);
+	putchar('\n');
+}
+
+/* Check @line against the running seed pattern; exit after more than 20 errors. */
+void validate_buffer(void *line, size_t size)
+{
+	static unsigned char seed = 1;
+	unsigned char *ptr = line;
+	int errors = 0;
+
+	for (size_t i = 0; i < size; i++) {
+		if (ptr[i] != seed) {
+			fprintf(stderr,
+				"Failed validation: expected=%u, actual=%u, index=%zu\n",
+				seed, ptr[i], i);
+			errors++;
+			if (errors > 20)
+				error(1, 0, "validation failed.");
+		}
+		seed++;
+		if (seed == do_validation)
+			seed = 0;
+	}
+
+	fprintf(stdout, "Validated buffer\n");
+}
+
+#define run_command(cmd, ...)                                           \
+	({                                                              \
+		char command[256];                                      \
+		memset(command, 0, sizeof(command));                    \
+		snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+		printf("Running: %s\n", command);                       \
+		system(command);                                        \
+	})
+
+static int reset_flow_steering(void)
+{
+	int ret = 0;
+
+	ret = run_command("sudo ethtool -K %s ntuple off", ifname);
+	if (ret)
+		return ret;
+
+	return run_command("sudo ethtool -K %s ntuple on", ifname);
+}
+
+static int configure_headersplit(bool on)
+{
+	return run_command("sudo ethtool -G %s tcp-data-split %s", ifname,
+			   on ? "on" : "off");
+}
+
+static int configure_rss(void)
+{
+	return run_command("sudo ethtool -X %s equal %d", ifname, start_queue);
+}
+
+static int configure_channels(unsigned int rx, unsigned int tx)
+{
+	return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
+}
+
+static int configure_flow_steering(void)
+{
+	return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d",
+			   ifname, client_ip, server_ip, port, port, start_queue);
+}
+
+/* Create a YNL socket in *ys and bind @queues of @ifindex to @dmabuf_fd. */
+static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+			 struct netdev_queue_id *queues,
+			 unsigned int n_queue_index, struct ynl_sock **ys)
+{
+	struct netdev_bind_rx_req *req = NULL;
+	struct netdev_bind_rx_rsp *rsp = NULL;
+	struct ynl_error yerr;
+
+	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!*ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_bind_rx_req_alloc();
+	netdev_bind_rx_req_set_ifindex(req, ifindex);
+	netdev_bind_rx_req_set_fd(req, dmabuf_fd);
+	__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
+
+	rsp = netdev_bind_rx(*ys, req);
+	if (!rsp) {
+		perror("netdev_bind_rx");
+		goto err_close;
+	}
+
+	if (!rsp->_present.id) {
+		fprintf(stderr, "id not present\n");
+		netdev_bind_rx_rsp_free(rsp);
+		goto err_close;
+	}
+
+	printf("got dmabuf id=%d\n", rsp->id);
+	dmabuf_id = rsp->id;
+	netdev_bind_rx_req_free(req);
+	netdev_bind_rx_rsp_free(rsp);
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+	netdev_bind_rx_req_free(req);
+	ynl_sock_destroy(*ys);
+	return -1;
+}
+
+static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size)
+{
+	struct udmabuf_create create;
+	int ret;
+
+	*devfd = open("/dev/udmabuf", O_RDWR);
+	if (*devfd < 0) {
+		error(70, 0,
+		      "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
+		      TEST_PREFIX);
+	}
+
+	*memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
+	if (*memfd < 0)
+		error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX);
+
+	/* Required for udmabuf */
+	ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+	if (ret < 0)
+		error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+
+	ret = ftruncate(*memfd, dmabuf_size);
+	if (ret == -1)
+		error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+
+	memset(&create, 0, sizeof(create));
+
+	create.memfd = *memfd;
+	create.offset = 0;
+	create.size = dmabuf_size;
+	*buf = ioctl(*devfd, UDMABUF_CREATE, &create);
+	if (*buf < 0)
+		error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
+}
+
+/* Bind the RX queues to a udmabuf, then receive devmem data from one client. */
+int do_server(void)
+{
+	char ctrl_data[sizeof(int) * 20000];
+	struct netdev_queue_id *queues;
+	size_t non_page_aligned_frags = 0;
+	struct sockaddr_in client_addr;
+	struct sockaddr_in server_sin;
+	size_t page_aligned_frags = 0;
+	int devfd, memfd, buf, ret;
+	size_t total_received = 0;
+	socklen_t client_addr_len;
+	int socket_fd, client_fd;
+	bool is_devmem = false;
+	char *buf_mem = NULL;
+	struct ynl_sock *ys;
+	size_t dmabuf_size;
+	char iobuf[819200];
+	char buffer[256];
+	size_t i = 0;
+	int opt = 1;
+
+	dmabuf_size = getpagesize() * NUM_PAGES;
+
+	create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
+
+	if (reset_flow_steering())
+		error(1, 0, "Failed to reset flow steering\n");
+
+	/* Configure RSS to divert all traffic from our devmem queues */
+	if (configure_rss())
+		error(1, 0, "Failed to configure rss\n");
+
+	/* Flow steer our devmem flows to start_queue */
+	if (configure_flow_steering())
+		error(1, 0, "Failed to configure flow steering\n");
+
+	sleep(1);
+
+	queues = malloc(sizeof(*queues) * num_queues);
+	if (!queues)
+		error(1, 0, "Failed to allocate queues\n");
+
+	for (i = 0; i < num_queues; i++) {
+		queues[i]._present.type = 1;
+		queues[i]._present.id = 1;
+		queues[i].type = NETDEV_QUEUE_TYPE_RX;
+		queues[i].id = start_queue + i;
+	}
+
+	if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+		error(1, 0, "Failed to bind\n");
+
+	buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		       buf, 0);
+	if (buf_mem == MAP_FAILED)
+		error(1, 0, "mmap()");
+
+	server_sin.sin_family = AF_INET;
+	server_sin.sin_port = htons(atoi(port));
+
+	ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr);
+	if (ret != 1)
+		error(79, 0, "%s: [FAIL, parse server IP]\n", TEST_PREFIX);
+
+	socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0);
+	if (socket_fd < 0)
+		error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt,
+			 sizeof(opt));
+	if (ret)
+		error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt,
+			 sizeof(opt));
+	if (ret)
+		error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
+
+	printf("binding to address %s:%d\n", server_ip,
+	       ntohs(server_sin.sin_port));
+
+	ret = bind(socket_fd, (struct sockaddr *)&server_sin, sizeof(server_sin));
+	if (ret)
+		error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
+
+	ret = listen(socket_fd, 1);
+	if (ret)
+		error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
+
+	client_addr_len = sizeof(client_addr);
+
+	inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer,
+		  sizeof(buffer));
+	printf("Waiting for connection on %s:%d\n", buffer,
+	       ntohs(server_sin.sin_port));
+	client_fd = accept(socket_fd, (struct sockaddr *)&client_addr, &client_addr_len);
+	if (client_fd < 0)
+		error(errno, errno, "%s: [FAIL, accept]\n", TEST_PREFIX);
+
+	inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer,
+		  sizeof(buffer));
+	printf("Got connection from %s:%d\n", buffer,
+	       ntohs(client_addr.sin_port));
+
+	while (1) {
+		struct iovec iov = { .iov_base = iobuf,
+				     .iov_len = sizeof(iobuf) };
+		struct dmabuf_cmsg *dmabuf_cmsg = NULL;
+		struct dma_buf_sync sync = { 0 };
+		struct cmsghdr *cm = NULL;
+		struct msghdr msg = { 0 };
+		struct dmabuf_token token;
+		ssize_t ret;
+
+		is_devmem = false;
+		printf("\n\n");
+
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = ctrl_data;
+		msg.msg_controllen = sizeof(ctrl_data);
+		ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
+		printf("recvmsg ret=%zd\n", ret);
+		if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
+			continue;
+		if (ret < 0) {
+			perror("recvmsg");
+			continue;
+		}
+		if (ret == 0) {
+			printf("client exited\n");
+			goto cleanup;
+		}
+
+		i++;
+		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+			if (cm->cmsg_level != SOL_SOCKET ||
+			    (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
+			     cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
+				fprintf(stdout, "skipping non-devmem cmsg\n");
+				continue;
+			}
+
+			dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
+			is_devmem = true;
+
+			if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
+				/* TODO: process data copied from skb's linear
+				 * buffer.
+				 */
+				fprintf(stdout,
+					"SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
+					dmabuf_cmsg->frag_size);
+
+				continue;
+			}
+
+			token.token_start = dmabuf_cmsg->frag_token;
+			token.token_count = 1;
+
+			total_received += dmabuf_cmsg->frag_size;
+			printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%zu, dmabuf_id=%u\n",
+			       dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
+			       dmabuf_cmsg->frag_offset % getpagesize(),
+			       dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size,
+			       dmabuf_cmsg->frag_token, total_received,
+			       dmabuf_cmsg->dmabuf_id);
+
+			if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
+				error(1, 0,
+				      "received on wrong dmabuf_id: flow steering error\n");
+
+			if (dmabuf_cmsg->frag_size % getpagesize())
+				non_page_aligned_frags++;
+			else
+				page_aligned_frags++;
+
+			sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START;
+			ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
+
+			if (do_validation)
+				validate_buffer(
+					((unsigned char *)buf_mem) +
+						dmabuf_cmsg->frag_offset,
+					dmabuf_cmsg->frag_size);
+			else
+				print_nonzero_bytes(
+					((unsigned char *)buf_mem) +
+						dmabuf_cmsg->frag_offset,
+					dmabuf_cmsg->frag_size);
+
+			sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END;
+			ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
+
+			ret = setsockopt(client_fd, SOL_SOCKET,
+					 SO_DEVMEM_DONTNEED, &token,
+					 sizeof(token));
+			if (ret != 1)
+				error(1, 0,
+				      "SO_DEVMEM_DONTNEED not enough tokens");
+		}
+		if (!is_devmem)
+			error(1, 0, "flow steering error\n");
+
+		printf("total_received=%zu\n", total_received);
+	}
+
+	fprintf(stdout, "%s: ok\n", TEST_PREFIX);
+
+	fprintf(stdout, "page_aligned_frags=%zu, non_page_aligned_frags=%zu\n",
+		page_aligned_frags, non_page_aligned_frags);
+
+cleanup:
+	munmap(buf_mem, dmabuf_size);
+	close(client_fd);
+	close(socket_fd);
+	close(buf);
+	close(memfd);
+	close(devfd);
+	ynl_sock_destroy(ys);
+
+	return 0;
+}
+
+void run_devmem_tests(void)
+{
+	struct netdev_queue_id *queues;
+	int devfd, memfd, buf;
+	struct ynl_sock *ys;
+	size_t dmabuf_size;
+	size_t i = 0;
+
+	dmabuf_size = getpagesize() * NUM_PAGES;
+
+	create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
+
+	/* Configure RSS to divert all traffic from our devmem queues */
+	if (configure_rss())
+		error(1, 0, "rss error\n");
+
+	queues = calloc(num_queues, sizeof(*queues));
+
+	if (configure_headersplit(1))
+		error(1, 0, "Failed to configure header split\n");
+
+	if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+		error(1, 0, "Binding empty queues array should have failed\n");
+
+	for (i = 0; i < num_queues; i++) {
+		queues[i]._present.type = 1;
+		queues[i]._present.id = 1;
+		queues[i].type = NETDEV_QUEUE_TYPE_RX;
+		queues[i].id = start_queue + i;
+	}
+
+	if (configure_headersplit(0))
+		error(1, 0, "Failed to configure header split\n");
+
+	if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+		error(1, 0, "Configure dmabuf with header split off should have failed\n");
+
+	if (configure_headersplit(1))
+		error(1, 0, "Failed to configure header split\n");
+
+	for (i = 0; i < num_queues; i++) {
+		queues[i]._present.type = 1;
+		queues[i]._present.id = 1;
+		queues[i].type = NETDEV_QUEUE_TYPE_RX;
+		queues[i].id = start_queue + i;
+	}
+
+	if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+		error(1, 0, "Failed to bind\n");
+
+	/* Deactivating a bound queue should not be legal */
+	if (!configure_channels(num_queues, num_queues - 1))
+		error(1, 0, "Deactivating a bound queue should be illegal.\n");
+
+	/* Closing the netlink socket does an implicit unbind */
+	ynl_sock_destroy(ys);
+}
+
+int main(int argc, char *argv[])
+{
+	int is_server = 0, opt;
+
+	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
+		switch (opt) {
+		case 'l':
+			is_server = 1;
+			break;
+		case 's':
+			server_ip = optarg;
+			break;
+		case 'c':
+			client_ip = optarg;
+			break;
+		case 'p':
+			port = optarg;
+			break;
+		case 'v':
+			do_validation = atoll(optarg);
+			break;
+		case 'q':
+			num_queues = atoi(optarg);
+			break;
+		case 't':
+			start_queue = atoi(optarg);
+			break;
+		case 'f':
+			ifname = optarg;
+			break;
+		case '?':
+			printf("unknown option: %c\n", optopt);
+			break;
+		}
+	}
+
+	ifindex = if_nametoindex(ifname);
+
+	for (; optind < argc; optind++)
+		printf("extra arguments: %s\n", argv[optind]);
+
+	run_devmem_tests();
+
+	if (is_server)
+		return do_server();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index e3afcb424710..438f7b2acc5f 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -67,8 +67,12 @@ kci_net_setup()
 		return $ksft_skip
 	fi
 
-	# TODO what ipaddr to set ? DHCP ?
-	echo "SKIP: $netdev: set IP address"
+	if [ "$veth_created" ]; then
+		echo "XFAIL: $netdev: set IP address unsupported for veth*"
+	else
+		# TODO what ipaddr to set ? DHCP ?
+		echo "SKIP: $netdev: set IP address"
+	fi
 	return $ksft_skip
 }
 
@@ -86,7 +90,7 @@ kci_netdev_ethtool_test()
 	ret=$?
 	if [ $ret -ne 0 ];then
 		if [ $ret -eq "$1" ];then
-			echo "SKIP: $netdev: ethtool $2 not supported"
+			echo "XFAIL: $netdev: ethtool $2 not supported"
 			return $ksft_skip
 		else
 			echo "FAIL: $netdev: ethtool $2"
@@ -124,11 +128,45 @@ kci_netdev_ethtool()
 		return 1
 	fi
 	echo "PASS: $netdev: ethtool list features"
-	#TODO for each non fixed features, try to turn them on/off
+
+	while read -r FEATURE VALUE FIXED; do
+		[ "$FEATURE" != "Features" ] || continue # Skip "Features"
+		[ "$FIXED" != "[fixed]" ] || continue # Skip fixed features
+		feature="${FEATURE%:*}"
+
+		ethtool --offload "$netdev" "$feature" off
+		if [ $? -eq 0 ]; then
+			echo "PASS: $netdev: Turned off feature: $feature"
+		else
+			echo "FAIL: $netdev: Failed to turn off feature:" \
+				"$feature"
+		fi
+
+		ethtool --offload "$netdev" "$feature" on
+		if [ $? -eq 0 ]; then
+			echo "PASS: $netdev: Turned on feature: $feature"
+		else
+			echo "FAIL: $netdev: Failed to turn on feature:" \
+				"$feature"
+		fi
+
+		#restore the feature to its initial state
+		ethtool --offload "$netdev" "$feature" "$VALUE"
+		if [ $? -eq 0 ]; then
+			echo "PASS: $netdev: Restore feature $feature" \
+				"to initial state $VALUE"
+		else
+			echo "FAIL: $netdev: Failed to restore feature" \
+				"$feature to initial state $VALUE"
+		fi
+
+	done < "$TMP_ETHTOOL_FEATURES"
+
 	rm "$TMP_ETHTOOL_FEATURES"
 
 	kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
 	kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+
 	return 0
 }
 
@@ -196,10 +234,24 @@ if [ ! -e "$TMP_LIST_NETDEV" ];then
 fi
 
 ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\  -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+
+if [ ! -s "$TMP_LIST_NETDEV" ]; then
+	echo "No valid network device found, creating veth pair"
+	ip link add veth0 type veth peer name veth1
+	echo "veth0" > "$TMP_LIST_NETDEV"
+	veth_created=1
+fi
+
 while read netdev
 do
 	kci_test_netdev "$netdev"
 done < "$TMP_LIST_NETDEV"
 
+#clean up veth interface pair if it was created
+if [ "$veth_created" ]; then
+	ip link delete veth0
+	echo "Removed veth pair"
+fi
+
 rm "$TMP_LIST_NETDEV"
 exit 0
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 63ef80ef47a4..b2dd4db45215 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -87,3 +87,5 @@ CONFIG_XFRM_USER=m
 CONFIG_XFRM_STATISTICS=y
 CONFIG_NET_PKTGEN=m
 CONFIG_TUN=m
+CONFIG_INET_DIAG=m
+CONFIG_SCTP_DIAG=m
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index c61d23a8c88d..d66e3c4dfec6 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -8,7 +8,7 @@
 
 source lib.sh
 ret=0
-timeout=2
+timeout=5
 
 cleanup()
 {
@@ -25,6 +25,9 @@ cleanup()
 }
 
 checktool "nft --version" "test without nft tool"
+checktool "socat -h" "run test without socat"
+
+modprobe -q sctp
 
 trap cleanup EXIT
 
@@ -36,7 +39,9 @@ TMPFILE2=$(mktemp)
 TMPFILE3=$(mktemp)
 
 TMPINPUT=$(mktemp)
-dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+COUNT=200
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
 
 if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
     echo "SKIP: No virtual ethernet pair device support in kernel"
@@ -250,45 +255,49 @@ listener_ready()
 
 test_tcp_forward()
 {
-	ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+	ip netns exec "$nsrouter" ./nf_queue -q 2 &
 	local nfqpid=$!
 
 	timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
 	local rpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
 
 	ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
 
 	wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+	kill "$nfqpid"
 }
 
 test_tcp_localhost()
 {
-	dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
 	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
 	local rpid=$!
 
-	ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+	ip netns exec "$nsrouter" ./nf_queue -q 3 &
 	local nfqpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
 
 	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
 
 	wait "$rpid" && echo "PASS: tcp via loopback"
-	wait 2>/dev/null
+	kill "$nfqpid"
 }
 
 test_tcp_localhost_connectclose()
 {
-	ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
-	ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+	ip netns exec "$nsrouter" ./nf_queue -q 3 &
+	local nfqpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
 
+	timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3
+
+	kill "$nfqpid"
 	wait && echo "PASS: tcp via loopback with connect/close"
-	wait 2>/dev/null
 }
 
 test_tcp_localhost_requeue()
@@ -353,7 +362,7 @@ table inet filter {
 	}
 }
 EOF
-	ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+	ip netns exec "$ns1" ./nf_queue -q 1 &
 	local nfqpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
@@ -363,6 +372,7 @@ EOF
 	for n in output post; do
 		for d in tvrf eth0; do
 			if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+				kill "$nfqpid"
 				echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
 				ip netns exec "$ns1" nft list ruleset
 				ret=1
@@ -371,8 +381,96 @@ EOF
 		done
 	done
 
-	wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
-	wait 2>/dev/null
+	kill "$nfqpid"
+	echo "PASS: icmp+nfqueue via vrf"
+}
+
+sctp_listener_ready()
+{
+	ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345
+}
+
+check_output_files()
+{
+	# Compare the sent file ($1) with the received file ($2).  On mismatch,
+	# print a failure tagged with $3 plus both listings and set global ret.
+	local sent="$1" received="$2" what="$3"
+
+	cmp "$sent" "$received" && return 0
+
+	echo "FAIL: $what: input and output file differ" >&2
+	echo -n " Input file" >&2
+	ls -l "$sent" >&2
+	echo -n "Output file" >&2
+	ls -l "$received" >&2
+	ret=1
+}
+
+test_sctp_forward()
+{
+	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet sctpq {
+        chain forward {
+        type filter hook forward priority 0; policy accept;
+                sctp dport 12345 queue num 10
+        }
+}
+EOF
+	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	local rpid=$!
+	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+	ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
+	local nfqpid=$!
+	# As in the tcp tests: do not send before nf_queue is listening on queue 10.
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 10
+
+	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+	if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then
+		echo "FAIL:  Could not delete sctpq table"
+		exit 1
+	fi
+
+	wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+	kill "$nfqpid"
+	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
+}
+
+test_sctp_output()
+{
+	ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+table inet sctpq {
+        chain output {
+        type filter hook output priority 0; policy accept;
+                sctp dport 12345 queue num 11
+        }
+}
+EOF
+	# reduce test file size, software segmentation causes sk wmem increase.
+	dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
+
+	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	local rpid=$!
+	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+	ip netns exec "$ns1" ./nf_queue -q 11 &
+	local nfqpid=$!
+	# As in the tcp tests: do not send before nf_queue is listening on queue 11.
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 11
+
+	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+	if ! ip netns exec "$ns1" nft delete table inet sctpq; then
+		echo "FAIL:  Could not delete sctpq table"
+		exit 1
+	fi
+
+	# must wait before checking completeness of output file.
+	wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+	kill "$nfqpid"
+	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
 }
 
 test_queue_removal()
@@ -388,7 +486,7 @@ table ip filter {
 	}
 }
 EOF
-	ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" &
+	ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 &
 	local nfqpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0
@@ -443,11 +541,16 @@ test_queue 10
 # same.  We queue to a second program as well.
 load_ruleset "filter2" 20
 test_queue 20
+ip netns exec "$ns1" nft flush ruleset
 
 test_tcp_forward
 test_tcp_localhost
 test_tcp_localhost_connectclose
 test_tcp_localhost_requeue
+test_sctp_forward
+test_sctp_output
+
+# should be last, adds vrf device in ns1 and changes routes
 test_icmp_vrf
 test_queue_removal
 
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile
new file mode 100644
index 000000000000..31cfb666ba8b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := ksft_runner.sh \
+		 defaults.sh \
+		 set_sysctls.py \
+		 ../../kselftest/ktap_helpers.sh
+
+TEST_PROGS := $(wildcard *.pkt)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config
new file mode 100644
index 000000000000..0237ed98f3c0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/config
@@ -0,0 +1,11 @@
+CONFIG_IPV6=y
+CONFIG_HZ_1000=y
+CONFIG_HZ=1000
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_FIFO=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYN_COOKIES=y
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TUN=y
diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh
new file mode 100755
index 000000000000..1095a7b22f44
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/defaults.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Set standard production config values that relate to TCP behavior.
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP timestamps.
+sysctl -q net.ipv4.tcp_timestamps=1
+
+# TCP SYN(ACK) retry thresholds
+sysctl -q net.ipv4.tcp_syn_retries=5
+sysctl -q net.ipv4.tcp_synack_retries=5
+
+# TCP Forward RTO-Recovery, RFC 5682.
+sysctl -q net.ipv4.tcp_frto=2
+
+# TCP Selective Acknowledgements (SACK)
+sysctl -q net.ipv4.tcp_sack=1
+
+# TCP Duplicate Selective Acknowledgements (DSACK)
+sysctl -q net.ipv4.tcp_dsack=1
+
+# TCP FACK (Forward Acknowledgement)
+sysctl -q net.ipv4.tcp_fack=0
+
+# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery).
+sysctl -q net.ipv4.tcp_reordering=3
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP RACK and TLP.
+sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1
+
+# TCP method for deciding when to defer sending to accumulate big TSO packets.
+sysctl -q net.ipv4.tcp_tso_win_divisor=3
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_pacing_ss_ratio=200
+sysctl -q net.ipv4.tcp_pacing_ca_ratio=120
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+sysctl -q net.ipv4.tcp_fastopen=0x70403
+sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4
+
+sysctl -q net.ipv4.tcp_syncookies=1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
new file mode 100755
index 000000000000..7478c0c0c9aa
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run one packetdrill script twice, once with the IPv4 and once with the IPv6
+# arguments below, each under "unshare -n", reporting through the ktap helpers.
+#
+# usage: ksft_runner.sh <script>
+
+source "$(dirname "$(realpath "$0")")/../../kselftest/ktap_helpers.sh"
+
+# Each element ends in a space and some hold two words ("-D NAME=VALUE"), so
+# the arrays are deliberately expanded unquoted below to let the shell split.
+readonly ipv4_args=('--ip_version=ipv4 '
+		    '--local_ip=192.168.0.1 '
+		    '--gateway_ip=192.168.0.1 '
+		    '--netmask_ip=255.255.0.0 '
+		    '--remote_ip=192.0.2.1 '
+		    '-D CMSG_LEVEL_IP=SOL_IP '
+		    '-D CMSG_TYPE_RECVERR=IP_RECVERR ')
+
+readonly ipv6_args=('--ip_version=ipv6 '
+		    '--mtu=1520 '
+		    '--local_ip=fd3d:0a0b:17d6::1 '
+		    '--gateway_ip=fd3d:0a0b:17d6:8888::1 '
+		    '--remote_ip=fd3d:fa7b:d17d::1 '
+		    '-D CMSG_LEVEL_IP=SOL_IPV6 '
+		    '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ')
+
+if [ $# -ne 1 ]; then
+	ktap_exit_fail_msg "usage: $0 <script>"
+	exit "$KSFT_FAIL"
+fi
+script="$1"
+
+if ! command -v packetdrill > /dev/null 2>&1; then
+	ktap_skip_all "packetdrill not found in PATH"
+	exit "$KSFT_SKIP"
+fi
+
+ktap_print_header
+ktap_set_plan 2
+
+unshare -n packetdrill ${ipv4_args[@]} "$(basename "$script")" > /dev/null \
+	&& ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
+unshare -n packetdrill ${ipv6_args[@]} "$(basename "$script")" > /dev/null \
+	&& ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
+
+ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py
new file mode 100755
index 000000000000..5ddf456ae973
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Sets sysctl values and writes a file that restores them.
+
+The arguments are of the form "<proc-file>=<val>" separated by spaces.
+The program first reads the current value of the proc-file and creates
+a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which
+restores the values when executed. It then sets the new values.
+
+PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt
+file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh`
+at the end.
+"""
+
+import os
+import subprocess
+import sys
+
+filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID']
+
+# Open file for restoring sysctl values. The "with" block closes (and thus
+# flushes) the script before it is made executable below.
+with open(filename, 'w') as restore_file:
+  print('#!/bin/bash', file=restore_file)
+
+  for a in sys.argv[1:]:
+    # Split on the first '=' only, so that a value may itself contain '='.
+    # sysctl[0] contains the proc-file name, sysctl[1] the new value
+    sysctl = a.split('=', 1)
+    if len(sysctl) != 2:
+      sys.exit('%s: expected <proc-file>=<val>, got "%s"' % (sys.argv[0], a))
+
+    # read current value and add restore command to file
+    cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True)
+    print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file)
+
+    # set new value
+    cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0])
+    os.system(cmd)
+
+os.system('chmod u+x %s' % filename)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
new file mode 100644
index 000000000000..df49c67645ac
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the client side.
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+    0	socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0	fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+   +0	fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Connect to the server and enable TCP_INQ.
+   +0	connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0	setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+   +0	> S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01	< S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7>
+   +0	> . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+
+// Now we have 10K of data ready on the socket.
+   +0	< . 1:10001(10000) ack 1 win 514
+   +0	> . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700>
+
+// We read 1K and we should have 9K ready to read.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{..., 1000}],
+		    msg_flags=0,
+		    msg_control=[{cmsg_level=SOL_TCP,
+				  cmsg_type=TCP_CM_INQ,
+				  cmsg_data=9000}]}, 0) = 1000
+// We read 9K and we should have no further data ready to read.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{..., 9000}],
+		    msg_flags=0,
+		    msg_control=[{cmsg_level=SOL_TCP,
+				  cmsg_type=TCP_CM_INQ,
+				  cmsg_data=0}]}, 0) = 9000
+
+// Server sends more data and closes the connection.
+   +0	< F. 10001:20001(10000) ack 1 win 514
+   +0	> . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700>
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{..., 10000}],
+		    msg_flags=0,
+		    msg_control=[{cmsg_level=SOL_TCP,
+				  cmsg_type=TCP_CM_INQ,
+				  cmsg_data=1}]}, 0) = 10000
+// Now, receive EOF.
+   +0	read(3, ..., 2000) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
new file mode 100644
index 000000000000..04a5e2590c62
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the server side.
+`./defaults.sh
+`
+
+// Initialize connection
+    0	socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0	setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0	bind(3, ..., ...) = 0
+   +0	listen(3, 1) = 0
+
+   +0	< S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0	> S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01	< . 1:1(0) ack 1 win 514
+
+// Accept the connection and enable TCP_INQ.
+   +0	accept(3, ..., ...) = 4
+   +0	setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+// Now we have 10K of data ready on the socket.
+   +0	< . 1:10001(10000) ack 1 win 514
+   +0	> . 1:1(0) ack 10001
+
+// We read 2K and we should have 8K ready to read.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{..., 2000}],
+		    msg_flags=0,
+		    msg_control=[{cmsg_level=SOL_TCP,
+				  cmsg_type=TCP_CM_INQ,
+				  cmsg_data=8000}]}, 0) = 2000
+// We read 8K and we should have no further data ready to read.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{..., 8000}],
+		    msg_flags=0,
+		    msg_control=[{cmsg_level=SOL_TCP,
+				  cmsg_type=TCP_CM_INQ,
+				  cmsg_data=0}]}, 0) = 8000
+// Client sends more data and closes the connection.
+   +0	< F. 10001:20001(10000) ack 1 win 514
+   +0	> . 1:1(0) ack 20002
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{..., 10000}],
+		    msg_flags=0,
+		    msg_control=[{cmsg_level=SOL_TCP,
+				  cmsg_type=TCP_CM_INQ,
+				  cmsg_data=1}]}, 0) = 10000
+// Now, receive error.
+   +0	read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
new file mode 100644
index 000000000000..25dfef95d3f8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test what happens when client does not provide MD5 on SYN,
+// but then does on the ACK that completes the three-way handshake.
+
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Ooh, weird: client provides MD5 option on the ACK:
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+
+// The TCP listener refcount should be 2, but on buggy kernels it can be 0:
+   +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"`
+
+// Now here comes the legit ACK:
+ +.01 < . 1:1(0) ack 1 win 514
+
+// Make sure the connection is OK:
+   +0 accept(3, ..., ...) = 4
+
+ +.01 write(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
new file mode 100644
index 000000000000..795c476d222d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every packet.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 30000) = 30000
+   +0 > P. 1:10001(10000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 1001 win 257
+   +0 > P. 10001:12001(2000) ack 1
+
+   +0 < . 1:1(0) ack 2001 win 257
+   +0 > P. 12001:14001(2000) ack 1
+
++.005 < . 1:1(0) ack 3001 win 257
+   +0 > P. 14001:16001(2000) ack 1
+
+   +0 < . 1:1(0) ack 4001 win 257
+   +0 > P. 16001:18001(2000) ack 1
+
++.005 < . 1:1(0) ack 5001 win 257
+   +0 > P. 18001:20001(2000) ack 1
+
+   +0 < . 1:1(0) ack 6001 win 257
+   +0 > P. 20001:22001(2000) ack 1
+
++.005 < . 1:1(0) ack 7001 win 257
+   +0 > P. 22001:24001(2000) ack 1
+
+   +0 < . 1:1(0) ack 8001 win 257
+   +0 > P. 24001:26001(2000) ack 1
+
++.005 < . 1:1(0) ack 9001 win 257
+   +0 > P. 26001:28001(2000) ack 1
+
+   +0 < . 1:1(0) ack 10001 win 257
+   +0 > P. 28001:30001(2000) ack 1
+
+   +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
new file mode 100644
index 000000000000..9212ae1fd0f2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, and not big enough to bump up cwnd.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+
+// Only send 5 packets.
+   +0 write(4, ..., 5000) = 5000
+   +0 > P. 1:5001(5000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+   +0 < . 1:1(0) ack 2001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+   +0 < . 1:1(0) ack 4001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+   +0 < . 1:1(0) ack 5001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
new file mode 100644
index 000000000000..416c901ddf51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, but still big enough that in slow
+// start we want to increase our cwnd a little.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+
+// Only send 6 packets.
+   +0 write(4, ..., 6000) = 6000
+   +0 > P. 1:6001(6000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+   +0 < . 1:1(0) ack 2001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+   +0 < . 1:1(0) ack 4001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+   +0 < . 1:1(0) ack 6001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..a894b7d4559c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 30000) = 30000
+   +0 > P. 1:10001(10000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+   +0 > P. 10001:14001(4000) ack 1
+
++.005 < . 1:1(0) ack 4001 win 257
+   +0 > P. 14001:18001(4000) ack 1
+
++.005 < . 1:1(0) ack 6001 win 257
+   +0 > P. 18001:22001(4000) ack 1
+
++.005 < . 1:1(0) ack 8001 win 257
+   +0 > P. 22001:26001(4000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+   +0 > P. 26001:30001(4000) ack 1
+
+   +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
new file mode 100644
index 000000000000..065fae9e9abd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver sends one ACK per 4 packets.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 30000) = 30000
+   +0 > P. 1:10001(10000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.11 < . 1:1(0) ack 4001 win 257
+   +0 > P. 10001:18001(8000) ack 1
+
+ +.01 < . 1:1(0) ack 8001 win 257
+   +0 > P. 18001:26001(8000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+   +0 > P. 26001:30001(4000) ack 1
+
+   +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
new file mode 100644
index 000000000000..11b213be1138
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after idle
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+		 /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 511
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 26000) = 26000
+   +0 > P. 1:5001(5000) ack 1
+   +0 > P. 5001:10001(5000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+  +.1 < . 1:1(0) ack 10001 win 511
+   +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+   +0 > P. 10001:20001(10000) ack 1
+   +0 > P. 20001:26001(6000) ack 1
+
+  +.1 < . 1:1(0) ack 26001 win 511
+   +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+   +2 write(4, ..., 20000) = 20000
+// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+   +0 > P. 26001:31001(5000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
new file mode 100644
index 000000000000..577ed8c8852c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after window update
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+		 /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 511
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 26000) = 26000
+   +0 > P. 1:5001(5000) ack 1
+   +0 > P. 5001:10001(5000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+  +.1 < . 1:1(0) ack 10001 win 511
+   +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+   +0 > P. 10001:20001(10000) ack 1
+   +0 > P. 20001:26001(6000) ack 1
+
+  +.1 < . 1:1(0) ack 26001 win 0
+   +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+   +0 write(4, ..., 20000) = 20000
+// 1st win0 probe
++.3~+.310 > . 26000:26000(0) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 2nd win0 probe
++.6~+.620 > . 26000:26000(0) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 3rd win0 probe
++1.2~+1.240 > . 26000:26000(0) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+  +.9 < . 1:1(0) ack 26001 win 511
+   +0 > P. 26001:31001(5000) ack 1
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
new file mode 100644
index 000000000000..869f32c35a2a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we don't fully use our cwnd but instead
+// send just 9 packets, then cwnd should grow to twice that
+// value, or 18 packets.
+
+// Set up config.
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 9000) = 9000
+   +0 > P. 1:9001(9000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 9001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
new file mode 100644
index 000000000000..0f77b7955db6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we send exactly 10 packets then cwnd should grow to 20.
+
+// Set up config.
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 10000) = 10000
+   +0 > P. 1:10001(10000) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 10001 win 257
+   +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..7e9c83d617c2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow, even if TSQ triggers.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Note: we use FQ/pacing to check that TCP Small Queues (TSQ) does not hurt cwnd growth.
+
+`./defaults.sh
+tc qdisc replace dev tun0 root fq
+sysctl -q net/ipv4/tcp_pacing_ss_ratio=200
+sysctl -e -q net.ipv4.tcp_min_tso_segs=2`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 500
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+   +0 write(4, ..., 40000) = 40000
+// This might change if we cook the initial packet with 10 MSS.
+   +0 > P. 1:2921(2920) ack 1
+   +0 > P. 2921:5841(2920) ack 1
+   +0 > P. 5841:8761(2920) ack 1
+   +0 > P. 8761:11681(2920) ack 1
+   +0 > P. 11681:14601(2920) ack 1
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2921 win 500
+   +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
+// Note: since commit "net_sched: sch_fq: account for schedule/timers drifts",
+// FQ notices that this packet missed the 'time to send next packet' computed
+// when prior packet (11681:14601(2920)) was sent.
+// So FQ will allow following packet to be sent a bit earlier (quantum/2)
+// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit)
+   +0 > P. 14601:17521(2920) ack 1
+
++.003 < . 1:1(0) ack 5841 win 500
+   +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.001 > P. 17521:20441(2920) ack 1
+
++.001 < . 1:1(0) ack 8761 win 500
+   +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
+// remaining packets are delivered at a constant rate.
++.007 > P. 20441:23361(2920) ack 1
+
++.002 < . 1:1(0) ack 11681 win 500
+   +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
++.001 < . 1:1(0) ack 14601 win 500
+
++.004 > P. 23361:26281(2920) ack 1
+
++.007 > P. 26281:29201(2920) ack 1
+
+   +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
new file mode 100644
index 000000000000..a82c8899d36b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+// basic zerocopy test:
+//
+// send a packet with MSG_ZEROCOPY and receive the notification ID
+// repeat and verify IDs are consecutive
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 1:4001(4000) ack 1
+   +0 < . 1:1(0) ack 4001 win 257
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=0}}
+                   ]}, MSG_ERRQUEUE) = 0
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 4001:8001(4000) ack 1
+   +0 < . 1:1(0) ack 8001 win 257
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=1,
+                                    ee_data=1}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
new file mode 100644
index 000000000000..c01915e7f4a1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// batch zerocopy test:
+//
+// send multiple packets, then read one range of all notifications.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 1:4001(4000) ack 1
+   +0 < . 1:1(0) ack 4001 win 257
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 4001:8001(4000) ack 1
+   +0 < . 1:1(0) ack 8001 win 257
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=1}}
+                  ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
new file mode 100644
index 000000000000..6509882932e9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Minimal client-side zerocopy test
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0...0 connect(4, ..., ...) = 0
+
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+   +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > . 1:1(0) ack 1
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 1:4001(4000) ack 1
+   +0 < . 1:1(0) ack 4001 win 257
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=0}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
new file mode 100644
index 000000000000..2cd78755cb2a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+// send with MSG_ZEROCOPY on a non-established socket
+//
+// verify that a send in state TCP_CLOSE correctly aborts the zerocopy
+// operation, specifically it does not increment the zerocopy counter.
+//
+// First send on a closed socket and wait for (absent) notification.
+// Then connect and send and verify that notification nr. is zero.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+   +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe)
+
+   +0.1 recvmsg(4, {msg_name(...)=...,
+                    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE,
+                    msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+   +0...0 connect(4, ..., ...) = 0
+
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+   +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > . 1:1(0) ack 1
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 1:4001(4000) ack 1
+   +0 < . 1:1(0) ack 4001 win 257
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=0}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
new file mode 100644
index 000000000000..7671c20e01cf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 epoll_create(1) = 5
+   +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0
+   +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 1:4001(4000) ack 1
+   +0 < . 1:1(0) ack 4001 win 257
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 4001:8001(4000) ack 1
+   +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+   +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+   +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 8001:12001(4000) ack 1
+   +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+   +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+   +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=2}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
new file mode 100644
index 000000000000..fadc480fdb7f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either. this test verifies that the same behavior is
+// maintained when we have EPOLLEXCLUSIVE.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 epoll_create(1) = 5
+   +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+		{events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0
+   +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 1:4001(4000) ack 1
+   +0 < . 1:1(0) ack 4001 win 257
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 4001:8001(4000) ack 1
+   +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+   +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+   +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 8001:12001(4000) ack 1
+   +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+   +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+   +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=2}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
new file mode 100644
index 000000000000..5bfa0d1d2f4a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// This is a test to confirm that EPOLLERR is only fired once for an FD when
+// EPOLLONESHOT is set.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLONESHOT is set. send another packet
+// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
+// confirm that EPOLLERR is correctly set.
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 epoll_create(1) = 5
+   +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+		{events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 1:4001(4000) ack 1
+   +0 < . 1:1(0) ack 4001 win 257
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 4001:8001(4000) ack 1
+   +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+   +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+   +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+   +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+   +0 > P. 8001:12001(4000) ack 1
+   +0 < . 1:1(0) ack 12001 win 257
+
+// receive no EPOLLERR for the third send above.
+   +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+// rearm the FD and verify the EPOLLERR is fired again.
+   +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0
+   +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+   +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+   +0 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=2}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
new file mode 100644
index 000000000000..4a73bbf46961
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen client zerocopy test:
+//
+// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
+// kernel returns the notification ID.
+//
+// Fastopen requires a stored cookie. Create two sockets. The first
+// one will have no data in the initial send. On return 0 the
+// zerocopy notification counter is not incremented. Verify this too.
+
+`./defaults.sh`
+
+// Send a FastOpen request, no cookie yet so no data in SYN
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop>
+ +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop>
+   +0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000>
+
+// Read from error queue: no zerocopy notification
+   +1 recvmsg(3, {msg_name(...)=...,
+                    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE,
+                    msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+ +.01 close(3) = 0
+   +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000>
+ +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002>
+   +0 > .  2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001>
+
+// Send another Fastopen request, now SYN will have data
+ +.07 `sysctl -q net.ipv4.tcp_timestamps=0`
+  +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
+   +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500
+   +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop>
+ +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+   +0 > . 501:501(0) ack 1
+
+// Read from error queue: now has first zerocopy notification
+   +0.5 recvmsg(5, {msg_name(...)=...,
+                    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE,
+                    msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=0}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
new file mode 100644
index 000000000000..36086c5877ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen server zerocopy test:
+//
+// accept a Fastopen connection, send data with MSG_ZEROCOPY and verify
+// that the kernel returns the notification ID.
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
+
+// Set up a TFO server listening socket.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+  +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+// Client sends a SYN with data.
+  +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+   +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
+
+// Server accepts and replies with data.
++.005 accept(3, ..., ...) = 4
+   +0 read(4, ..., 1024) = 1000
+   +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000
+   +0 > P. 1:1001(1000) ack 1001
+ +.05 < . 1001:1001(0) ack 1001 win 32792
+
+// Read from error queue: now has first zerocopy notification
+  +0.1 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                      {cmsg_level=CMSG_LEVEL_IP,
+                       cmsg_type=CMSG_TYPE_RECVERR,
+                       cmsg_data={ee_errno=0,
+                                  ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                  ee_type=0,
+                                  ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                  ee_info=0,
+                                  ee_data=0}}
+                  ]}, MSG_ERRQUEUE) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
new file mode 100644
index 000000000000..672f817faca0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+// tcp_MAX_SKB_FRAGS test
+//
+// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will
+// 1) fit in a single packet without zerocopy
+// 2) spill over into a second packet with zerocopy,
+//    because each iovec element becomes a frag
+// 3) the PSH bit is set on an skb when it runs out of fragments
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+   // Each pinned zerocopy page is fully accounted to skb->truesize.
+   // This test generates a worst case packet with each frag storing
+   // one byte, but increasing truesize with a page (64KB on PPC).
+   +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0
+
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   // send an iov of 18 elements: just becomes a linear skb
+   +0 sendmsg(4, {msg_name(...)=...,
+		  msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}],
+		  msg_flags=0}, 0) = 18
+
+   +0 > P. 1:19(18) ack 1
+   +0 < . 1:1(0) ack 19 win 257
+
+   // send a zerocopy iov of 18 elements:
+   +1 sendmsg(4, {msg_name(...)=...,
+		  msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}],
+		  msg_flags=0}, MSG_ZEROCOPY) = 18
+
+   // verify that it is split in one skb of 17 frags + 1 of 1 frag
+   // verify that both have the PSH bit set
+   +0 > P. 19:36(17) ack 1
+   +0 < . 1:1(0) ack 36 win 257
+
+   +0 > P. 36:37(1) ack 1
+   +0 < . 1:1(0) ack 37 win 257
+
+   +1 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=0}}
+                   ]}, MSG_ERRQUEUE) = 0
+
+   // send a zerocopy iov of 64 elements:
+   +0 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1},
+			       {..., 1}, {..., 1}, {..., 1}, {..., 1}],
+                  msg_flags=0}, MSG_ZEROCOPY) = 64
+
+   // verify that it is split in skbs with 17 frags
+   +0 > P. 37:54(17) ack 1
+   +0 < . 1:1(0) ack 54 win 257
+
+   +0 > P. 54:71(17) ack 1
+   +0 < . 1:1(0) ack 71 win 257
+
+   +0 > P. 71:88(17) ack 1
+   +0 < . 1:1(0) ack 88 win 257
+
+   +0 > P. 88:101(13) ack 1
+   +0 < . 1:1(0) ack 101 win 257
+
+   +1 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=1,
+                                    ee_data=1}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
new file mode 100644
index 000000000000..a9a1ac0aea4f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// small packet zerocopy test:
+//
+// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
+// packets of all sizes, including the smallest payload, 1B.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   // send 1B
+   +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+   +0 > P. 1:2(1) ack 1
+   +0 < . 1:1(0) ack 2 win 257
+
+   +1 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=0,
+                                    ee_data=0}}
+                   ]}, MSG_ERRQUEUE) = 0
+
+   // send 1B again
+   +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+   +0 > P. 2:3(1) ack 1
+   +0 < . 1:1(0) ack 3 win 257
+
+   +1 recvmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{...,0}],
+                  msg_flags=MSG_ERRQUEUE,
+                  msg_control=[
+                        {cmsg_level=CMSG_LEVEL_IP,
+                         cmsg_type=CMSG_TYPE_RECVERR,
+                         cmsg_data={ee_errno=0,
+                                    ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+                                    ee_type=0,
+                                    ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+                                    ee_info=1,
+                                    ee_data=1}}
+                   ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 5175c0c83a23..569bce8b6383 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -681,13 +681,7 @@ setup_xfrm() {
 }
 
 setup_nettest_xfrm() {
-	if ! which nettest >/dev/null; then
-		PATH=$PWD:$PATH
-		if ! which nettest >/dev/null; then
-			echo "'nettest' command not found; skipping tests"
-			return 1
-		fi
-	fi
+	check_gen_prog "nettest"
 
 	[ ${1} -eq 6 ] && proto="-6" || proto=""
 	port=${2}
@@ -1447,7 +1441,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
 		size=$(du -sb $tmpoutfile)
 		size=${size%%/tmp/*}
 
-		[ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+		[ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1
 	done
 
 	rm -f "$tmpoutfile"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 1a736f700be4..4f31e92ebd96 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -165,9 +165,9 @@ static void sock_fanout_set_ebpf(int fd)
 	attr.insns = (unsigned long) prog;
 	attr.insn_cnt = ARRAY_SIZE(prog);
 	attr.license = (unsigned long) "GPL";
-	attr.log_buf = (unsigned long) log_buf,
-	attr.log_size = sizeof(log_buf),
-	attr.log_level = 1,
+	attr.log_buf = (unsigned long) log_buf;
+	attr.log_size = sizeof(log_buf);
+	attr.log_level = 1;
 
 	pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 	if (pfd < 0) {
diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
new file mode 100644
index 000000000000..da9714bc7aad
--- /dev/null
+++ b/tools/testing/selftests/net/rds/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+all:
+	@echo mk_build_dir="$(shell pwd)" > include.sh
+
+TEST_PROGS := run.sh
+# Helper files: installed alongside run.sh but not executed as standalone tests.
+TEST_FILES := include.sh test.py
+
+EXTRA_CLEAN := /tmp/rds_logs
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt
new file mode 100644
index 000000000000..cbde2951ab13
--- /dev/null
+++ b/tools/testing/selftests/net/rds/README.txt
@@ -0,0 +1,41 @@
+RDS self-tests
+==============
+
+These scripts provide a coverage test for RDS-TCP by creating two
+network namespaces and running rds packets between them. A loopback
+network is provisioned with optional probability of packet loss or
+corruption. A workload of 50000 hashes, each 64 characters in size,
+is passed over an RDS socket on this test network. A passing test means
+the RDS-TCP stack was able to recover properly.  The provided config.sh
+can be used to compile the kernel with the necessary gcov options.  The
+kernel may optionally be configured to omit the coverage report as well.
+
+USAGE:
+	run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]
+	       [-u packet_duplicate]
+
+OPTIONS:
+	-d	Log directory.  Defaults to tools/testing/selftests/net/rds/rds_logs
+
+	-l	Simulates a percentage of packet loss
+
+	-c	Simulates a percentage of packet corruption
+
+	-u	Simulates a percentage of packet duplication.
+
+EXAMPLE:
+
+    # Create a suitable gcov enabled .config
+    tools/testing/selftests/net/rds/config.sh -g
+
+    # Alternately create a gcov disabled .config
+    tools/testing/selftests/net/rds/config.sh
+
+    # build the kernel
+    vng --build  --config tools/testing/selftests/net/config
+
+    # launch the tests in a VM
+    vng -v --rwdir ./ --run . --user root --cpus 4 -- \
+        "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh"
+
+An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/.
diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh
new file mode 100755
index 000000000000..791c8dbe1095
--- /dev/null
+++ b/tools/testing/selftests/net/rds/config.sh
@@ -0,0 +1,53 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Adjust tools/testing/selftests/net/config for the RDS selftest.
+# Run from the top of the kernel tree; pass -g to enable gcov profiling of RDS.
+
+set -eux
+
+unset KBUILD_OUTPUT
+
+GENERATE_GCOV_REPORT=0
+while getopts ":g" opt; do	# leading ':' = silent mode: OPTARG names a bad option
+  case ${opt} in
+    g)
+      GENERATE_GCOV_REPORT=1
+      ;;
+    :)
+      echo "USAGE: config.sh [-g]"
+      exit 1
+      ;;
+    \?)
+      echo "Invalid option: -${OPTARG}."
+      exit 1
+      ;;
+  esac
+done
+
+CONF_FILE="tools/testing/selftests/net/config"
+
+# no modules
+scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES
+
+# enable RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+	# instrument RDS and only RDS
+	scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL
+	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+	scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS
+else
+	scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL
+	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS
+fi
+
+# need network namespaces to run tests with veth network interfaces
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS
+scripts/config --file "$CONF_FILE" --enable CONFIG_VETH
+
+# simulate packet loss
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM
+
diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh
new file mode 100755
index 000000000000..8aee244f582a
--- /dev/null
+++ b/tools/testing/selftests/net/rds/run.sh
@@ -0,0 +1,224 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+
+unset KBUILD_OUTPUT
+
+current_dir="$(realpath "$(dirname "$0")")"
+build_dir="$current_dir"	# overridden below when include.sh exists
+
+build_include="$current_dir/include.sh"
+if test -f "$build_include"; then
+	# this include will define "$mk_build_dir" as the location the test was
+	# built.  We will need this if the tests are installed in a location
+	# other than the kernel source
+
+	source "$build_include"
+	build_dir="$mk_build_dir"
+fi
+
+# This test requires kernel source and the *.gcda data therein.
+# Locate the top level of the kernel source (five levels above the
+# build directory) and its net/rds subfolder with the *.gcno object files
+ksrc_dir="$(realpath "$build_dir"/../../../../../)"
+kconfig="$ksrc_dir/.config"
+obj_dir="$ksrc_dir/net/rds"
+
+GCOV_CMD=gcov	# check_gcov_env may switch this to gcov-<gcc version>
+
+# Check whether the host can produce a gcov report: needs a gcov matching gcc
+check_gcov_env()
+{
+	if ! command -v "$GCOV_CMD" > /dev/null 2>&1; then
+		echo "Warning: Could not find gcov."
+		GENERATE_GCOV_REPORT=0
+		return
+	fi
+
+	# the gcov version must match the gcc version
+	GCC_VER=$(gcc -dumpfullversion)
+	GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}')
+	if [ "$GCOV_VER" != "$GCC_VER" ]; then
+		# attempt to find a matching, version-suffixed gcov (e.g. gcov-13)
+		GCOV_CMD=gcov-$(gcc -dumpversion)
+
+		if ! command -v "$GCOV_CMD" > /dev/null 2>&1; then
+			echo "Warning: Could not find an appropriate gcov installation." \
+				"gcov version must match gcc version"
+			GENERATE_GCOV_REPORT=0
+			return
+		fi
+
+		# recheck version number of found gcov executable
+		GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \
+			awk 'BEGIN {FS="-"}{print $1}')
+		if [ "$GCOV_VER" != "$GCC_VER" ]; then
+			echo "Warning: Could not find an appropriate gcov installation." \
+				"gcov version must match gcc version"
+			GENERATE_GCOV_REPORT=0
+		else
+			echo "Warning: Mismatched gcc and gcov detected.  Using $GCOV_CMD"
+		fi
+	fi
+}
+
+# Verify that .config carries the gcov options a coverage report depends on
+check_gcov_conf()
+{
+	if ! grep -qsx "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig"; then
+		echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports"
+		GENERATE_GCOV_REPORT=0
+	fi
+	if ! grep -qsx "CONFIG_GCOV_KERNEL=y" "$kconfig"; then
+		echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports"
+		GENERATE_GCOV_REPORT=0
+	fi
+	if grep -qsx "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig"; then
+		echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports"
+		GENERATE_GCOV_REPORT=0
+	fi
+
+	if [ "$GENERATE_GCOV_REPORT" -ne 0 ]; then
+		# the kernel config is suitable; now make sure the host also
+		# has the gcov tooling needed to process the data
+		check_gcov_env
+	else
+		echo "To enable gcov reports, please run "\
+			"\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel"
+	fi
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_conf_enabled() {
+	if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+		echo "selftests: [SKIP] This test requires $1 enabled"
+		echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+		exit 4
+	fi
+}
+check_conf_disabled() {
+	if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+		echo "selftests: [SKIP] This test requires $1 disabled"
+		echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+		exit 4
+	fi
+}
+check_conf() {
+	check_conf_enabled CONFIG_NET_SCH_NETEM
+	check_conf_enabled CONFIG_VETH
+	check_conf_enabled CONFIG_NET_NS
+	check_conf_enabled CONFIG_RDS_TCP
+	check_conf_enabled CONFIG_RDS
+	check_conf_disabled CONFIG_MODULES
+}
+
+check_env()
+{
+	# Bail out with the kselftest SKIP code (4) unless the test has all it
+	# needs: a configured kernel tree, the helper tools and python >= 3.9
+	if ! test -d "$obj_dir"; then
+		echo "selftests: [SKIP] This test requires a kernel source tree"
+		exit 4
+	fi
+
+	if ! test -e "$kconfig"; then
+		echo "selftests: [SKIP] This test requires a configured kernel source tree"
+		exit 4
+	fi
+
+	# "command -v" is a shell builtin, unlike "which", which is itself an
+	# external program that may not be installed
+	for tool in strace tcpdump python3; do
+		if ! command -v "$tool" > /dev/null 2>&1; then
+			echo "selftests: [SKIP] Could not run test without $tool"
+			exit 4
+		fi
+	done
+
+	# test.py uses socket.send_fds()/recv_fds(), which appeared in python 3.9
+	python_major=$(python3 -c "import sys; print(sys.version_info[0])")
+	python_minor=$(python3 -c "import sys; print(sys.version_info[1])")
+	if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then
+		echo "selftests: [SKIP] Could not run test without at least python3.9"
+		python3 -V
+		exit 4
+	fi
+}
+
+LOG_DIR="$current_dir"/rds_logs	# -d: where traces, captures and coverage go
+PLOSS=0				# -l: percentage of packet loss
+PCORRUPT=0			# -c: percentage of packet corruption
+PDUP=0				# -u: percentage of packet duplication
+GENERATE_GCOV_REPORT=1		# cleared later if coverage cannot be produced
+while getopts ":d:l:c:u:" opt; do	# leading ':' = silent mode, OPTARG always set
+  case ${opt} in
+    d)
+      LOG_DIR=${OPTARG}
+      ;;
+    l)
+      PLOSS=${OPTARG}
+      ;;
+    c)
+      PCORRUPT=${OPTARG}
+      ;;
+    u)
+      PDUP=${OPTARG}
+      ;;
+    :)
+      echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \
+           "[-u packet_duplicate]"
+      exit 1
+      ;;
+    \?)
+      echo "Invalid option: -${OPTARG}."
+      exit 1
+      ;;
+  esac
+done
+
+
+check_env
+check_conf
+check_gcov_conf
+
+# every run starts from an empty log directory
+rm -fr "$LOG_DIR"
+TRACE_FILE="${LOG_DIR}/rds-strace.txt"
+COVR_DIR="${LOG_DIR}/coverage/"
+mkdir -p  "$LOG_DIR"
+mkdir -p "$COVR_DIR"
+
+set +e
+echo running RDS tests...
+echo Traces will be logged to "$TRACE_FILE"
+rm -f "$TRACE_FILE"
+strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \
+       -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP"
+
+test_rc=$?
+dmesg > "${LOG_DIR}/dmesg.out"
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+       echo saving coverage data...
+       # copy each .gcda from debugfs to the same path under /; stop if cd fails
+       (set +x; cd /sys/kernel/debug/gcov || exit 1; find ./* -name '*.gcda' | \
+       while read -r f; do
+               cat < "/sys/kernel/debug/gcov/$f" > "/$f"
+       done)
+
+       echo running gcovr...
+       gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \
+             -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/"
+else
+       echo "Coverage report will be skipped"
+fi
+
+if [ "$test_rc" -eq 0 ]; then
+	echo "PASS: Test completed successfully"
+else
+	echo "FAIL: Test failed"
+fi
+
+exit "$test_rc"
diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py
new file mode 100644
index 000000000000..e6bb109bcead
--- /dev/null
+++ b/tools/testing/selftests/net/rds/test.py
@@ -0,0 +1,262 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import argparse
+import ctypes
+import errno
+import hashlib
+import os
+import select
+import signal
+import socket
+import subprocess
+import sys
+import atexit
+from pwd import getpwuid
+from os import stat
+from lib.py import ip
+
+
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+setns = libc.setns  # raw libc setns(); called from netns_socket()
+
+net0 = 'net0'  # names of the two network namespaces
+net1 = 'net1'
+
+veth0 = 'veth0'  # interface names, one per namespace
+veth1 = 'veth1'
+
+# Helper function for creating a socket inside a network namespace.
+# We need this because otherwise RDS will detect that the two TCP
+# sockets are on the same interface and use the loop transport instead
+# of the TCP transport.
+def netns_socket(netns, *args):
+    u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+
+    child = os.fork()
+    if child == 0:
+        # change network namespace
+        with open(f'/var/run/netns/{netns}') as f:
+            try:
+                ret = setns(f.fileno(), 0)
+            except IOError as e:
+                print(e.errno)
+                print(e)
+
+        # create socket in target namespace
+        s = socket.socket(*args)
+
+        # send resulting socket to parent
+        socket.send_fds(u0, [], [s.fileno()])
+
+        sys.exit(0)
+
+    # receive socket from child
+    _, s, _, _ = socket.recv_fds(u1, 0, 1)
+    os.waitpid(child, 0)
+    u0.close()
+    u1.close()
+    return socket.fromfd(s[0], *args)
+
+def signal_handler(sig, frame):
+    print('Test timed out')
+    sys.exit(1)
+
+# Parse command line arguments: an optional log output folder, an optional
+# timeout, and optional packet loss/corruption/duplication percentages
+parser = argparse.ArgumentParser(description="init script args",
+                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-d", "--logdir", action="store",
+                    help="directory to store logs", default="/tmp")
+parser.add_argument('--timeout', help="timeout to terminate hung test",
+                    type=int, default=0)
+parser.add_argument('-l', '--loss', help="Simulate tcp packet loss",
+                    type=int, default=0)
+parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption",
+                    type=int, default=0)
+parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication",
+                    type=int, default=0)
+args = parser.parse_args()
+logdir=args.logdir
+packet_loss=str(args.loss)+'%'  # the three rates are passed to netem as 'N%'
+packet_corruption=str(args.corruption)+'%'
+packet_duplicate=str(args.duplicate)+'%'
+
+ip(f"netns add {net0}")
+ip(f"netns add {net1}")
+ip(f"link add type veth")  # no names given; the code below assumes veth0/veth1
+
+addrs = [
+    # we technically don't need different port numbers, but this will
+    # help identify traffic in the network analyzer
+    ('10.0.0.1', 10000),
+    ('10.0.0.2', 20000),
+]
+
+# move interfaces to separate namespaces so they can no longer be
+# bound directly; this prevents rds from switching over from the tcp
+# transport to the loop transport.
+ip(f"link set {veth0} netns {net0} up")
+ip(f"link set {veth1} netns {net1} up")
+
+
+
+# add addresses: addrs[0] lives in net0, addrs[1] in net1
+ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}")
+ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}")
+
+# add routes: each namespace reaches the peer address through its veth end
+ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}")
+ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}")
+
+# sanity check that our two interfaces/addresses are correctly set up
+# and communicating by doing a single ping
+ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}")
+
+# Start a packet capture on each network; each capture runs in a forked
+# child that writes <logdir>/<netns>.pcap
+for net in [net0, net1]:
+    tcpdump_pid = os.fork()
+    if tcpdump_pid == 0:
+        pcap = logdir+'/'+net+'.pcap'
+        subprocess.check_call(['touch', pcap])
+        user = getpwuid(stat(pcap).st_uid).pw_name  # owner of the pcap file
+        ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}")
+        sys.exit(0)
+
+# simulate packet loss, duplication and corruption on both veth ends (netem)
+for net, iface in [(net0, veth0), (net1, veth1)]:
+    ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem  \
+         corrupt {packet_corruption} loss {packet_loss} duplicate  \
+         {packet_duplicate}")
+
+# add a timeout
+if args.timeout > 0:
+    signal.alarm(args.timeout)
+    signal.signal(signal.SIGALRM, signal_handler)
+
+sockets = [
+    netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET),
+    netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET),
+]
+
+for s, addr in zip(sockets, addrs):
+    s.bind(addr)
+    s.setblocking(0)  # non-blocking: the loops below rely on BlockingIOError
+
+fileno_to_socket = {
+    s.fileno(): s for s in sockets
+}
+
+addr_to_socket = {
+    addr: s for addr, s in zip(addrs, sockets)
+}
+
+socket_to_addr = {
+    s: addr for addr, s in zip(addrs, sockets)
+}
+
+send_hashes = {}  # (sender fd, receiver fd) -> running sha256 of data sent
+recv_hashes = {}  # (sender fd, receiver fd) -> running sha256 of data received
+
+ep = select.epoll()
+
+for s in sockets:
+    ep.register(s, select.EPOLLRDNORM)
+
+n = 50000  # total number of messages to send
+nr_send = 0
+nr_recv = 0
+
+while nr_send < n:
+    # Send as much as we can without blocking
+    print("sending...", nr_send, nr_recv)
+    while nr_send < n:
+        send_data = hashlib.sha256(
+            f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8')
+
+        # deterministic send/receive pattern derived from the packet
+        # number; the receiver may be the sending socket itself
+        sender = sockets[nr_send % 2]
+        receiver = sockets[1 - (nr_send % 3) % 2]
+
+        try:
+            sender.sendto(send_data, socket_to_addr[receiver])
+            send_hashes.setdefault((sender.fileno(), receiver.fileno()),
+                    hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8'))
+            nr_send = nr_send + 1
+        except BlockingIOError as e:
+            break
+        except OSError as e:
+            if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]:
+                break
+            raise
+
+    # Receive as much as we can without blocking
+    print("receiving...", nr_send, nr_recv)
+    while nr_recv < nr_send:
+        for fileno, eventmask in ep.poll():
+            receiver = fileno_to_socket[fileno]
+
+            if eventmask & select.EPOLLRDNORM:
+                while True:  # drain this socket until it would block
+                    try:
+                        recv_data, address = receiver.recvfrom(1024)
+                        sender = addr_to_socket[address]
+                        recv_hashes.setdefault((sender.fileno(),
+                            receiver.fileno()), hashlib.sha256()).update(
+                                    f'<{recv_data}>'.encode('utf-8'))
+                        nr_recv = nr_recv + 1
+                    except BlockingIOError as e:
+                        break
+
+    # exercise net/rds/tcp.c:rds_tcp_sysctl_reset()
+    for net in [net0, net1]:
+        ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000")
+        ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000")
+
+print("done", nr_send, nr_recv)
+
+# the Python socket module doesn't know these
+RDS_INFO_FIRST = 10000
+RDS_INFO_LAST = 10017
+
+nr_success = 0
+nr_error = 0
+
+for s in sockets:
+    for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1):
+        # Sigh, the Python socket module doesn't allow us to pass
+        # buffer lengths greater than 1024 for some reason. RDS
+        # wants multiple pages.
+        try:
+            s.getsockopt(socket.SOL_RDS, optname, 1024)
+            nr_success = nr_success + 1
+        except OSError as e:
+            nr_error = nr_error + 1
+            if e.errno == errno.ENOSPC:
+                # ignore (note: it has already been counted in nr_error)
+                pass
+
+print(f"getsockopt(): {nr_success}/{nr_error}")  # counts only; never fails the test
+
+print("Stopping network packet captures")
+subprocess.check_call(['killall', '-q', 'tcpdump'])
+
+# We're done sending and receiving stuff, now let's check if what
+# we received is what we sent, for every (sender fd, receiver fd) pair.
+for (sender, receiver), send_hash in send_hashes.items():
+    recv_hash = recv_hashes.get((sender, receiver))
+
+    if recv_hash is None:
+        print("FAIL: No data received")
+        sys.exit(1)
+
+    if send_hash.hexdigest() != recv_hash.hexdigest():
+        print("FAIL: Send/recv mismatch")
+        print("hash expected:", send_hash.hexdigest())
+        print("hash received:", recv_hash.hexdigest())
+        sys.exit(1)
+
+    print(f"{sender}/{receiver}: ok")
+
+print("Success")
+sys.exit(0)
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 9eb42570294d..16ac4df55fdb 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = {
 	SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
 	SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
 	SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+	SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER),
+	SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE),
 };
 
 static struct socket_type socket_types[] = {
@@ -98,6 +100,22 @@ static struct test_case test_cases[] = {
 		{}
 	},
 	{
+		{ .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE
+			| SOF_TIMESTAMPING_OPT_RX_FILTER },
+		{}
+	},
+	{
+		{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+			| SOF_TIMESTAMPING_OPT_RX_FILTER },
+		{}
+	},
+	{
+		{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+			| SOF_TIMESTAMPING_RX_SOFTWARE
+			| SOF_TIMESTAMPING_OPT_RX_FILTER },
+		{ .swtstamp = true }
+	},
+	{
 		{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
 			| SOF_TIMESTAMPING_RX_SOFTWARE },
 		{ .swtstamp = true }
diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c
new file mode 100644
index 000000000000..d87dd8d8d491
--- /dev/null
+++ b/tools/testing/selftests/net/sk_so_peek_off.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include "../kselftest.h"
+
+static char *afstr(int af, int proto)
+{
+	/* Label for messages: non-TCP prints as UDP, non-AF_INET as IPv6 */
+	if (proto != IPPROTO_TCP)
+		return af == AF_INET ? "UDP/IPv4" : "UDP/IPv6";
+	return af == AF_INET ? "TCP/IPv4" : "TCP/IPv6";
+}
+
+int sk_peek_offset_probe(sa_family_t af, int proto)
+{
+	int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+	int optv = 0;
+	int ret;
+	int s;
+
+	/* Returns 1 if a throw-away socket accepts SO_PEEK_OFF, 0 otherwise */
+	s = socket(af, type, proto);
+	if (s < 0) {
+		ksft_perror("Temporary socket creation failed");
+		return 0;
+	}
+	ret = !setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(optv));
+	if (!ret)
+		printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto));
+	close(s);
+	return ret;
+}
+
+static void sk_peek_offset_set(int s, int offset)
+{
+	if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset)))
+		ksft_perror("Failed to set SO_PEEK_OFF value"); /* reported, not fatal */
+}
+
+static int sk_peek_offset_get(int s)
+{
+	int offset = -2;	/* returned on failure; no caller expects -2 */
+	socklen_t len = sizeof(offset);
+
+	if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len))
+		ksft_perror("Failed to get SO_PEEK_OFF value");
+	return offset;
+}
+
+static int sk_peek_offset_test(sa_family_t af, int proto)
+{
+	int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in a4;
+		struct sockaddr_in6 a6;
+	} a;
+	int res = 0;	/* becomes 1 only after every check below has passed */
+	int s[2] = {0, 0};
+	int recv_sock = 0;
+	int offset = 0;
+	ssize_t len;
+	char buf[2];
+
+	memset(&a, 0, sizeof(a));
+	a.sa.sa_family = af;
+	/* s[0] binds (and listens, for TCP); s[1] connects to it and sends */
+	s[0] = recv_sock = socket(af, type, proto);
+	s[1] = socket(af, type, proto);
+
+	if (s[0] < 0 || s[1] < 0) {
+		ksft_perror("Temporary socket creation failed");
+		goto out;
+	}
+	if (bind(s[0], &a.sa, sizeof(a)) < 0) {
+		ksft_perror("Temporary socket bind() failed");
+		goto out;
+	}
+	if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) {
+		ksft_perror("Temporary socket getsockname() failed");
+		goto out;
+	}
+	if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) {
+		ksft_perror("Temporary socket listen() failed");
+		goto out;
+	}
+	if (connect(s[1], &a.sa, sizeof(a)) < 0) {
+		ksft_perror("Temporary socket connect() failed");
+		goto out;
+	}
+	if (proto == IPPROTO_TCP) {
+		recv_sock = accept(s[0], NULL, NULL);
+		if (recv_sock <= 0) {
+			ksft_perror("Temporary socket accept() failed");
+			goto out;
+		}
+	}
+
+	/* Some basic tests of getting/setting offset */
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != -1) {
+		ksft_perror("Initial value of socket offset not -1");
+		goto out;
+	}
+	sk_peek_offset_set(recv_sock, 0);
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 0) {
+		ksft_perror("Failed to set socket offset to 0");
+		goto out;
+	}
+
+	/* Transfer a message */
+	if (send(s[1], (char *)("ab"), 2, 0) != 2) {
+		ksft_perror("Temporary probe socket send() failed");
+		goto out;
+	}
+	/* Read first byte */
+	len = recv(recv_sock, buf, 1, MSG_PEEK);
+	if (len != 1 || buf[0] != 'a') {
+		ksft_perror("Failed to read first byte of message");
+		goto out;
+	}
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 1) {
+		ksft_perror("Offset not forwarded correctly at first byte");
+		goto out;
+	}
+	/* Try to read beyond last byte */
+	len = recv(recv_sock, buf, 2, MSG_PEEK);
+	if (len != 1 || buf[0] != 'b') {
+		ksft_perror("Failed to read last byte of message");
+		goto out;
+	}
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 2) {
+		ksft_perror("Offset not forwarded correctly at last byte");
+		goto out;
+	}
+	/* Flush message */
+	len = recv(recv_sock, buf, 2, MSG_TRUNC);
+	if (len != 2) {
+		ksft_perror("Failed to flush message");
+		goto out;
+	}
+	offset = sk_peek_offset_get(recv_sock);
+	if (offset != 0) {
+		ksft_perror("Offset not reverted correctly after flush");
+		goto out;
+	}
+	printf("%s with MSG_PEEK_OFF works correctly\n", afstr(af, proto));
+	res = 1;
+out:
+	/* recv_sock aliases s[0] unless accept() returned a new fd: close once */
+	if (recv_sock >= 0 && recv_sock != s[0])
+		close(recv_sock);
+	if (s[1] >= 0)
+		close(s[1]);
+	if (s[0] >= 0)
+		close(s[0]);
+	return res;
+}
+
+static int do_test(int proto)
+{
+	int has4, has6, failed = 0;
+
+	has4 = sk_peek_offset_probe(AF_INET, proto);
+	has6 = sk_peek_offset_probe(AF_INET6, proto);
+
+	if (!has4 && !has6)
+		return KSFT_SKIP;
+
+	/* Run every family that passed the probe; a family without
+	 * SO_PEEK_OFF support is skipped rather than counted as a failure.
+	 */
+	if (has4 && !sk_peek_offset_test(AF_INET, proto))
+		failed = 1;
+
+	if (has6 && !sk_peek_offset_test(AF_INET6, proto))
+		failed = 1;
+
+	return failed ? KSFT_FAIL : KSFT_PASS;
+}
+
+int main(void)
+{
+	int restcp = do_test(IPPROTO_TCP);
+	int resudp = do_test(IPPROTO_UDP);
+
+	if (restcp == KSFT_FAIL || resudp == KSFT_FAIL)
+		return KSFT_FAIL;
+	if (restcp == KSFT_SKIP && resudp == KSFT_SKIP)
+		return KSFT_SKIP;	/* nothing was tested at all */
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
index bd88b90b902b..5b0205c70c39 100644
--- a/tools/testing/selftests/net/tcp_ao/Makefile
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -31,7 +31,8 @@ CFLAGS	+= $(KHDR_INCLUDES)
 CFLAGS	+= -iquote ./lib/ -I ../../../../include/
 
 # Library
-LIBSRC	:= kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c
+LIBSRC	:= ftrace.c ftrace-tcp.c kconfig.c netlink.c
+LIBSRC	+= proc.c repair.c setup.c sock.c utils.c
 LIBOBJ	:= $(LIBSRC:%.c=$(LIBDIR)/%.o)
 EXTRA_CLEAN += $(LIBOBJ) $(LIB)
 
diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
index a1e6e007c291..6736484996a3 100644
--- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c
+++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
@@ -355,6 +355,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(30, server_fn, client_fn);
+	test_init(31, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
index d3277a9de987..3605e38711cb 100644
--- a/tools/testing/selftests/net/tcp_ao/config
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -7,4 +7,5 @@ CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_NET_VRF=y
 CONFIG_TCP_AO=y
 CONFIG_TCP_MD5SIG=y
+CONFIG_TRACEPOINTS=y
 CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
index 185a2f6e5ff3..d418162d335f 100644
--- a/tools/testing/selftests/net/tcp_ao/connect-deny.c
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -71,10 +71,12 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
 		}
 	}
 
+	synchronize_threads(); /* before counter checks */
 	if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
 		test_error("test_get_tcp_ao_counters()");
 
 	close(lsk);
+
 	if (pwd)
 		test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
 
@@ -84,10 +86,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
 	after_cnt = netstat_get_one(cnt_name, NULL);
 
 	if (after_cnt <= before_cnt) {
-		test_fail("%s: %s counter did not increase: %zu <= %zu",
+		test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
 				tst_name, cnt_name, after_cnt, before_cnt);
 	} else {
-		test_ok("%s: counter %s increased %zu => %zu",
+		test_ok("%s: counter %s increased %" PRIu64  " => %" PRIu64,
 			tst_name, cnt_name, before_cnt, after_cnt);
 	}
 
@@ -180,6 +182,7 @@ static void try_connect(const char *tst_name, unsigned int port,
 	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
 	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
 
+	synchronize_threads(); /* before counter checks */
 	if (ret < 0) {
 		if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
 			test_ok("%s: connect() was prevented", tst_name);
@@ -212,30 +215,44 @@ out:
 
 static void *client_fn(void *arg)
 {
-	union tcp_addr wrong_addr, network_addr;
+	union tcp_addr wrong_addr, network_addr, addr_any = {};
 	unsigned int port = test_server_port;
 
 	if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
 		test_error("Can't convert ip address %s", TEST_WRONG_IP);
 
+	trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
+	trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest,
+				-1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO server + Non-AO client", port++, NULL,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
+	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
+	trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
+	trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any,
+				 port, 0, 100, 100);
 	try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
+	trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
+	trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
@@ -259,6 +276,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(21, server_fn, client_fn);
+	test_init(22, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
index 81653b47f303..f1d8d29e393f 100644
--- a/tools/testing/selftests/net/tcp_ao/connect.c
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -67,14 +67,14 @@ static void *client_fn(void *arg)
 	netstat_free(ns_after);
 
 	if (nr_packets > (after_aogood - before_aogood)) {
-		test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)",
+		test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")",
 				nr_packets, after_aogood, before_aogood);
 		return NULL;
 	}
 	if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
 		return NULL;
 
-	test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64,
+	test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu",
 			before_aogood, ao1.ao_info_pkt_good,
 			ao1.key_cnts[0].pkt_good,
 			after_aogood, ao2.ao_info_pkt_good,
@@ -85,6 +85,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(1, server_fn, client_fn);
+	test_init(2, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
index d69bcba3c929..a1614f0d8c44 100644
--- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -444,6 +444,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(3, server_fn, client_fn);
+	test_init(4, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index 24e62120b792..d4385b52c10b 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
 	synchronize_threads(); /* 5: counters */
 }
 
-static void try_unmatched_keys(int sk, int *rnext_index)
+static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port)
 {
 	struct test_key *key;
 	unsigned int i = 0;
@@ -1013,6 +1013,9 @@ static void try_unmatched_keys(int sk, int *rnext_index)
 		test_error("all keys on server match the client");
 	if (test_set_key(sk, -1, key->server_keyid))
 		test_error("Can't change the current key");
+	trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest,
+			      -1, port, 0, -1, -1, -1, -1, -1,
+			      -1, key->server_keyid, -1);
 	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
 		test_fail("verify failed");
 	*rnext_index = i;
@@ -1054,6 +1057,10 @@ static void check_current_back(const char *tst_name, unsigned int port,
 		return;
 	if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1))
 		test_error("Can't change the current key");
+	trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr,
+			      port, -1, 0, -1, -1, -1, -1, -1,
+			      collection.keys[rotate_to_index].client_keyid,
+			      collection.keys[current_index].client_keyid, -1);
 	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
 		test_fail("verify failed");
 	/* There is a race here: between setting the current_key with
@@ -1085,6 +1092,11 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
 	for (i = rnext_index + 1; rotations > 0; i++, rotations--) {
 		if (i >= collection.nr_keys)
 			i = 0;
+		trace_ao_event_expect(TCP_AO_RNEXT_REQUEST,
+				this_ip_addr, this_ip_dest,
+				-1, port, 0, -1, -1, -1, -1, -1,
+				i == 0 ? -1 : collection.keys[i - 1].server_keyid,
+				collection.keys[i].server_keyid, -1);
 		if (test_set_key(sk, -1, collection.keys[i].server_keyid))
 			test_error("Can't change the Rnext key");
 		if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
@@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port,
 				 rnext_index, msg_len, nr_packets);
 	if (sk < 0)
 		return;
-	try_unmatched_keys(sk, &rnext_index);
+	try_unmatched_keys(sk, &rnext_index, port);
 	end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL);
 }
 
@@ -1181,6 +1193,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(120, server_fn, client_fn);
+	test_init(121, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
index fbc7f6111815..db44e77428dd 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -37,17 +37,58 @@ extern void __test_xfail(const char *buf);
 extern void __test_error(const char *buf);
 extern void __test_skip(const char *buf);
 
-__attribute__((__format__(__printf__, 2, 3)))
-static inline void __test_print(void (*fn)(const char *), const char *fmt, ...)
+/* vasprintf()-like: returns a malloc()ed formatted string or NULL on error */
+static inline char *test_snprintf(const char *fmt, va_list vargs)
 {
-#define TEST_MSG_BUFFER_SIZE 4096
-	char buf[TEST_MSG_BUFFER_SIZE];
-	va_list arg;
-
-	va_start(arg, fmt);
-	vsnprintf(buf, sizeof(buf), fmt, arg);
-	va_end(arg);
-	fn(buf);
+	va_list tmp;
+	char *ret;
+	int len, n;
+
+	va_copy(tmp, vargs);	/* keeps vargs intact for the second pass */
+	len = vsnprintf(NULL, 0, fmt, tmp);
+	va_end(tmp);
+	if (len < 0)
+		return NULL;
+
+	ret = malloc((size_t)len + 1);
+	if (!ret)
+		return NULL;
+
+	n = vsnprintf(ret, (size_t)len + 1, fmt, vargs);
+	if (n < 0 || n > len) {	/* failed or would not fit */
+		free(ret);
+		return NULL;
+	}
+	return ret;
+}
+
+static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...)
+{
+	va_list vargs;
+	char *ret;
+
+	va_start(vargs, fmt);
+	ret = test_snprintf(fmt, vargs);
+	va_end(vargs);
+
+	return ret;
+}
+
+static __printf(2, 3) inline void __test_print(void (*fn)(const char *),
+					       const char *fmt, ...)
+{
+	va_list vargs;
+	char *msg;
+
+	va_start(vargs, fmt);
+	msg = test_snprintf(fmt, vargs);
+	va_end(vargs);
+
+	if (!msg)
+		return;
+
+	fn(msg);
+	free(msg);
 }
 
 #define test_print(fmt, ...)						\
@@ -103,6 +144,7 @@ enum test_needs_kconfig {
 	KCONFIG_TCP_AO,			/* required */
 	KCONFIG_TCP_MD5,		/* optional, for TCP-MD5 features */
 	KCONFIG_NET_VRF,		/* optional, for L3/VRF testing */
+	KCONFIG_FTRACE,			/* optional, for tracepoints checks */
 	__KCONFIG_LAST__
 };
 extern bool kernel_config_has(enum test_needs_kconfig k);
@@ -142,6 +184,8 @@ static inline void test_init2(unsigned int ntests,
 	__test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2);
 }
 extern void test_add_destructor(void (*d)(void));
+extern void test_init_ftrace(int nsfd1, int nsfd2);
+extern int test_setup_tracing(void);
 
 /* To adjust optmem socket limit, approximately estimate a number,
  * that is bigger than sizeof(struct tcp_ao_key).
@@ -216,12 +260,17 @@ static inline void test_init(unsigned int ntests,
 }
 extern void synchronize_threads(void);
 extern void switch_ns(int fd);
+extern int switch_save_ns(int fd);
+extern void switch_close_ns(int fd);
 
 extern __thread union tcp_addr this_ip_addr;
 extern __thread union tcp_addr this_ip_dest;
 extern int test_family;
 
 extern void randomize_buffer(void *buf, size_t buflen);
+extern __printf(3, 4) int test_echo(const char *fname, bool append,
+				    const char *fmt, ...);
+
 extern int open_netns(void);
 extern int unshare_open_netns(void);
 extern const char veth_name[];
@@ -602,4 +651,115 @@ static inline int test_add_repaired_key(int sk,
 	return test_verify_socket_key(sk, &tmp);
 }
 
+#define DEFAULT_FTRACE_BUFFER_KB	10000
+#define DEFAULT_TRACER_LINES_ARR	200
+struct test_ftracer;
+extern uint64_t ns_cookie1, ns_cookie2;
+
+enum ftracer_op {
+	FTRACER_LINE_DISCARD = 0,
+	FTRACER_LINE_PRESERVE,
+	FTRACER_EXIT,
+};
+
+extern struct test_ftracer *create_ftracer(const char *name,
+		enum ftracer_op (*process_line)(const char *line),
+		void (*destructor)(struct test_ftracer *tracer),
+		bool (*expecting_more)(void),
+		size_t lines_buf_sz, size_t buffer_size_kb);
+extern int setup_trace_event(struct test_ftracer *tracer,
+			     const char *event, const char *filter);
+extern void destroy_ftracer(struct test_ftracer *tracer);
+extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer);
+extern const char **tracer_get_savedlines(struct test_ftracer *tracer);
+
+enum trace_events {
+	/* TCP_HASH_EVENT */
+	TCP_HASH_BAD_HEADER = 0,
+	TCP_HASH_MD5_REQUIRED,
+	TCP_HASH_MD5_UNEXPECTED,
+	TCP_HASH_MD5_MISMATCH,
+	TCP_HASH_AO_REQUIRED,
+	/* TCP_AO_EVENT */
+	TCP_AO_HANDSHAKE_FAILURE,
+	TCP_AO_WRONG_MACLEN,
+	TCP_AO_MISMATCH,
+	TCP_AO_KEY_NOT_FOUND,
+	TCP_AO_RNEXT_REQUEST,
+	/* TCP_AO_EVENT_SK */
+	TCP_AO_SYNACK_NO_KEY,
+	/* TCP_AO_EVENT_SNE */
+	TCP_AO_SND_SNE_UPDATE,
+	TCP_AO_RCV_SNE_UPDATE,
+	__MAX_TRACE_EVENTS
+};
+
+extern int __trace_event_expect(enum trace_events type, int family,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port, int L3index,
+				int fin, int syn, int rst, int psh, int ack,
+				int keyid, int rnext, int maclen, int sne);
+
+static inline void trace_hash_event_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port, int L3index,
+				int fin, int syn, int rst, int psh, int ack)
+{
+	int err;
+
+	err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				   src_port, dst_port, L3index,
+				   fin, syn, rst, psh, ack,
+				   -1, -1, -1, -1);
+	if (err)
+		test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port, int L3index,
+				int fin, int syn, int rst, int psh, int ack,
+				int keyid, int rnext, int maclen)
+{
+	int err;
+
+	err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				   src_port, dst_port, L3index,
+				   fin, syn, rst, psh, ack,
+				   keyid, rnext, maclen, -1);
+	if (err)
+		test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_sk_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port,
+				int keyid, int rnext)
+{
+	int err;
+
+	err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				   src_port, dst_port, -1,
+				   -1, -1, -1, -1, -1,
+				   keyid, rnext, -1, -1);
+	if (err)
+		test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_sne_expect(enum trace_events type,
+				union tcp_addr src, union tcp_addr dst,
+				int src_port, int dst_port, int sne)
+{
+	int err;
+
+	err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+				   src_port, dst_port, -1,
+				   -1, -1, -1, -1, -1,
+				   -1, -1, -1, sne);
+	if (err)
+		test_error("Couldn't add a trace event: %d", err);
+}
+
+extern int setup_aolib_ftracer(void);
+
 #endif /* _AOLIB_H_ */
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
new file mode 100644
index 000000000000..24380c68fec6
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include "aolib.h"
+
+static const char *trace_event_names[__MAX_TRACE_EVENTS] = {
+	/* TCP_HASH_EVENT */
+	"tcp_hash_bad_header",
+	"tcp_hash_md5_required",
+	"tcp_hash_md5_unexpected",
+	"tcp_hash_md5_mismatch",
+	"tcp_hash_ao_required",
+	/* TCP_AO_EVENT */
+	"tcp_ao_handshake_failure",
+	"tcp_ao_wrong_maclen",
+	"tcp_ao_mismatch",
+	"tcp_ao_key_not_found",
+	"tcp_ao_rnext_request",
+	/* TCP_AO_EVENT_SK */
+	"tcp_ao_synack_no_key",
+	/* TCP_AO_EVENT_SNE */
+	"tcp_ao_snd_sne_update",
+	"tcp_ao_rcv_sne_update"
+};
+
+struct expected_trace_point {
+	/* required */
+	enum trace_events type;
+	int family;
+	union tcp_addr src;
+	union tcp_addr dst;
+
+	/* optional */
+	int src_port;
+	int dst_port;
+	int L3index;
+
+	int fin;
+	int syn;
+	int rst;
+	int psh;
+	int ack;
+
+	int keyid;
+	int rnext;
+	int maclen;
+	int sne;
+
+	size_t matched;
+};
+
+static struct expected_trace_point *exp_tps;
+static size_t exp_tps_nr;
+static size_t exp_tps_size;
+static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/*
+ * Queue an expected trace point; optional fields < 0 match any value.
+ * Returns 0 (also when ftrace is unavailable) or -ENOMEM.
+ */
+int __trace_event_expect(enum trace_events type, int family,
+			 union tcp_addr src, union tcp_addr dst,
+			 int src_port, int dst_port, int L3index,
+			 int fin, int syn, int rst, int psh, int ack,
+			 int keyid, int rnext, int maclen, int sne)
+{
+	struct expected_trace_point new_tp = {
+		.type           = type,
+		.family         = family,
+		.src            = src,
+		.dst            = dst,
+		.src_port       = src_port,
+		.dst_port       = dst_port,
+		.L3index        = L3index,
+		.fin            = fin,
+		.syn            = syn,
+		.rst            = rst,
+		.psh            = psh,
+		.ack            = ack,
+		.keyid          = keyid,
+		.rnext          = rnext,
+		.maclen         = maclen,
+		.sne            = sne,
+	};
+	int ret = 0;
+
+	if (!kernel_config_has(KCONFIG_FTRACE))
+		return 0;
+
+	pthread_mutex_lock(&exp_tps_mutex);
+	if (exp_tps_nr == exp_tps_size) {
+		size_t new_size = exp_tps_size ? exp_tps_size * 1.6 : 10;
+		struct expected_trace_point *tmp;
+
+		tmp = reallocarray(exp_tps, new_size, sizeof(exp_tps[0]));
+		if (!tmp) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		/* Publish the new capacity only once the array really grew */
+		exp_tps = tmp;
+		exp_tps_size = new_size;
+	}
+	exp_tps[exp_tps_nr++] = new_tp;
+out:
+	pthread_mutex_unlock(&exp_tps_mutex);
+	return ret;
+}
+
+static void free_expected_events(void)
+{
+	/* Called from the process destructor, so the mutex is not taken */
+	free(exp_tps);
+	exp_tps = NULL;
+	exp_tps_nr = exp_tps_size = 0;
+}
+
+struct trace_point {
+	int family;
+	union tcp_addr src;
+	union tcp_addr dst;
+	unsigned int src_port;
+	unsigned int dst_port;
+	int L3index;
+	unsigned int fin:1,
+		     syn:1,
+		     rst:1,
+		     psh:1,
+		     ack:1;
+
+	unsigned int keyid;
+	unsigned int rnext;
+	unsigned int maclen;
+
+	unsigned int sne;
+};
+
+static bool lookup_expected_event(int event_type, struct trace_point *e)
+{
+	size_t i;
+
+	pthread_mutex_lock(&exp_tps_mutex);
+	for (i = 0; i < exp_tps_nr; i++) {
+		struct expected_trace_point *p = &exp_tps[i];
+		size_t sk_size;
+
+		if (p->type != event_type)
+			continue;
+		if (p->family != e->family)
+			continue;
+		if (p->family == AF_INET)
+			sk_size = sizeof(p->src.a4);
+		else
+			sk_size = sizeof(p->src.a6);
+		if (memcmp(&p->src, &e->src, sk_size))
+			continue;
+		if (memcmp(&p->dst, &e->dst, sk_size))
+			continue;
+		if (p->src_port >= 0 && p->src_port != e->src_port)
+			continue;
+		if (p->dst_port >= 0 && p->dst_port != e->dst_port)
+			continue;
+		if (p->L3index >= 0 && p->L3index != e->L3index)
+			continue;
+
+		if (p->fin >= 0 && p->fin != e->fin)
+			continue;
+		if (p->syn >= 0 && p->syn != e->syn)
+			continue;
+		if (p->rst >= 0 && p->rst != e->rst)
+			continue;
+		if (p->psh >= 0 && p->psh != e->psh)
+			continue;
+		if (p->ack >= 0 && p->ack != e->ack)
+			continue;
+
+		if (p->keyid >= 0 && p->keyid != e->keyid)
+			continue;
+		if (p->rnext >= 0 && p->rnext != e->rnext)
+			continue;
+		if (p->maclen >= 0 && p->maclen != e->maclen)
+			continue;
+		if (p->sne >= 0 && p->sne != e->sne)
+			continue;
+		p->matched++;
+		pthread_mutex_unlock(&exp_tps_mutex);
+		return true;
+	}
+	pthread_mutex_unlock(&exp_tps_mutex);
+	return false;
+}
+
+static int check_event_type(const char *line)
+{
+	size_t i;
+
+	/*
+	 * Prefix-match the line against every known event name and return the
+	 * matching index (or -1). Should have been a set or hashmap... KISS.
+	 */
+	for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+		if (!strncmp(trace_event_names[i], line, strlen(trace_event_names[i])))
+			return i;
+	}
+	return -1;
+}
+
+static bool event_has_flags(enum trace_events event)
+{
+	switch (event) {
+	case TCP_HASH_BAD_HEADER:
+	case TCP_HASH_MD5_REQUIRED:
+	case TCP_HASH_MD5_UNEXPECTED:
+	case TCP_HASH_MD5_MISMATCH:
+	case TCP_HASH_AO_REQUIRED:
+	case TCP_AO_HANDSHAKE_FAILURE:
+	case TCP_AO_WRONG_MACLEN:
+	case TCP_AO_MISMATCH:
+	case TCP_AO_KEY_NOT_FOUND:
+	case TCP_AO_RNEXT_REQUEST:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int tracer_ip_split(int family, char *src, char **addr, char **port)
+{
+	char *p;
+
+	if (family == AF_INET) {
+		/* format is <addr>:port, i.e.: 10.0.254.1:7015 */
+		*addr = src;
+		p = strchr(src, ':');
+		if (!p) {
+			test_print("Couldn't parse trace event addr:port %s", src);
+			return -EINVAL;
+		}
+		*p++ = '\0';
+		*port = p;
+		return 0;
+	}
+	if (family != AF_INET6)
+		return -EAFNOSUPPORT;
+
+	/* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */
+	*addr = strchr(src, '[');
+	p = strchr(src, ']');
+
+	if (!p || !*addr) {
+		test_print("Couldn't parse trace event [addr]:port %s", src);
+		return -EINVAL;
+	}
+
+	*addr = *addr + 1;      /* '[' */
+	*p++ = '\0';            /* ']' */
+	if (*p != ':') {
+		test_print("Couldn't parse trace event :port %s", p);
+		return -EINVAL;
+	}
+	*p++ = '\0';            /* ':' */
+	*port = p;
+	return 0;
+}
+
+static int tracer_scan_address(int family, char *src,
+			       union tcp_addr *dst, unsigned int *port)
+{
+	char *addr, *port_str;
+	int ret;
+
+	ret = tracer_ip_split(family, src, &addr, &port_str);
+	if (ret)
+		return ret;
+
+	if (inet_pton(family, addr, dst) != 1) {
+		test_print("Couldn't parse trace event addr %s", addr);
+		return -EINVAL;
+	}
+	errno = 0;
+	*port = (unsigned int)strtoul(port_str, NULL, 10);
+	if (errno != 0) {
+		test_print("Couldn't parse trace event port %s", port_str);
+		return -errno;
+	}
+	return 0;
+}
+
+/*
+ * Parse one trace line of the given event type into *out.
+ * Returns 0 on success, negative if parsing failed or the netns is foreign.
+ */
+static int tracer_scan_event(const char *line, enum trace_events event,
+			     struct trace_point *out)
+{
+	char *src = NULL, *dst = NULL, *family = NULL;
+	int nr_matched, nr_expected, ret = 0;
+	char fin, syn, rst, psh, ack;
+	uint64_t netns_cookie;
+
+	switch (event) {
+	case TCP_HASH_BAD_HEADER:
+	case TCP_HASH_MD5_REQUIRED:
+	case TCP_HASH_MD5_UNEXPECTED:
+	case TCP_HASH_MD5_MISMATCH:
+	case TCP_HASH_AO_REQUIRED:
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->L3index,
+				    &fin, &syn, &rst, &psh, &ack);
+		nr_expected = 10;
+		break;
+	case TCP_AO_HANDSHAKE_FAILURE:
+	case TCP_AO_WRONG_MACLEN:
+	case TCP_AO_MISMATCH:
+	case TCP_AO_KEY_NOT_FOUND:
+	case TCP_AO_RNEXT_REQUEST:
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->L3index,
+				    &fin, &syn, &rst, &psh, &ack,
+				    &out->keyid, &out->rnext, &out->maclen);
+		nr_expected = 13;
+		break;
+	case TCP_AO_SYNACK_NO_KEY:
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->keyid, &out->rnext);
+		nr_expected = 6;
+		break;
+	case TCP_AO_SND_SNE_UPDATE:
+	case TCP_AO_RCV_SNE_UPDATE:
+		nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u",
+				    &netns_cookie, &family,
+				    &src, &dst, &out->sne);
+		nr_expected = 5;
+		break;
+	default:
+		return -1;
+	}
+
+	/* Don't look at outputs that sscanf() has not filled in */
+	if (nr_matched != nr_expected) {
+		test_print("Couldn't parse trace event, matched = %d/%d",
+			   nr_matched, nr_expected);
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	if (family) {
+		if (!strcmp(family, "AF_INET")) {
+			out->family = AF_INET;
+		} else if (!strcmp(family, "AF_INET6")) {
+			out->family = AF_INET6;
+		} else {
+			test_print("Couldn't parse trace event family %s", family);
+			ret = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	if (event_has_flags(event)) {
+		out->fin = (fin == 'F');
+		out->syn = (syn == 'S');
+		out->rst = (rst == 'R');
+		out->psh = (psh == 'P');
+		out->ack = (ack == '.');
+
+		if ((fin != 'F' && fin != ' ') ||
+		    (syn != 'S' && syn != ' ') ||
+		    (rst != 'R' && rst != ' ') ||
+		    (psh != 'P' && psh != ' ') ||
+		    (ack != '.' && ack != ' ')) {
+			test_print("Couldn't parse trace event flags %c%c%c%c%c",
+				   fin, syn, rst, psh, ack);
+			ret = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) {
+		test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64,
+			   netns_cookie, ns_cookie1, ns_cookie2);
+		ret = -EINVAL;
+	}
+
+out_free:
+	free(src);
+	free(dst);
+	free(family);
+	return ret;
+}
+
+static enum ftracer_op aolib_tracer_process_event(const char *line)
+{
+	int event_type = check_event_type(line);
+	struct trace_point tmp = {};
+
+	if (event_type < 0)
+		return FTRACER_LINE_PRESERVE;
+
+	if (tracer_scan_event(line, event_type, &tmp))
+		return FTRACER_LINE_PRESERVE;
+
+	return lookup_expected_event(event_type, &tmp) ?
+		FTRACER_LINE_DISCARD : FTRACER_LINE_PRESERVE;
+}
+
+static void dump_trace_event(struct expected_trace_point *e)
+{
+	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
+
+	if (!inet_ntop(e->family, &e->src, src, INET6_ADDRSTRLEN))
+		test_error("inet_ntop()");
+	if (!inet_ntop(e->family, &e->dst, dst, INET6_ADDRSTRLEN))
+		test_error("inet_ntop()");
+	test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu",
+		   trace_event_names[e->type],
+		   src, e->src_port, dst, e->dst_port, e->L3index,
+		   (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "",
+		   (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "",
+		   (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "",
+		   (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "",
+		   (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "",
+		   e->keyid, e->rnext, e->maclen, e->sne, e->matched);
+}
+
+/* Report per-event match counters; dump every expectation on a mismatch. */
+static void print_match_stats(bool unexpected_events)
+{
+	size_t matches_per_type[__MAX_TRACE_EVENTS] = {};
+	char *stat_line = NULL, *prev_line;
+	bool expected_but_none = false;
+	size_t i, total_matched = 0;
+
+	for (i = 0; i < exp_tps_nr; i++) {
+		struct expected_trace_point *e = &exp_tps[i];
+
+		total_matched += e->matched;
+		matches_per_type[e->type] += e->matched;
+		if (!e->matched)
+			expected_but_none = true;
+	}
+	for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+		if (!matches_per_type[i])
+			continue;
+		prev_line = stat_line;
+		stat_line = test_sprintf("%s%s[%zu] ", prev_line ?: "",
+					 trace_event_names[i], matches_per_type[i]);
+		free(prev_line);	/* test_sprintf() returned a new buffer */
+		if (!stat_line)
+			test_error("test_sprintf()");
+	}
+
+	if (unexpected_events || expected_but_none) {
+		for (i = 0; i < exp_tps_nr; i++)
+			dump_trace_event(&exp_tps[i]);
+	}
+
+	if (!unexpected_events && expected_but_none)
+		test_fail("Some trace events were expected, but didn't occur");
+	else if (!unexpected_events && total_matched)
+		test_ok("Trace events matched expectations: %zu %s",
+			total_matched, stat_line);
+	else if (!unexpected_events)
+		test_ok("No unexpected trace events during the test run");
+	free(stat_line);
+}
+
+#define dump_events(fmt, ...)                           \
+	__test_print(__test_msg, fmt, ##__VA_ARGS__)
+static void check_free_events(struct test_ftracer *tracer)
+{
+	const char **lines;
+	size_t nr;
+
+	if (!kernel_config_has(KCONFIG_FTRACE)) {
+		test_skip("kernel config doesn't have ftrace - no checks");
+		return;
+	}
+
+	nr = tracer_get_savedlines_nr(tracer);
+	lines = tracer_get_savedlines(tracer);
+	print_match_stats(!!nr);
+	if (!nr)
+		return;
+
+	errno = 0;
+	test_xfail("Trace events [%zu] were not expected:", nr);
+	while (nr)
+		dump_events("\t%s", lines[--nr]);
+}
+
+/* Enable every tcp/<event> on @tracer, filtered to the two netns cookies */
+static int setup_tcp_trace_events(struct test_ftracer *tracer)
+{
+	char *filter;
+	int ret = 0;
+	size_t i;
+
+	/* The cookies are uint64_t: %zu is only right where size_t is 64-bit */
+	filter = test_sprintf("net_cookie == %" PRIu64 " || net_cookie == %" PRIu64,
+			      ns_cookie1, ns_cookie2);
+	if (!filter)
+		return -ENOMEM;
+
+	for (i = 0; !ret && i < __MAX_TRACE_EVENTS; i++) {
+		char *event_name = test_sprintf("tcp/%s", trace_event_names[i]);
+
+		if (!event_name) {
+			ret = -ENOMEM;
+			break;
+		}
+		ret = setup_trace_event(tracer, event_name, filter);
+		free(event_name);
+	}
+
+	free(filter);
+	return ret;
+}
+
+static void aolib_tracer_destroy(struct test_ftracer *tracer)
+{
+	check_free_events(tracer);
+	free_expected_events();
+}
+
+static bool aolib_tracer_expecting_more(void)
+{
+	size_t i;
+
+	for (i = 0; i < exp_tps_nr; i++)
+		if (!exp_tps[i].matched)
+			return true;
+	return false;
+}
+
+/* Create the "aolib" tracer and enable the TCP hash/AO trace events on it */
+int setup_aolib_ftracer(void)
+{
+	struct test_ftracer *f;
+
+	f = create_ftracer("aolib", aolib_tracer_process_event,
+			   aolib_tracer_destroy, aolib_tracer_expecting_more,
+			   DEFAULT_TRACER_LINES_ARR, DEFAULT_FTRACE_BUFFER_KB);
+	if (!f)
+		return -1;
+	return setup_tcp_trace_events(f);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
new file mode 100644
index 000000000000..e4d0b173bc94
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include "../../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+static char ftrace_path[] = "ksft-ftrace-XXXXXX";
+static bool ftrace_mounted;
+uint64_t ns_cookie1, ns_cookie2;
+
+struct test_ftracer {
+	pthread_t tracer_thread;
+	int	error;
+	char	*instance_path;
+	FILE	*trace_pipe;
+
+	enum ftracer_op (*process_line)(const char *line);
+	void (*destructor)(struct test_ftracer *tracer);
+	bool (*expecting_more)(void);
+
+	char	**saved_lines;
+	size_t	saved_lines_size;
+	size_t	next_line_ind;
+
+	pthread_cond_t met_all_expected;
+	pthread_mutex_t met_all_expected_lock;
+
+	struct test_ftracer *next;
+};
+
+static struct test_ftracer *ftracers;
+static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static int mount_ftrace(void)
+{
+	if (!mkdtemp(ftrace_path))
+		test_error("Can't create temp dir");
+
+	if (mount("tracefs", ftrace_path, "tracefs", 0, "rw"))
+		return -errno;
+
+	ftrace_mounted = true;
+
+	return 0;
+}
+
+static void unmount_ftrace(void)
+{
+	if (ftrace_mounted && umount(ftrace_path))
+		test_print("Failed on cleanup: can't unmount tracefs: %m");
+
+	if (rmdir(ftrace_path))
+		test_error("Failed on cleanup: can't remove ftrace dir %s",
+			   ftrace_path);
+}
+
+struct opts_list_t {
+	char *opt_name;
+	struct opts_list_t *next;
+};
+
+static int disable_trace_options(const char *ftrace_path)
+{
+	struct opts_list_t *opts_list = NULL;
+	char *fopts, *line = NULL;
+	size_t buf_len = 0;
+	ssize_t line_len;
+	int ret = 0;
+	FILE *opts;
+
+	fopts = test_sprintf("%s/%s", ftrace_path, "trace_options");
+	if (!fopts)
+		return -ENOMEM;
+
+	opts = fopen(fopts, "r+");
+	if (!opts) {
+		ret = -errno;
+		goto out_free;
+	}
+
+	while ((line_len = getline(&line, &buf_len, opts)) != -1) {
+		struct opts_list_t *tmp;
+
+		if (!strncmp(line, "no", 2))
+			continue;
+
+		tmp = malloc(sizeof(*tmp));
+		if (!tmp) {
+			ret = -ENOMEM;
+			goto out_free_opts_list;
+		}
+		tmp->next = opts_list;
+		tmp->opt_name = test_sprintf("no%s", line);
+		if (!tmp->opt_name) {
+			ret = -ENOMEM;
+			free(tmp);
+			goto out_free_opts_list;
+		}
+		opts_list = tmp;
+	}
+
+	while (opts_list) {
+		struct opts_list_t *tmp = opts_list;
+
+		fseek(opts, 0, SEEK_SET);
+		fwrite(tmp->opt_name, 1, strlen(tmp->opt_name), opts);
+
+		opts_list = opts_list->next;
+		free(tmp->opt_name);
+		free(tmp);
+	}
+
+out_free_opts_list:
+	while (opts_list) {
+		struct opts_list_t *tmp = opts_list;
+
+		opts_list = opts_list->next;
+		free(tmp->opt_name);
+		free(tmp);
+	}
+	free(line);
+	fclose(opts);
+out_free:
+	free(fopts);
+	return ret;
+}
+
+static int setup_buffer_size(const char *ftrace_path, size_t sz)
+{
+	char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path);
+	int ret;
+
+	if (!fbuf_size)
+		return -1;
+
+	ret = test_echo(fbuf_size, 0, "%zu", sz);
+	free(fbuf_size);
+	return ret;
+}
+
+static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name)
+{
+	char *tmp;
+
+	tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name);
+	if (!tmp)
+		return -ENOMEM;
+
+	tracer->instance_path = mkdtemp(tmp);
+	if (!tracer->instance_path) {
+		free(tmp);
+		return -errno;
+	}
+
+	return 0;
+}
+
+static void remove_ftrace_instance(struct test_ftracer *tracer)
+{
+	if (rmdir(tracer->instance_path))
+		test_print("Failed on cleanup: can't remove ftrace instance %s",
+			   tracer->instance_path);
+	free(tracer->instance_path);
+}
+
+static void tracer_cleanup(void *arg)
+{
+	struct test_ftracer *tracer = arg;
+
+	fclose(tracer->trace_pipe);
+}
+
+static void tracer_set_error(struct test_ftracer *tracer, int error)
+{
+	if (!tracer->error)
+		tracer->error = error;
+}
+
+const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer)
+{
+	return tracer->next_line_ind;
+}
+
+const char **tracer_get_savedlines(struct test_ftracer *tracer)
+{
+	return (const char **)tracer->saved_lines;
+}
+
+/* Reader thread: runs ->process_line() on each trace_pipe line it reads */
+static void *tracer_thread_func(void *arg)
+{
+	struct test_ftracer *tracer = arg;
+
+	pthread_cleanup_push(tracer_cleanup, arg);
+
+	while (tracer->next_line_ind < tracer->saved_lines_size) {
+		char **lp = &tracer->saved_lines[tracer->next_line_ind];
+		enum ftracer_op op;
+		size_t buf_len = 0;
+
+		if (getline(lp, &buf_len, tracer->trace_pipe) == -1)
+			break;
+
+		pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+		op = tracer->process_line(*lp);
+		pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+
+		if (tracer->expecting_more) {
+			pthread_mutex_lock(&tracer->met_all_expected_lock);
+			if (!tracer->expecting_more())
+				pthread_cond_signal(&tracer->met_all_expected);
+			pthread_mutex_unlock(&tracer->met_all_expected_lock);
+		}
+
+		if (op == FTRACER_LINE_DISCARD)
+			continue;
+		if (op == FTRACER_EXIT)
+			break;
+		if (op != FTRACER_LINE_PRESERVE)
+			test_error("unexpected tracer command %d", op);
+
+		tracer->next_line_ind++;
+	}
+	/* Only a full saved_lines[] deserves the complaint, not EOF or EXIT */
+	if (tracer->next_line_ind >= tracer->saved_lines_size)
+		test_print("too many lines in ftracer buffer %zu, exiting tracer",
+			   tracer->next_line_ind);
+
+	pthread_cleanup_pop(1);
+	return NULL;
+}
+
+/* Open the instance's trace_pipe and start the thread reading from it */
+static int setup_trace_thread(struct test_ftracer *tracer)
+{
+	int ret = 0;
+	char *path;
+
+	path = test_sprintf("%s/trace_pipe", tracer->instance_path);
+	if (!path)
+		return -ENOMEM;
+
+	tracer->trace_pipe = fopen(path, "r");
+	if (!tracer->trace_pipe) {
+		ret = -errno;
+		goto out_free;
+	}
+	/* pthread_create() returns the error number, it doesn't set errno */
+	ret = -pthread_create(&tracer->tracer_thread, NULL,
+			      tracer_thread_func, (void *)tracer);
+	if (ret)
+		fclose(tracer->trace_pipe);
+
+out_free:
+	free(path);
+	return ret;
+}
+
+/* Cancel and join the tracer thread; pthread_*() return errors, not errno */
+static void stop_trace_thread(struct test_ftracer *tracer)
+{
+	void *res = NULL;
+
+	if ((errno = pthread_cancel(tracer->tracer_thread))) {
+		tracer_set_error(tracer, -errno);
+		test_print("Can't stop tracer pthread: %m");
+	}
+	if ((errno = pthread_join(tracer->tracer_thread, &res))) {
+		tracer_set_error(tracer, -errno);
+		test_print("Can't join tracer pthread: %m");
+	} else if (res != PTHREAD_CANCELED) {
+		test_print("Tracer thread wasn't canceled");
+		tracer_set_error(tracer, -EIO);
+	}
+	if (tracer->error)
+		test_fail("tracer errored by %s", strerror(-tracer->error));
+}
+
+static void final_wait_for_events(struct test_ftracer *tracer,
+				  unsigned timeout_sec)
+{
+	struct timespec timeout;
+	struct timeval now;
+	int ret = 0;
+
+	if (!tracer->expecting_more)
+		return;
+
+	pthread_mutex_lock(&tracer->met_all_expected_lock);
+	gettimeofday(&now, NULL);
+	timeout.tv_sec = now.tv_sec + timeout_sec;
+	timeout.tv_nsec = now.tv_usec * 1000;
+
+	while (tracer->expecting_more() && ret != ETIMEDOUT)
+		ret = pthread_cond_timedwait(&tracer->met_all_expected,
+				&tracer->met_all_expected_lock, &timeout);
+	pthread_mutex_unlock(&tracer->met_all_expected_lock);
+}
+
+int setup_trace_event(struct test_ftracer *tracer,
+		      const char *event, const char *filter)
+{
+	char *enable_path, *filter_path, *instance = tracer->instance_path;
+	int ret;
+
+	enable_path = test_sprintf("%s/events/%s/enable", instance, event);
+	if (!enable_path)
+		return -ENOMEM;
+
+	filter_path = test_sprintf("%s/events/%s/filter", instance, event);
+	if (!filter_path) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	ret = test_echo(filter_path, 0, "%s", filter);
+	if (!ret)
+		ret = test_echo(enable_path, 0, "1");
+
+out_free:
+	free(filter_path);
+	free(enable_path);
+	return ret;
+}
+
+/* Create a tracefs instance with a reader thread; returns NULL on failure */
+struct test_ftracer *create_ftracer(const char *name,
+				    enum ftracer_op (*process_line)(const char *line),
+				    void (*destructor)(struct test_ftracer *tracer),
+				    bool (*expecting_more)(void),
+				    size_t lines_buf_sz, size_t buffer_size_kb)
+{
+	struct test_ftracer *tracer;
+	int err;
+
+	/* XXX: separate __create_ftracer() helper and do here
+	 * if (!kernel_config_has(KCONFIG_FTRACE))
+	 *	return NULL;
+	 */
+
+	tracer = malloc(sizeof(*tracer));
+	if (!tracer) {
+		test_print("malloc()");
+		return NULL;
+	}
+	memset(tracer, 0, sizeof(*tracer));
+
+	err = setup_ftrace_instance(tracer, name);
+	if (err) {
+		test_print("setup_ftrace_instance(): %d", err);
+		goto err_free;
+	}
+
+	err = disable_trace_options(tracer->instance_path);
+	if (err) {
+		test_print("disable_trace_options(): %d", err);
+		goto err_remove;
+	}
+
+	err = setup_buffer_size(tracer->instance_path, buffer_size_kb);
+	if (err) {
+		test_print("setup_buffer_size(): %d", err);
+		goto err_remove;
+	}
+
+	tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0]));
+	if (!tracer->saved_lines) {
+		test_print("calloc()");
+		goto err_remove;
+	}
+	tracer->saved_lines_size = lines_buf_sz;
+
+	tracer->process_line	= process_line;
+	tracer->destructor	= destructor;
+	tracer->expecting_more	= expecting_more;
+
+	err = pthread_cond_init(&tracer->met_all_expected, NULL);
+	if (err) {
+		test_print("pthread_cond_init(): %d", err);
+		goto err_free_lines;
+	}
+
+	err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL);
+	if (err) {
+		test_print("pthread_mutex_init(): %d", err);
+		goto err_cond_destroy;
+	}
+
+	err = setup_trace_thread(tracer);
+	if (err) {
+		test_print("setup_trace_thread(): %d", err);
+		goto err_mutex_destroy;
+	}
+
+	pthread_mutex_lock(&ftracers_lock);
+	tracer->next = ftracers;
+	ftracers = tracer;
+	pthread_mutex_unlock(&ftracers_lock);
+
+	return tracer;
+
+err_mutex_destroy:
+	pthread_mutex_destroy(&tracer->met_all_expected_lock);
+err_cond_destroy:
+	pthread_cond_destroy(&tracer->met_all_expected);
+err_free_lines:
+	free(tracer->saved_lines);
+err_remove:
+	remove_ftrace_instance(tracer);
+err_free:
+	free(tracer);
+	return NULL;
+}
+
+static void __destroy_ftracer(struct test_ftracer *tracer)
+{
+	size_t i;
+
+	final_wait_for_events(tracer, TEST_TIMEOUT_SEC);
+	stop_trace_thread(tracer);
+	remove_ftrace_instance(tracer);
+	if (tracer->destructor)
+		tracer->destructor(tracer);
+	for (i = 0; i < tracer->saved_lines_size; i++)
+		free(tracer->saved_lines[i]);
+	pthread_cond_destroy(&tracer->met_all_expected);
+	pthread_mutex_destroy(&tracer->met_all_expected_lock);
+	free(tracer);
+}
+
+/* Unlink @tracer from the global tracers list and tear it down */
+void destroy_ftracer(struct test_ftracer *tracer)
+{
+	pthread_mutex_lock(&ftracers_lock);
+	if (tracer == ftracers) {
+		ftracers = tracer->next;
+	} else {
+		struct test_ftracer *f = ftracers;
+
+		while (f && f->next != tracer)
+			f = f->next;
+		if (!f)
+			test_error("tracers list corruption or double free %p", tracer);
+		f->next = tracer->next;
+	}
+	tracer->next = NULL;
+	pthread_mutex_unlock(&ftracers_lock);
+	__destroy_ftracer(tracer);
+}
+
+static void destroy_all_ftracers(void)
+{
+	struct test_ftracer *f;
+
+	pthread_mutex_lock(&ftracers_lock);
+	f = ftracers;
+	ftracers = NULL;
+	pthread_mutex_unlock(&ftracers_lock);
+
+	while (f) {
+		struct test_ftracer *n = f->next;
+
+		f->next = NULL;
+		__destroy_ftracer(f);
+		f = n;
+	}
+}
+
+static void test_unset_tracing(void)
+{
+	destroy_all_ftracers();
+	unmount_ftrace();
+}
+
+int test_setup_tracing(void)
+{
+	/*
+	 * Just a basic protection - this should be called only once from
+	 * lib/kconfig. Not thread safe, which is fine as it's early, before
+	 * threads are created.
+	 */
+	static int already_set;
+	int err;
+
+	if (already_set)
+		return -1;
+
+	/* Needs net-namespace cookies for filters */
+	if (ns_cookie1 == ns_cookie2) {
+		test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing",
+			   ns_cookie1, ns_cookie2);
+		return -1;
+	}
+
+	already_set = 1;
+
+	test_add_destructor(test_unset_tracing);
+
+	err = mount_ftrace();
+	if (err) {
+		test_print("failed to mount_ftrace(): %d", err);
+		return err;
+	}
+
+	return setup_aolib_ftracer();
+}
+
+/* Fetch SO_NETNS_COOKIE from a socket opened after switch_save_ns(nsfd) */
+static int get_ns_cookie(int nsfd, uint64_t *out)
+{
+	int old_ns = switch_save_ns(nsfd);
+	socklen_t size = sizeof(*out);
+	int sk, ret = 0;
+
+	sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (sk < 0) {
+		ret = -errno;	/* before test_print() may clobber errno */
+		test_print("socket(): %m");
+		goto out_ns;
+	}
+	if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) {
+		ret = -errno;
+		test_print("getsockopt(SO_NETNS_COOKIE): %m");
+	}
+	close(sk);
+out_ns:
+	switch_close_ns(old_ns);
+	return ret;
+}
+
+void test_init_ftrace(int nsfd1, int nsfd2)
+{
+	get_ns_cookie(nsfd1, &ns_cookie1);
+	get_ns_cookie(nsfd2, &ns_cookie2);
+	/* Populate kernel config state */
+	kernel_config_has(KCONFIG_FTRACE);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
index f279ffc3843b..9f1c175846f8 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
@@ -6,7 +6,7 @@
 #include "aolib.h"
 
 struct kconfig_t {
-	int _errno;		/* the returned error if not supported */
+	int _error;		/* negative errno if not supported */
 	int (*check_kconfig)(int *error);
 };
 
@@ -62,7 +62,7 @@ static int has_tcp_ao(int *err)
 	memcpy(&tmp.addr, &addr, sizeof(addr));
 	*err = 0;
 	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) {
-		*err = errno;
+		*err = -errno;
 		if (errno != ENOPROTOOPT)
 			ret = -errno;
 	}
@@ -87,7 +87,7 @@ static int has_tcp_md5(int *err)
 	 */
 	*err = 0;
 	if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) {
-		*err = errno;
+		*err = -errno;
 		if (errno != ENOPROTOOPT && errno == ENOMEM) {
 			test_print("setsockopt(TCP_MD5SIG_EXT): %m");
 			ret = -errno;
@@ -116,13 +116,21 @@ static int has_vrfs(int *err)
 	return ret;
 }
 
+static int has_ftrace(int *err)
+{
+	*err = test_setup_tracing();	/* non-zero is treated as "not supported" */
+	return 0;			/* the probe itself never fails */
+}
+
+#define KCONFIG_UNKNOWN			1
 static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER;
 static struct kconfig_t kconfig[__KCONFIG_LAST__] = {
-	{ -1, has_net_ns },
-	{ -1, has_veth },
-	{ -1, has_tcp_ao },
-	{ -1, has_tcp_md5 },
-	{ -1, has_vrfs },
+	{ KCONFIG_UNKNOWN, has_net_ns },
+	{ KCONFIG_UNKNOWN, has_veth },
+	{ KCONFIG_UNKNOWN, has_tcp_ao },
+	{ KCONFIG_UNKNOWN, has_tcp_md5 },
+	{ KCONFIG_UNKNOWN, has_vrfs },
+	{ KCONFIG_UNKNOWN, has_ftrace },
 };
 
 const char *tests_skip_reason[__KCONFIG_LAST__] = {
@@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = {
 	"Tests require TCP-AO support (CONFIG_TCP_AO)",
 	"setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)",
 	"VRFs are not supported (CONFIG_NET_VRF)",
+	"Ftrace points are not supported (CONFIG_TRACEPOINTS)",
 };
 
 bool kernel_config_has(enum test_needs_kconfig k)
@@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k)
 	bool ret;
 
 	pthread_mutex_lock(&kconfig_lock);
-	if (kconfig[k]._errno == -1) {
-		if (kconfig[k].check_kconfig(&kconfig[k]._errno))
+	if (kconfig[k]._error == KCONFIG_UNKNOWN) {
+		if (kconfig[k].check_kconfig(&kconfig[k]._error))
 			test_error("Failed to initialize kconfig %u", k);
 	}
-	ret = kconfig[k]._errno == 0;
+	ret = kconfig[k]._error == 0;
 	pthread_mutex_unlock(&kconfig_lock);
 	return ret;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index e408b9243b2c..a27cc03c9fbd 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -111,7 +111,7 @@ static void sig_int(int signo)
 
 int open_netns(void)
 {
-	const char *netns_path = "/proc/self/ns/net";
+	const char *netns_path = "/proc/thread-self/ns/net";
 	int fd;
 
 	fd = open(netns_path, O_RDONLY);
@@ -142,6 +142,13 @@ int switch_save_ns(int new_ns)
 	return ret;
 }
 
+void switch_close_ns(int fd)
+{
+	if (setns(fd, CLONE_NEWNET))	/* enter the net namespace fd refers to */
+		test_error("setns()");
+	close(fd);			/* fd is consumed by this helper */
+}
+
 static int nsfd_outside	= -1;
 static int nsfd_parent	= -1;
 static int nsfd_child	= -1;
@@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix,
 	test_print("rand seed %u", (unsigned int)seed);
 	srand(seed);
 
-
 	ksft_print_header();
 	init_namespaces();
+	test_init_ftrace(nsfd_parent, nsfd_child);
 
 	if (add_veth(veth_name, nsfd_parent, nsfd_child))
 		test_error("Failed to add veth");
@@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void)
 		int old_ns = switch_save_ns(nsfd_child);
 
 		optmem_ns = !access(optmem_file, F_OK);
-		switch_ns(old_ns);
+		switch_close_ns(old_ns);
 	}
 	return !!optmem_ns;
 }
@@ -317,7 +324,7 @@ size_t test_get_optmem(void)
 		test_error("can't read from %s", optmem_file);
 	fclose(foptmem);
 	if (!is_optmem_namespaced())
-		switch_ns(old_ns);
+		switch_close_ns(old_ns);
 	return ret;
 }
 
@@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old)
 		test_error("can't write %zu to %s", new, optmem_file);
 	fclose(foptmem);
 	if (!is_optmem_namespaced())
-		switch_ns(old_ns);
+		switch_close_ns(old_ns);
 }
 
 static void test_revert_optmem(void)
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index 15aeb0963058..0ffda966c677 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -379,7 +379,6 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
 
 	key_dump[0].nkeys = nr_keys;
 	key_dump[0].get_all = 1;
-	key_dump[0].get_all = 1;
 	err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS,
 			 key_dump, &key_dump_sz);
 	if (err) {
diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c
index 372daca525f5..bdf5522c9213 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/utils.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c
@@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen)
 	}
 }
 
+/* printf()-like write into @fname (append or truncate); returns 0 or negative */
+__printf(3, 4) int test_echo(const char *fname, bool append, const char *fmt, ...)
+{
+	size_t len, written;
+	va_list vargs;
+	char *msg;
+	FILE *f;
+
+	f = fopen(fname, append ? "a" : "w");
+	if (!f)
+		return -errno;
+
+	va_start(vargs, fmt);
+	msg = test_snprintf(fmt, vargs);
+	va_end(vargs);
+	if (!msg) {
+		fclose(f);
+		return -1;
+	}
+	len = strlen(msg);
+	written = fwrite(msg, 1, len, f);
+	free(msg);
+	/* stdio buffers the data: a failed write(2) may only surface in fclose() */
+	return (fclose(f) == 0 && written == len) ? 0 : -1;
+}
+
 const struct sockaddr_in6 addr_any6 = {
 	.sin6_family	= AF_INET6,
 };
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
index 8fdc808df325..ecc6f1e3a414 100644
--- a/tools/testing/selftests/net/tcp_ao/restore.c
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -64,6 +64,7 @@ static void try_server_run(const char *tst_name, unsigned int port,
 		else
 			test_ok("%s: server alive", tst_name);
 	}
+	synchronize_threads(); /* 3: counters checks */
 	if (test_get_tcp_ao_counters(sk, &ao2))
 		test_error("test_get_tcp_ao_counters()");
 	after_cnt = netstat_get_one(cnt_name, NULL);
@@ -71,10 +72,10 @@ static void try_server_run(const char *tst_name, unsigned int port,
 	test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
 
 	if (after_cnt <= before_cnt) {
-		test_fail("%s: %s counter did not increase: %zu <= %zu",
+		test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
 				tst_name, cnt_name, after_cnt, before_cnt);
 	} else {
-		test_ok("%s: counter %s increased %zu => %zu",
+		test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
 			tst_name, cnt_name, before_cnt, after_cnt);
 	}
 
@@ -82,7 +83,7 @@ static void try_server_run(const char *tst_name, unsigned int port,
 	 * Before close() as that will send FIN and move the peer in TCP_CLOSE
 	 * and that will prevent reading AO counters from the peer's socket.
 	 */
-	synchronize_threads(); /* 3: verified => closed */
+	synchronize_threads(); /* 4: verified => closed */
 out:
 	close(sk);
 }
@@ -176,6 +177,7 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
 		else
 			test_ok("%s: post-migrate connection is alive", tst_name);
 	}
+	synchronize_threads(); /* 3: counters checks */
 	if (test_get_tcp_ao_counters(sk, &ao2))
 		test_error("test_get_tcp_ao_counters()");
 	after_cnt = netstat_get_one(cnt_name, NULL);
@@ -183,13 +185,13 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
 	test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
 
 	if (after_cnt <= before_cnt) {
-		test_fail("%s: %s counter did not increase: %zu <= %zu",
+		test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
 				tst_name, cnt_name, after_cnt, before_cnt);
 	} else {
-		test_ok("%s: counter %s increased %zu => %zu",
+		test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
 			tst_name, cnt_name, before_cnt, after_cnt);
 	}
-	synchronize_threads(); /* 3: verified => closed */
+	synchronize_threads(); /* 4: verified => closed */
 	close(sk);
 }
 
@@ -206,22 +208,36 @@ static void *client_fn(void *arg)
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
 	ao_img.snt_isn += 1;
+	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+			      -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+			      port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
 	test_sk_restore("TCP-AO with wrong send ISN", port++,
 			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
 	ao_img.rcv_isn += 1;
+	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+			      -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+			      port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
 	test_sk_restore("TCP-AO with wrong receive ISN", port++,
 			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
 	ao_img.snd_sne += 1;
+	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+			      -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+	/* not expecting server => client mismatches as only snd sne is broken */
 	test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
 			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
 			TEST_CNT_NS_BAD | TEST_CNT_GOOD);
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
 	ao_img.rcv_sne += 1;
+	/* not expecting client => server mismatches as only rcv sne is broken */
+	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+			      port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
 	test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
 			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
 			TEST_CNT_NS_GOOD | TEST_CNT_BAD);
@@ -231,6 +247,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(20, server_fn, client_fn);
+	test_init(21, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index a2fe88d35ac0..6364facaa63e 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -455,6 +455,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(14, server_fn, client_fn);
+	test_init(15, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index a5698b0a3718..3ecd2b58de6a 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -87,7 +87,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
 	netstat_free(ns_after);
 
 	if (after_aogood <= before_aogood) {
-		test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+		test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
 			  tst, after_aogood, before_aogood);
 		close(sk);
 		return;
@@ -148,7 +148,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
 	netstat_free(ns_after);
 	close(sk);
 	if (after_aogood <= before_aogood) {
-		test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+		test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
 			  tst, after_aogood, before_aogood);
 		return;
 	}
@@ -163,17 +163,26 @@ static void *client_fn(void *arg)
 	setup_lo_intf("lo");
 
 	tcp_self_connect("self-connect(same keyids)", port++, false, false);
+
+	/* expecting rnext to change based on the first segment RNext != Current */
+	trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+			      port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
 	tcp_self_connect("self-connect(different keyids)", port++, true, false);
 	tcp_self_connect("self-connect(restore)", port, false, true);
-	port += 2;
+	port += 2; /* restore test restores over different port */
+	trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+			      port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
+	/* intentionally on restore they are added to the socket in different order */
+	trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+			      port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1);
 	tcp_self_connect("self-connect(restore, different keyids)", port, true, true);
-	port += 2;
+	port += 2; /* restore test restores over different port */
 
 	return NULL;
 }
 
 int main(int argc, char *argv[])
 {
-	test_init(4, client_fn, NULL);
+	test_init(5, client_fn, NULL);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index ad4e77d6823e..8901a6785dc8 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -116,7 +116,15 @@ static void *server_fn(void *arg)
 	sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
 			     client_new_port, &ao1);
 
-	synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+	trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr,
+			this_ip_dest, test_server_port + 1, client_new_port, 1);
+	trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest,
+			this_ip_addr, client_new_port, test_server_port + 1, 1);
+	trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr,
+			this_ip_dest, test_server_port + 1, client_new_port, 1);
+	trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest,
+			this_ip_addr, client_new_port, test_server_port + 1, 1);
+	synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
 	bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
 	if (bytes != quota) {
 		if (bytes > 0)
@@ -127,6 +135,7 @@ static void *server_fn(void *arg)
 		test_ok("server alive");
 	}
 
+	synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
 	if (test_get_tcp_ao_counters(sk, &ao2))
 		test_error("test_get_tcp_ao_counters()");
 	after_good = netstat_get_one("TCPAOGood", NULL);
@@ -134,15 +143,15 @@ static void *server_fn(void *arg)
 	test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
 
 	if (after_good <= before_good) {
-		test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+		test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
 			  after_good, before_good);
 	} else {
-		test_ok("TCPAOGood counter increased %zu => %zu",
+		test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
 			before_good, after_good);
 	}
 	after_bad = netstat_get_one("TCPAOBad", NULL);
 	if (after_bad)
-		test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+		test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
 	else
 		test_ok("TCPAOBad counter didn't increase");
 	test_enable_repair(sk);
@@ -206,12 +215,13 @@ static void *client_fn(void *arg)
 	sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
 			     test_server_port + 1, &ao1);
 
-	synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+	synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
 	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
 		test_fail("post-migrate verify failed");
 	else
 		test_ok("post-migrate connection alive");
 
+	synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
 	if (test_get_tcp_ao_counters(sk, &ao2))
 		test_error("test_get_tcp_ao_counters()");
 	after_good = netstat_get_one("TCPAOGood", NULL);
@@ -219,15 +229,15 @@ static void *client_fn(void *arg)
 	test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
 
 	if (after_good <= before_good) {
-		test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+		test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
 			  after_good, before_good);
 	} else {
-		test_ok("TCPAOGood counter increased %zu => %zu",
+		test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
 			before_good, after_good);
 	}
 	after_bad = netstat_get_one("TCPAOBad", NULL);
 	if (after_bad)
-		test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+		test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
 	else
 		test_ok("TCPAOBad counter didn't increase");
 
@@ -240,6 +250,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(7, server_fn, client_fn);
+	test_init(8, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
index 517930f9721b..084db4ecdff6 100644
--- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -30,8 +30,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
 #define __cmp_ao(member)							\
 do {										\
 	if (info->member != tmp.member) {					\
-		test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu",	\
-			  tst, (size_t)info->member, (size_t)tmp.member);	\
+		test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64,	\
+			  tst, (uint64_t)info->member, (uint64_t)tmp.member);	\
 		return;								\
 	}									\
 } while(0)
@@ -830,6 +830,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(120, client_fn, NULL);
+	test_init(121, client_fn, NULL);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index 6b59a652159f..f779e5892bc1 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -70,6 +70,7 @@ static void try_accept(const char *tst_name, unsigned int port,
 
 	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
 	err = test_wait_fd(lsk, timeout, 0);
+	synchronize_threads(); /* connect()/accept() timeouts */
 	if (err == -ETIMEDOUT) {
 		if (!fault(TIMEOUT))
 			test_fail("timed out for accept()");
@@ -100,10 +101,10 @@ static void try_accept(const char *tst_name, unsigned int port,
 	after_cnt = netstat_get_one(cnt_name, NULL);
 
 	if (after_cnt <= before_cnt) {
-		test_fail("%s: %s counter did not increase: %zu <= %zu",
+		test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
 				tst_name, cnt_name, after_cnt, before_cnt);
 	} else {
-		test_ok("%s: counter %s increased %zu => %zu",
+		test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
 			tst_name, cnt_name, before_cnt, after_cnt);
 	}
 	if (ao_addr)
@@ -283,6 +284,7 @@ static void try_connect(const char *tst_name, unsigned int port,
 	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
 	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
 
+	synchronize_threads(); /* connect()/accept() timeouts */
 	if (ret < 0) {
 		if (fault(KEYREJECT) && ret == -EKEYREJECTED)
 			test_ok("%s: connect() was prevented", tst_name);
@@ -451,6 +453,7 @@ static void try_to_add(const char *tst_name, unsigned int port,
 	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
 	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
 
+	synchronize_threads(); /* connect()/accept() timeouts */
 	if (ret <= 0) {
 		test_error("%s: connect() returned %d", tst_name, ret);
 		goto out;
@@ -671,24 +674,38 @@ static void *client_fn(void *arg)
 
 	try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0,
 		    &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+	trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
 		    NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+	trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0,
 		    NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
 	try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
 		    &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+	trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
 		    NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2);
 
+	trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0,
 		   &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
 	try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
 		   NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+	trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0,
 		   NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
 
+	trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("no sign server: AO client", port++, NULL, 0,
 		   &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+	trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("no sign server: MD5 client", port++, &addr_any, 0,
 		   NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
 	try_connect("no sign server: no sign client", port++, NULL, 0,
@@ -696,25 +713,37 @@ static void *client_fn(void *arg)
 
 	try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0,
 		   &addr_any, 0, 100, 100, 0, 0, 1, &client2);
+	trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("AO+MD5 server: AO client (misconfig, matching MD5)",
 		   port++, NULL, 0, &addr_any, 0, 100, 100, 0,
 		   FAULT_TIMEOUT, 1, &this_ip_addr);
+	trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest,
+			      -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
 	try_connect("AO+MD5 server: AO client (misconfig, non-matching)",
 		   port++, NULL, 0, &addr_any, 0, 100, 100, 0,
 		   FAULT_TIMEOUT, 1, &client3);
 	try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0,
 		   NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+	trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)",
 		   port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
 		   1, &client2);
+	trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)",
 		   port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
 		   1, &client3);
 	try_connect("AO+MD5 server: no sign client (unmatched)",
 		   port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3);
+	trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO+MD5 server: no sign client (misconfig, matching AO)",
 		   port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
 		   1, &client2);
+	trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+				this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
 	try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)",
 		   port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
 		   1, &this_ip_addr);
@@ -736,6 +765,6 @@ static void *client_fn(void *arg)
 
 int main(int argc, char *argv[])
 {
-	test_init(72, server_fn, client_fn);
+	test_init(73, server_fn, client_fn);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index ec60a16c9307..d626f22f9550 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -356,8 +356,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
 		}
 	}
 
-	if (batch > 1)
+	if (batch > 1) {
 		fprintf(stderr, "batched %d timestamps\n", batch);
+	} else if (!batch) {
+		fprintf(stderr, "Failed to report timestamps\n");
+		test_failed = true;
+	}
 }
 
 static int recv_errmsg(int fd)
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index f52aa5f7da52..3e751234ccfe 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -30,14 +30,7 @@
 
 source lib.sh
 
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
-	PATH=$PWD:$PATH
-	if ! which nettest >/dev/null; then
-		echo "'nettest' command not found; skipping tests"
-		exit $ksft_skip
-	fi
-fi
+check_gen_prog "nettest"
 
 result=0
 
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index 152171fb1fc8..e9c2f71da207 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -59,7 +59,6 @@
 # while it is forwarded between different vrfs.
 
 source lib.sh
-PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
 VERBOSE=0
 PAUSE_ON_FAIL=no
 DEFAULT_TTYPE=sym
@@ -636,6 +635,8 @@ EOF
 # Some systems don't have a ping6 binary anymore
 command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
 
+check_gen_prog "nettest"
+
 TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local
 ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym"
 TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local
diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
new file mode 100755
index 000000000000..2fab29d3cb91
--- /dev/null
+++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+source lib.sh
+
+timeout=4m
+ret=0
+tmp=$(mktemp)
+cleanup() {
+	cleanup_all_ns
+	rm -f "$tmp"
+}
+
+trap cleanup EXIT
+
+maxpolicies=100000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000
+
+# Print up to $2 "xfrm policy add" commands (direction $1) for "ip -batch".
+do_dummies4() {
+	local dir="$1"
+	local max="$2"
+	local policies=0
+	local pfx=30
+	local i j a s d
+
+	# flush the policies in $ns before generating new ones
+	ip netns exec "$ns" ip xfrm policy flush
+
+	for i in $(seq 1 100);do
+		# the second address octets depend only on the outer loop
+		s=$((i+0))
+		d=$((i+100))
+		for j in $(seq 1 255);do
+			# fixed source network, varying destination address
+			for a in $(seq 1 8 255); do
+				policies=$((policies+1))
+				[ "$policies" -gt "$max" ] && return
+				echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block
+			done
+			# varying source address, fixed destination network
+			for a in $(seq 1 8 255); do
+				policies=$((policies+1))
+				[ "$policies" -gt "$max" ] && return
+				echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block
+			done
+		done
+	done
+}
+
+setup_ns ns
+
+# Insert up to $1 policies via "ip -batch", print the time, verify the count.
+do_bench()
+{
+	local max="$1"
+	local start stop policies have
+
+	start=$(date +%s%3N)
+	do_dummies4 "out" "$max" > "$tmp"
+	if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then
+		echo "WARNING: policy insertion cancelled after $timeout"
+		ret=1
+	fi
+	stop=$(date +%s%3N)
+
+	policies=$(wc -l < "$tmp")
+	printf "Inserted %-06s policies in %s ms\n" "$policies" "$((stop-start))"
+
+	have=$(ip netns exec "$ns" ip xfrm policy show | grep -c "action block")
+	if [ "$have" -ne "$policies" ]; then
+		echo "WARNING: mismatch, have $have policies, expected $policies"
+		ret=1
+	fi
+}
+
+p=100
+while [ $p -le "$maxpolicies" ]; do
+	do_bench "$p"
+	p="${p}0"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
index 7f7d3eb8b9c9..f9ccae50349b 100644
--- a/tools/testing/selftests/riscv/mm/mmap_bottomup.c
+++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
@@ -7,8 +7,6 @@
 TEST(infinite_rlimit)
 {
 	EXPECT_EQ(BOTTOM_UP, memory_layout());
-
-	TEST_MMAPS;
 }
 
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c
index 2ba3ec990006..3f53b6ecc326 100644
--- a/tools/testing/selftests/riscv/mm/mmap_default.c
+++ b/tools/testing/selftests/riscv/mm/mmap_default.c
@@ -7,8 +7,6 @@
 TEST(default_rlimit)
 {
 	EXPECT_EQ(TOP_DOWN, memory_layout());
-
-	TEST_MMAPS;
 }
 
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
index 3b29ca3bb3d4..75918d15919f 100644
--- a/tools/testing/selftests/riscv/mm/mmap_test.h
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
@@ -10,76 +10,9 @@
 #define TOP_DOWN 0
 #define BOTTOM_UP 1
 
-#if __riscv_xlen == 64
-uint64_t random_addresses[] = {
-	0x19764f0d73b3a9f0, 0x016049584cecef59, 0x3580bdd3562f4acd,
-	0x1164219f20b17da0, 0x07d97fcb40ff2373, 0x76ec528921272ee7,
-	0x4dd48c38a3de3f70, 0x2e11415055f6997d, 0x14b43334ac476c02,
-	0x375a60795aff19f6, 0x47f3051725b8ee1a, 0x4e697cf240494a9f,
-	0x456b59b5c2f9e9d1, 0x101724379d63cb96, 0x7fe9ad31619528c1,
-	0x2f417247c495c2ea, 0x329a5a5b82943a5e, 0x06d7a9d6adcd3827,
-	0x327b0b9ee37f62d5, 0x17c7b1851dfd9b76, 0x006ebb6456ec2cd9,
-	0x00836cd14146a134, 0x00e5c4dcde7126db, 0x004c29feadf75753,
-	0x00d8b20149ed930c, 0x00d71574c269387a, 0x0006ebe4a82acb7a,
-	0x0016135df51f471b, 0x00758bdb55455160, 0x00d0bdd949b13b32,
-	0x00ecea01e7c5f54b, 0x00e37b071b9948b1, 0x0011fdd00ff57ab3,
-	0x00e407294b52f5ea, 0x00567748c200ed20, 0x000d073084651046,
-	0x00ac896f4365463c, 0x00eb0d49a0b26216, 0x0066a2564a982a31,
-	0x002e0d20237784ae, 0x0000554ff8a77a76, 0x00006ce07a54c012,
-	0x000009570516d799, 0x00000954ca15b84d, 0x0000684f0d453379,
-	0x00002ae5816302b5, 0x0000042403fb54bf, 0x00004bad7392bf30,
-	0x00003e73bfa4b5e3, 0x00005442c29978e0, 0x00002803f11286b6,
-	0x000073875d745fc6, 0x00007cede9cb8240, 0x000027df84cc6a4f,
-	0x00006d7e0e74242a, 0x00004afd0b836e02, 0x000047d0e837cd82,
-	0x00003b42405efeda, 0x00001531bafa4c95, 0x00007172cae34ac4,
-};
-#else
-uint32_t random_addresses[] = {
-	0x8dc302e0, 0x929ab1e0, 0xb47683ba, 0xea519c73, 0xa19f1c90, 0xc49ba213,
-	0x8f57c625, 0xadfe5137, 0x874d4d95, 0xaa20f09d, 0xcf21ebfc, 0xda7737f1,
-	0xcedf392a, 0x83026c14, 0xccedca52, 0xc6ccf826, 0xe0cd9415, 0x997472ca,
-	0xa21a44c1, 0xe82196f5, 0xa23fd66b, 0xc28d5590, 0xd009cdce, 0xcf0be646,
-	0x8fc8c7ff, 0xe2a85984, 0xa3d3236b, 0x89a0619d, 0xc03db924, 0xb5d4cc1b,
-	0xb96ee04c, 0xd191da48, 0xb432a000, 0xaa2bebbc, 0xa2fcb289, 0xb0cca89b,
-	0xb0c18d6a, 0x88f58deb, 0xa4d42d1c, 0xe4d74e86, 0x99902b09, 0x8f786d31,
-	0xbec5e381, 0x9a727e65, 0xa9a65040, 0xa880d789, 0x8f1b335e, 0xfc821c1e,
-	0x97e34be4, 0xbbef84ed, 0xf447d197, 0xfd7ceee2, 0xe632348d, 0xee4590f4,
-	0x958992a5, 0xd57e05d6, 0xfd240970, 0xc5b0dcff, 0xd96da2c2, 0xa7ae041d,
-};
-#endif
-
-// Only works on 64 bit
-#if __riscv_xlen == 64
 #define PROT (PROT_READ | PROT_WRITE)
 #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
 
-/* mmap must return a value that doesn't use more bits than the hint address. */
-static inline unsigned long get_max_value(unsigned long input)
-{
-	unsigned long max_bit = (1UL << (((sizeof(unsigned long) * 8) - 1 -
-					  __builtin_clzl(input))));
-
-	return max_bit + (max_bit - 1);
-}
-
-#define TEST_MMAPS                                                            \
-	({                                                                    \
-		void *mmap_addr;                                              \
-		for (int i = 0; i < ARRAY_SIZE(random_addresses); i++) {      \
-			mmap_addr = mmap((void *)random_addresses[i],         \
-					 5 * sizeof(int), PROT, FLAGS, 0, 0); \
-			EXPECT_NE(MAP_FAILED, mmap_addr);                     \
-			EXPECT_GE((void *)get_max_value(random_addresses[i]), \
-				  mmap_addr);                                 \
-			mmap_addr = mmap((void *)random_addresses[i],         \
-					 5 * sizeof(int), PROT, FLAGS, 0, 0); \
-			EXPECT_NE(MAP_FAILED, mmap_addr);                     \
-			EXPECT_GE((void *)get_max_value(random_addresses[i]), \
-				  mmap_addr);                                 \
-		}                                                             \
-	})
-#endif /* __riscv_xlen == 64 */
-
 static inline int memory_layout(void)
 {
 	void *value1 = mmap(NULL, sizeof(int), PROT, FLAGS, 0, 0);
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 07c81c0093c0..16bd49492efa 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -6,10 +6,13 @@
  *
  * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
  */
-
+#define _GNU_SOURCE
 #include <sys/time.h>
+#include <sys/types.h>
 #include <stdio.h>
 #include <signal.h>
+#include <stdint.h>
+#include <string.h>
 #include <unistd.h>
 #include <time.h>
 #include <pthread.h>
@@ -18,6 +21,21 @@
 
 #define DELAY 2
 #define USECS_PER_SEC 1000000
+#define NSECS_PER_SEC 1000000000
+
+static void __fatal_error(const char *test, const char *name, const char *what)
+{
+	char buf[64];
+	/* GNU strerror_r() (_GNU_SOURCE) returns the text and may not fill buf */
+	const char *msg = strerror_r(errno, buf, sizeof(buf));
+
+	if (name && strlen(name))
+		ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, msg);
+	else
+		ksft_exit_fail_msg("%s %s %s\n", test, what, msg);
+}
+
+#define fatal_error(name, what)	__fatal_error(__func__, name, what)
 
 static volatile int done;
 
@@ -74,24 +92,13 @@ static int check_diff(struct timeval start, struct timeval end)
 	return 0;
 }
 
-static int check_itimer(int which)
+static void check_itimer(int which, const char *name)
 {
-	const char *name;
-	int err;
 	struct timeval start, end;
 	struct itimerval val = {
 		.it_value.tv_sec = DELAY,
 	};
 
-	if (which == ITIMER_VIRTUAL)
-		name = "ITIMER_VIRTUAL";
-	else if (which == ITIMER_PROF)
-		name = "ITIMER_PROF";
-	else if (which == ITIMER_REAL)
-		name = "ITIMER_REAL";
-	else
-		return -1;
-
 	done = 0;
 
 	if (which == ITIMER_VIRTUAL)
@@ -101,17 +108,11 @@ static int check_itimer(int which)
 	else if (which == ITIMER_REAL)
 		signal(SIGALRM, sig_handler);
 
-	err = gettimeofday(&start, NULL);
-	if (err < 0) {
-		ksft_perror("Can't call gettimeofday()");
-		return -1;
-	}
+	if (gettimeofday(&start, NULL) < 0)
+		fatal_error(name, "gettimeofday()");
 
-	err = setitimer(which, &val, NULL);
-	if (err < 0) {
-		ksft_perror("Can't set timer");
-		return -1;
-	}
+	if (setitimer(which, &val, NULL) < 0)
+		fatal_error(name, "setitimer()");
 
 	if (which == ITIMER_VIRTUAL)
 		user_loop();
@@ -120,68 +121,41 @@ static int check_itimer(int which)
 	else if (which == ITIMER_REAL)
 		idle_loop();
 
-	err = gettimeofday(&end, NULL);
-	if (err < 0) {
-		ksft_perror("Can't call gettimeofday()");
-		return -1;
-	}
+	if (gettimeofday(&end, NULL) < 0)
+		fatal_error(name, "gettimeofday()");
 
 	ksft_test_result(check_diff(start, end) == 0, "%s\n", name);
-
-	return 0;
 }
 
-static int check_timer_create(int which)
+static void check_timer_create(int which, const char *name)
 {
-	const char *type;
-	int err;
-	timer_t id;
 	struct timeval start, end;
 	struct itimerspec val = {
 		.it_value.tv_sec = DELAY,
 	};
-
-	if (which == CLOCK_THREAD_CPUTIME_ID) {
-		type = "thread";
-	} else if (which == CLOCK_PROCESS_CPUTIME_ID) {
-		type = "process";
-	} else {
-		ksft_print_msg("Unknown timer_create() type %d\n", which);
-		return -1;
-	}
+	timer_t id;
 
 	done = 0;
-	err = timer_create(which, NULL, &id);
-	if (err < 0) {
-		ksft_perror("Can't create timer");
-		return -1;
-	}
-	signal(SIGALRM, sig_handler);
 
-	err = gettimeofday(&start, NULL);
-	if (err < 0) {
-		ksft_perror("Can't call gettimeofday()");
-		return -1;
-	}
+	if (timer_create(which, NULL, &id) < 0)
+		fatal_error(name, "timer_create()");
 
-	err = timer_settime(id, 0, &val, NULL);
-	if (err < 0) {
-		ksft_perror("Can't set timer");
-		return -1;
-	}
+	if (signal(SIGALRM, sig_handler) == SIG_ERR)
+		fatal_error(name, "signal()");
+
+	if (gettimeofday(&start, NULL) < 0)
+		fatal_error(name, "gettimeofday()");
+
+	if (timer_settime(id, 0, &val, NULL) < 0)
+		fatal_error(name, "timer_settime()");
 
 	user_loop();
 
-	err = gettimeofday(&end, NULL);
-	if (err < 0) {
-		ksft_perror("Can't call gettimeofday()");
-		return -1;
-	}
+	if (gettimeofday(&end, NULL) < 0)
+		fatal_error(name, "gettimeofday()");
 
 	ksft_test_result(check_diff(start, end) == 0,
-			 "timer_create() per %s\n", type);
-
-	return 0;
+			 "timer_create() per %s\n", name);
 }
 
 static pthread_t ctd_thread;
@@ -209,15 +183,14 @@ static void *ctd_thread_func(void *arg)
 
 	ctd_count = 100;
 	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
-		return "Can't create timer\n";
+		fatal_error(NULL, "timer_create()");
 	if (timer_settime(id, 0, &val, NULL))
-		return "Can't set timer\n";
-
+		fatal_error(NULL, "timer_settime()");
 	while (ctd_count > 0 && !ctd_failed)
 		;
 
 	if (timer_delete(id))
-		return "Can't delete timer\n";
+		fatal_error(NULL, "timer_delete()");
 
 	return NULL;
 }
@@ -225,19 +198,16 @@ static void *ctd_thread_func(void *arg)
 /*
  * Test that only the running thread receives the timer signal.
  */
-static int check_timer_distribution(void)
+static void check_timer_distribution(void)
 {
-	const char *errmsg;
+	if (signal(SIGALRM, ctd_sighandler) == SIG_ERR)
+		fatal_error(NULL, "signal()");
 
-	signal(SIGALRM, ctd_sighandler);
-
-	errmsg = "Can't create thread\n";
 	if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
-		goto err;
+		fatal_error(NULL, "pthread_create()");
 
-	errmsg = "Can't join thread\n";
-	if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
-		goto err;
+	if (pthread_join(ctd_thread, NULL))
+		fatal_error(NULL, "pthread_join()");
 
 	if (!ctd_failed)
 		ksft_test_result_pass("check signal distribution\n");
@@ -245,31 +215,399 @@ static int check_timer_distribution(void)
 		ksft_test_result_fail("check signal distribution\n");
 	else
 		ksft_test_result_skip("check signal distribution (old kernel)\n");
-	return 0;
-err:
-	ksft_print_msg("%s", errmsg);
-	return -1;
+}
+
+struct tmrsig {
+	int	signals;
+	int	overruns;
+};
+
+static void siginfo_handler(int sig, siginfo_t *si, void *uc)
+{
+	struct tmrsig *tsig = si ? si->si_ptr : NULL;
+
+	if (tsig) {
+		tsig->signals++;
+		tsig->overruns += si->si_overrun;
+	}
+}
+
+static void *ignore_thread(void *arg)
+{
+	unsigned int *tid = arg;
+	sigset_t set;
+
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	if (sigprocmask(SIG_BLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+	*tid = gettid();
+	sleep(100);
+
+	if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+	return NULL;
+}
+
+static void check_sig_ign(int thread)
+{
+	struct tmrsig tsig = { };
+	struct itimerspec its;
+	unsigned int tid = 0;
+	struct sigaction sa;
+	struct sigevent sev;
+	pthread_t pthread;
+	timer_t timerid;
+	sigset_t set;
+
+	if (thread) {
+		if (pthread_create(&pthread, NULL, ignore_thread, &tid))
+			fatal_error(NULL, "pthread_create()");
+		sleep(1);
+	}
+	/* siginfo_handler() tallies signals and overruns in the tmrsig from si_ptr */
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = siginfo_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGUSR1, &sa, NULL))
+		fatal_error(NULL, "sigaction()");
+
+	/* Block the signal */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	if (sigprocmask(SIG_BLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL;
+	sev.sigev_signo = SIGUSR1;
+	sev.sigev_value.sival_ptr = &tsig;
+	if (thread) {
+		sev.sigev_notify = SIGEV_THREAD_ID;
+		sev._sigev_un._tid = tid;
+	}
+
+	if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+		fatal_error(NULL, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval = its.it_value;
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(NULL, "timer_settime()");
+
+	sleep(1);
+
+	/* Set the signal to be ignored */
+	if (signal(SIGUSR1, SIG_IGN) == SIG_ERR)
+		fatal_error(NULL, "signal(SIG_IGN)");
+
+	sleep(1);
+
+	if (thread) {
+		/* Stop the thread first. No signal should be delivered to it */
+		if (pthread_cancel(pthread))
+			fatal_error(NULL, "pthread_cancel()");
+		if (pthread_join(pthread, NULL))
+			fatal_error(NULL, "pthread_join()");
+	}
+
+	/* Restore the handler */
+	if (sigaction(SIGUSR1, &sa, NULL))
+		fatal_error(NULL, "sigaction()");
+
+	sleep(1);
+
+	/* Unblock it, which should deliver the signal in the !thread case */
+	if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+	if (timer_delete(timerid))
+		fatal_error(NULL, "timer_delete()");
+
+	if (!thread) {
+		ksft_test_result(tsig.signals == 1 && tsig.overruns == 29,
+				 "check_sig_ign SIGEV_SIGNAL\n");
+	} else {
+		ksft_test_result(tsig.signals == 0 && tsig.overruns == 0,
+				 "check_sig_ign SIGEV_THREAD_ID\n");
+	}
+}
+
+static void check_rearm(void)
+{
+	struct tmrsig tsig = { };
+	struct itimerspec its;
+	struct sigaction sa;
+	struct sigevent sev;
+	timer_t timerid;
+	sigset_t set;
+
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = siginfo_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGUSR1, &sa, NULL))
+		fatal_error(NULL, "sigaction()");
+
+	/* Block the signal */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	if (sigprocmask(SIG_BLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL;
+	sev.sigev_signo = SIGUSR1;
+	sev.sigev_value.sival_ptr = &tsig;
+	if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+		fatal_error(NULL, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval.tv_sec = 0;
+	its.it_interval.tv_nsec = 100000000;
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(NULL, "timer_settime()");
+
+	sleep(1);
+
+	/* Reprogram the timer to single shot */
+	its.it_value.tv_sec = 10;
+	its.it_value.tv_nsec = 0;
+	its.it_interval.tv_sec = 0;
+	its.it_interval.tv_nsec = 0;
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(NULL, "timer_settime()");
+
+	/* Unblock it, which should not deliver a signal */
+	if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+	if (timer_delete(timerid))
+		fatal_error(NULL, "timer_delete()");
+
+	ksft_test_result(!tsig.signals, "check_rearm\n");
+}
+
+static void check_delete(void)
+{
+	struct tmrsig tsig = { };
+	struct itimerspec its;
+	struct sigaction sa;
+	struct sigevent sev;
+	timer_t timerid;
+	sigset_t set;
+
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = siginfo_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGUSR1, &sa, NULL))
+		fatal_error(NULL, "sigaction()");
+
+	/* Block the signal */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	if (sigprocmask(SIG_BLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL;
+	sev.sigev_signo = SIGUSR1;
+	sev.sigev_value.sival_ptr = &tsig;
+	if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+		fatal_error(NULL, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval.tv_sec = 0;
+	its.it_interval.tv_nsec = 100000000;
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(NULL, "timer_settime()");
+
+	sleep(1);
+
+	if (timer_delete(timerid))
+		fatal_error(NULL, "timer_delete()");
+
+	/* Unblock it, which should not deliver a signal */
+	if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+		fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+	ksft_test_result(!tsig.signals, "check_delete\n");
+}
+
+static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2)
+{
+	/* t1 - t2 in ns; widen first so a 64-bit tv_sec is not cut down to int */
+	int64_t diff = NSECS_PER_SEC * ((int64_t)t1.tv_sec - (int64_t)t2.tv_sec);
+
+	diff += (int64_t)t1.tv_nsec - (int64_t)t2.tv_nsec;
+	return diff;
+}
+
+static void check_sigev_none(int which, const char *name)
+{
+	struct timespec start, now;
+	struct itimerspec its;
+	struct sigevent sev;
+	timer_t timerid;
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_NONE;
+
+	if (timer_create(which, &sev, &timerid))
+		fatal_error(name, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval = its.it_value;
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(name, "timer_settime()");
+
+	if (clock_gettime(which, &start))
+		fatal_error(name, "clock_gettime()");
+	/* Busy-wait until one second has elapsed on the timer's own clock */
+	do {
+		if (clock_gettime(which, &now))
+			fatal_error(name, "clock_gettime()");
+	} while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+	if (timer_gettime(timerid, &its))
+		fatal_error(name, "timer_gettime()");
+
+	if (timer_delete(timerid))
+		fatal_error(name, "timer_delete()");
+	/* Pass only if timer_gettime() still reported a non-zero time to expiry */
+	ksft_test_result(its.it_value.tv_sec || its.it_value.tv_nsec,
+			 "check_sigev_none %s\n", name);
+}
+
+static void check_gettime(int which, const char *name)
+{
+	struct itimerspec its, prev;
+	struct timespec start, now;
+	struct sigevent sev;
+	timer_t timerid;
+	int wraps = 0;
+	sigset_t set;
+
+	/* Block the signal */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	if (sigprocmask(SIG_BLOCK, &set, NULL))
+		fatal_error(name, "sigprocmask(SIG_BLOCK)");
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL;
+	sev.sigev_signo = SIGUSR1;
+
+	if (timer_create(which, &sev, &timerid))
+		fatal_error(name, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval.tv_sec = 0;
+	its.it_interval.tv_nsec = 100000000;
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(name, "timer_settime()");
+
+	if (timer_gettime(timerid, &prev))
+		fatal_error(name, "timer_gettime()");
+
+	if (clock_gettime(which, &start))
+		fatal_error(name, "clock_gettime()");
+
+	do {
+		if (clock_gettime(which, &now))
+			fatal_error(name, "clock_gettime()");
+		if (timer_gettime(timerid, &its))
+			fatal_error(name, "timer_gettime()");
+		if (its.it_value.tv_nsec > prev.it_value.tv_nsec)
+			wraps++;
+		prev = its;
+
+	} while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+	if (timer_delete(timerid))
+		fatal_error(name, "timer_delete()");
+
+	ksft_test_result(wraps > 1, "check_gettime %s\n", name);
+}
+
+static void check_overrun(int which, const char *name)
+{
+	struct timespec start, now;
+	struct tmrsig tsig = { };
+	struct itimerspec its;
+	struct sigaction sa;
+	struct sigevent sev;
+	timer_t timerid;
+	sigset_t set;
+
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = siginfo_handler;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(SIGUSR1, &sa, NULL))
+		fatal_error(name, "sigaction()");
+
+	/* Block the signal */
+	sigemptyset(&set);
+	sigaddset(&set, SIGUSR1);
+	if (sigprocmask(SIG_BLOCK, &set, NULL))
+		fatal_error(name, "sigprocmask(SIG_BLOCK)");
+
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL;
+	sev.sigev_signo = SIGUSR1;
+	sev.sigev_value.sival_ptr = &tsig;
+	if (timer_create(which, &sev, &timerid))
+		fatal_error(name, "timer_create()");
+
+	/* Start the timer to expire in 100ms and 100ms intervals */
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 100000000;
+	its.it_interval.tv_sec = 0;
+	its.it_interval.tv_nsec = 100000000;
+	if (timer_settime(timerid, 0, &its, NULL))
+		fatal_error(name, "timer_settime()");
+
+	if (clock_gettime(which, &start))
+		fatal_error(name, "clock_gettime()");
+
+	do {
+		if (clock_gettime(which, &now))
+			fatal_error(name, "clock_gettime()");
+	} while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+	/* Unblock it, which should deliver a signal */
+	if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+		fatal_error(name, "sigprocmask(SIG_UNBLOCK)");
+
+	if (timer_delete(timerid))
+		fatal_error(name, "timer_delete()");
+
+	ksft_test_result(tsig.signals == 1 && tsig.overruns == 9,
+			 "check_overrun %s\n", name);
 }
 
 int main(int argc, char **argv)
 {
 	ksft_print_header();
-	ksft_set_plan(6);
+	ksft_set_plan(18);
 
 	ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
 	ksft_print_msg("based timers if other threads run on the CPU...\n");
 
-	if (check_itimer(ITIMER_VIRTUAL) < 0)
-		ksft_exit_fail();
-
-	if (check_itimer(ITIMER_PROF) < 0)
-		ksft_exit_fail();
-
-	if (check_itimer(ITIMER_REAL) < 0)
-		ksft_exit_fail();
-
-	if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
-		ksft_exit_fail();
+	check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL");
+	check_itimer(ITIMER_PROF, "ITIMER_PROF");
+	check_itimer(ITIMER_REAL, "ITIMER_REAL");
+	check_timer_create(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
 
 	/*
 	 * It's unfortunately hard to reliably test a timer expiration
@@ -280,11 +618,21 @@ int main(int argc, char **argv)
 	 * to ensure true parallelism. So test only one thread until we
 	 * find a better solution.
 	 */
-	if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
-		ksft_exit_fail();
-
-	if (check_timer_distribution() < 0)
-		ksft_exit_fail();
+	check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+	check_timer_distribution();
+
+	check_sig_ign(0);
+	check_sig_ign(1);
+	check_rearm();
+	check_delete();
+	check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+	check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+	check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+	check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+	check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
+	check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+	check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+	check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
 
 	ksft_finished();
 }
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5c8757a25998..d51249f14e2f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -77,7 +77,7 @@ all_32: $(BINARIES_32)
 
 all_64: $(BINARIES_64)
 
-EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
+EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) srso
 
 $(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
 	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm
diff --git a/tools/testing/selftests/x86/srso.c b/tools/testing/selftests/x86/srso.c
new file mode 100644
index 000000000000..394ec8bdeb00
--- /dev/null
+++ b/tools/testing/selftests/x86/srso.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <cpuid.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int main(void)
+{
+	struct perf_event_attr ret_attr, mret_attr;
+	long long count_rets, count_rets_mispred;
+	int rrets_fd, mrrets_fd;
+	unsigned int cpuid1_eax, b, c, d;
+
+	__cpuid(1, cpuid1_eax, b, c, d);
+	if (cpuid1_eax < 0x00800f00 || cpuid1_eax > 0x00afffff) {
+		fprintf(stderr, "This needs to run on a Zen[1-4] machine (CPUID(1).EAX: 0x%x). Exiting...\n", cpuid1_eax);
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&ret_attr, 0, sizeof(struct perf_event_attr));
+	memset(&mret_attr, 0, sizeof(struct perf_event_attr));
+
+	ret_attr.type = mret_attr.type = PERF_TYPE_RAW;
+	ret_attr.size = mret_attr.size = sizeof(struct perf_event_attr);
+	ret_attr.config = 0xc8;
+	mret_attr.config = 0xc9;
+	ret_attr.disabled = mret_attr.disabled = 1;
+	ret_attr.exclude_user = mret_attr.exclude_user = 1;
+	ret_attr.exclude_hv = mret_attr.exclude_hv = 1;
+
+	rrets_fd = syscall(SYS_perf_event_open, &ret_attr, 0, -1, -1, 0);
+	if (rrets_fd == -1) {
+		perror("opening retired RETs fd");
+		exit(EXIT_FAILURE);
+	}
+
+	mrrets_fd = syscall(SYS_perf_event_open, &mret_attr, 0, -1, -1, 0);
+	if (mrrets_fd == -1) {
+		perror("opening retired mispredicted RETs fd");
+		exit(EXIT_FAILURE);
+	}
+
+	ioctl(rrets_fd, PERF_EVENT_IOC_RESET, 0);
+	ioctl(mrrets_fd, PERF_EVENT_IOC_RESET, 0);
+	ioctl(rrets_fd, PERF_EVENT_IOC_ENABLE, 0);
+	ioctl(mrrets_fd, PERF_EVENT_IOC_ENABLE, 0);
+
+	printf("Sleeping for 10 seconds\n");
+	sleep(10);
+
+	ioctl(rrets_fd, PERF_EVENT_IOC_DISABLE, 0);
+	ioctl(mrrets_fd, PERF_EVENT_IOC_DISABLE, 0);
+
+	/* A failed or short read would leave the counts uninitialized */
+	if (read(rrets_fd, &count_rets, sizeof(long long)) != sizeof(long long) ||
+	    read(mrrets_fd, &count_rets_mispred, sizeof(long long)) != sizeof(long long)) {
+		perror("reading counters");
+		exit(EXIT_FAILURE);
+	}
+
+	printf("RETs: (%lld retired <-> %lld mispredicted)\n", count_rets, count_rets_mispred);
+	printf("SRSO Safe-RET mitigation works correctly if both counts are almost equal.\n");
+
+	return 0;
+}
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 554b290fefdc..a3d448a075e3 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
 }
 
 /* Connect to <cid, port> and return the file descriptor. */
-static int vsock_connect(unsigned int cid, unsigned int port, int type)
+int vsock_connect(unsigned int cid, unsigned int port, int type)
 {
 	union {
 		struct sockaddr sa;
@@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type)
 /* Listen on <cid, port> and return the first incoming connection.  The remote
  * address is stored to clientaddrp.  clientaddrp may be NULL.
  */
-static int vsock_accept(unsigned int cid, unsigned int port,
-			struct sockaddr_vm *clientaddrp, int type)
+int vsock_accept(unsigned int cid, unsigned int port,
+		 struct sockaddr_vm *clientaddrp, int type)
 {
 	union {
 		struct sockaddr sa;
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e95e62485959..fff22d4a14c0 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -39,6 +39,9 @@ struct test_case {
 void init_signals(void);
 unsigned int parse_cid(const char *str);
 unsigned int parse_port(const char *str);
+int vsock_connect(unsigned int cid, unsigned int port, int type);
+int vsock_accept(unsigned int cid, unsigned int port,
+		 struct sockaddr_vm *clientaddrp, int type);
 int vsock_stream_connect(unsigned int cid, unsigned int port);
 int vsock_bind_connect(unsigned int cid, unsigned int port,
 		       unsigned int bind_port, int type);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index f851f8961247..8d38dbf8f41f 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -20,6 +20,8 @@
 #include <sys/mman.h>
 #include <poll.h>
 #include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/sockios.h>
 
 #include "vsock_test_zerocopy.h"
 #include "timeout.h"
@@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
 	}
 }
 
+#define MSG_BUF_IOCTL_LEN 64
+static void test_unsent_bytes_server(const struct test_opts *opts, int type)
+{
+	unsigned char buf[MSG_BUF_IOCTL_LEN];
+	int client_fd;
+
+	client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
+	if (client_fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf));
+	control_writeln("RECEIVED");
+
+	close(client_fd);
+}
+
+static void test_unsent_bytes_client(const struct test_opts *opts, int type)
+{
+	unsigned char buf[MSG_BUF_IOCTL_LEN];
+	int ret, fd, sock_bytes_unsent;
+
+	fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	for (int i = 0; i < sizeof(buf); i++)
+		buf[i] = rand() & 0xFF;
+
+	send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
+	control_expectln("RECEIVED");
+
+	ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+	if (ret < 0) {
+		if (errno == EOPNOTSUPP) {
+			fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+		} else {
+			perror("ioctl");
+			exit(EXIT_FAILURE);
+		}
+	} else if (ret == 0 && sock_bytes_unsent != 0) {
+		fprintf(stderr,
+			"Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
+			sock_bytes_unsent);
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_stream_unsent_bytes_client(const struct test_opts *opts)
+{
+	test_unsent_bytes_client(opts, SOCK_STREAM);
+}
+
+static void test_stream_unsent_bytes_server(const struct test_opts *opts)
+{
+	test_unsent_bytes_server(opts, SOCK_STREAM);
+}
+
+static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts)
+{
+	test_unsent_bytes_client(opts, SOCK_SEQPACKET);
+}
+
+static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
+{
+	test_unsent_bytes_server(opts, SOCK_SEQPACKET);
+}
+
 #define RCVLOWAT_CREDIT_UPD_BUF_SIZE	(1024 * 128)
 /* This define is the same as in 'include/linux/virtio_vsock.h':
  * it is used to decide when to send credit update message during
@@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_rcvlowat_def_cred_upd_client,
 		.run_server = test_stream_cred_upd_on_low_rx_bytes,
 	},
+	{
+		.name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes",
+		.run_client = test_stream_unsent_bytes_client,
+		.run_server = test_stream_unsent_bytes_server,
+	},
+	{
+		.name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes",
+		.run_client = test_seqpacket_unsent_bytes_client,
+		.run_server = test_seqpacket_unsent_bytes_server,
+	},
 	{},
 };