From d76bb7a09bb3b8711077912f3e80cfcf39cd9d0b Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 27 May 2020 00:38:38 -0400
Subject: tools/power turbostat: Print /dev/cpu_dma_latency

Users are puzzled when they use tuned performance and all their
C-states vanish.  Dump /dev/cpu_dma_latency and state
whether the value is default, or constraining,
to explain this situation.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 33b370865d16..4c679568fda4 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4698,6 +4698,32 @@ unsigned int intel_model_duplicates(unsigned int model)
 	}
 	return model;
 }
+
+void print_dev_latency(void)
+{
+	char *path = "/dev/cpu_dma_latency";
+	int fd;
+	int value;
+	int retval;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		warn("fopen %s\n", path);
+		return;
+	}
+
+	retval = read(fd, (void *)&value, sizeof(int));
+	if (retval != sizeof(int)) {
+		warn("read %s\n", path);
+		close(fd);
+		return;
+	}
+	fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n",
+		value, value == 2000000000 ? "default" : "constrained");
+
+	close(fd);
+}
+
 void process_cpuid()
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -4966,6 +4992,8 @@ void process_cpuid()
 	if (!quiet)
 		dump_cstate_pstate_config_info(family, model);
 
+	if (!quiet)
+		print_dev_latency();
 	if (!quiet)
 		dump_sysfs_cstate_config();
 	if (!quiet)
-- 
cgit v1.2.3-70-g09d2


From 9aefc2cda6353f48708415d9adc5dff4deb73412 Mon Sep 17 00:00:00 2001
From: Doug Smythies <doug.smythies@gmail.com>
Date: Thu, 26 Mar 2020 13:36:37 -0700
Subject: tools/power turbostat: Always print idle in the system configuration
 header

If the --quiet option is not used, turbostat prints a useful system
configuration header during startup.

But inclusion of idle system configuration information in this header
is currently a function of inclusion in the columns chosen to be displayed.

Always list this idle system configuration.

Signed-off-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 4c679568fda4..4edad3fc760a 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3530,9 +3530,6 @@ dump_sysfs_cstate_config(void)
 	int state;
 	char *sp;
 
-	if (!DO_BIC(BIC_sysfs))
-		return;
-
 	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
 		fprintf(outf, "cpuidle not loaded\n");
 		return;
-- 
cgit v1.2.3-70-g09d2


From 7c2ccc507bd44d17227930181f937b2066565349 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Sat, 18 Apr 2020 16:31:47 +0800
Subject: tools/power turbostat: Make the energy variable to be 64 bit

Change the energy variable from 32bit to 64bit,
so that it can record long time duration.
After this conversion, adjust the DELTA_WRAP32() accordingly.

Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 4edad3fc760a..29ec1018852d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -211,12 +211,12 @@ struct pkg_data {
 	long long gfx_rc6_ms;
 	unsigned int gfx_mhz;
 	unsigned int package_id;
-	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
-	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
-	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */
-	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
-	unsigned int rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
-	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
+	unsigned long long energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
+	unsigned long long energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
+	unsigned long long energy_cores;	/* MSR_PP0_ENERGY_STATUS */
+	unsigned long long energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
+	unsigned long long rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
+	unsigned long long rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
 	unsigned int pkg_temp_c;
 	unsigned long long counter[MAX_ADDED_COUNTERS];
 } *package_even, *package_odd;
@@ -858,13 +858,13 @@ int dump_counters(struct thread_data *t, struct core_data *c,
 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
 		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
 		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
-		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
-		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
-		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
-		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
-		outp += sprintf(outp, "Throttle PKG: %0X\n",
+		outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
+		outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
+		outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
+		outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
+		outp += sprintf(outp, "Throttle PKG: %0llX\n",
 			p->rapl_pkg_perf_status);
-		outp += sprintf(outp, "Throttle RAM: %0X\n",
+		outp += sprintf(outp, "Throttle RAM: %0llX\n",
 			p->rapl_dram_perf_status);
 		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
 
@@ -1210,11 +1210,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
 }
 
 #define DELTA_WRAP32(new, old)			\
-	if (new > old) {			\
-		old = new - old;		\
-	} else {				\
-		old = 0x100000000 + new - old;	\
-	}
+	old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
 
 int
 delta_package(struct pkg_data *new, struct pkg_data *old)
-- 
cgit v1.2.3-70-g09d2


From 87e15da95775a2ffb8c444e84f08ca982b758364 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Sat, 18 Apr 2020 16:31:57 +0800
Subject: tools/power turbostat: Introduce functions to accumulate RAPL
 consumption

Since the RAPL Joule Counter is 32 bit, turbostat would
only print a *star* instead of printing the actual energy
consumed to indicate the overflow due to long duration.
This does not meet the requirement from servers as the
sampling time of turbostat is usually very long on servers.

So maintain a set of MSR buffer, and update them
periodically before the 32bit MSR register is wrapped round,
so as to avoid the overflow.

The idea is similar to the implementation of ktime_get():

Periodical MSR timer:
total_rapl_sum += (current_rapl_msr - last_rapl_msr);

Using get_msr_sum() to get the accumulated RAPL:
return (current_rapl_msr - last_rapl_msr) + total_rapl_sum;

The accumulated RAPL mechanism will be turned on in next patch.

Originally-by: Aaron Lu <aaron.lwe@gmail.com>
Reviewed-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/Makefile    |   2 +-
 tools/power/x86/turbostat/turbostat.c | 224 +++++++++++++++++++++++++++++++++-
 2 files changed, 219 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 2b6551269e43..d08765531bcb 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -16,7 +16,7 @@ override CFLAGS +=	-D_FORTIFY_SOURCE=2
 
 %: %.c
 	@mkdir -p $(BUILD_OUTPUT)
-	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
+	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt
 
 .PHONY : clean
 clean :
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 29ec1018852d..c1759f6c84a8 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -259,6 +259,113 @@ struct msr_counter {
 #define	SYSFS_PERCPU	(1 << 1)
 };
 
+/*
+ * The accumulated sum of MSR is defined as a monotonic
+ * increasing MSR, it will be accumulated periodically,
+ * despite its register's bit width.
+ */
+enum {
+	IDX_PKG_ENERGY,
+	IDX_DRAM_ENERGY,
+	IDX_PP0_ENERGY,
+	IDX_PP1_ENERGY,
+	IDX_PKG_PERF,
+	IDX_DRAM_PERF,
+	IDX_COUNT,
+};
+
+int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
+
+struct msr_sum_array {
+	/* get_msr_sum() = sum + (get_msr() - last) */
+	struct {
+		/*The accumulated MSR value is updated by the timer*/
+		unsigned long long sum;
+		/*The MSR footprint recorded in last timer*/
+		unsigned long long last;
+	} entries[IDX_COUNT];
+};
+
+/* The percpu MSR sum array.*/
+struct msr_sum_array *per_cpu_msr_sum;
+
+int idx_to_offset(int idx)
+{
+	int offset;
+
+	switch (idx) {
+	case IDX_PKG_ENERGY:
+		offset = MSR_PKG_ENERGY_STATUS;
+		break;
+	case IDX_DRAM_ENERGY:
+		offset = MSR_DRAM_ENERGY_STATUS;
+		break;
+	case IDX_PP0_ENERGY:
+		offset = MSR_PP0_ENERGY_STATUS;
+		break;
+	case IDX_PP1_ENERGY:
+		offset = MSR_PP1_ENERGY_STATUS;
+		break;
+	case IDX_PKG_PERF:
+		offset = MSR_PKG_PERF_STATUS;
+		break;
+	case IDX_DRAM_PERF:
+		offset = MSR_DRAM_PERF_STATUS;
+		break;
+	default:
+		offset = -1;
+	}
+	return offset;
+}
+
+int offset_to_idx(int offset)
+{
+	int idx;
+
+	switch (offset) {
+	case MSR_PKG_ENERGY_STATUS:
+		idx = IDX_PKG_ENERGY;
+		break;
+	case MSR_DRAM_ENERGY_STATUS:
+		idx = IDX_DRAM_ENERGY;
+		break;
+	case MSR_PP0_ENERGY_STATUS:
+		idx = IDX_PP0_ENERGY;
+		break;
+	case MSR_PP1_ENERGY_STATUS:
+		idx = IDX_PP1_ENERGY;
+		break;
+	case MSR_PKG_PERF_STATUS:
+		idx = IDX_PKG_PERF;
+		break;
+	case MSR_DRAM_PERF_STATUS:
+		idx = IDX_DRAM_PERF;
+		break;
+	default:
+		idx = -1;
+	}
+	return idx;
+}
+
+int idx_valid(int idx)
+{
+	switch (idx) {
+	case IDX_PKG_ENERGY:
+		return do_rapl & RAPL_PKG;
+	case IDX_DRAM_ENERGY:
+		return do_rapl & RAPL_DRAM;
+	case IDX_PP0_ENERGY:
+		return do_rapl & RAPL_CORES_ENERGY_STATUS;
+	case IDX_PP1_ENERGY:
+		return do_rapl & RAPL_GFX;
+	case IDX_PKG_PERF:
+		return do_rapl & RAPL_PKG_PERF_STATUS;
+	case IDX_DRAM_PERF:
+		return do_rapl & RAPL_DRAM_PERF_STATUS;
+	default:
+		return 0;
+	}
+}
 struct sys_counters {
 	unsigned int added_thread_counters;
 	unsigned int added_core_counters;
@@ -1250,12 +1357,12 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
 
 	old->gfx_mhz = new->gfx_mhz;
 
-	DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
-	DELTA_WRAP32(new->energy_cores, old->energy_cores);
-	DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
-	DELTA_WRAP32(new->energy_dram, old->energy_dram);
-	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
-	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
+	old->energy_pkg = new->energy_pkg - old->energy_pkg;
+	old->energy_cores = new->energy_cores - old->energy_cores;
+	old->energy_gfx = new->energy_gfx - old->energy_gfx;
+	old->energy_dram = new->energy_dram - old->energy_dram;
+	old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
+	old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
 
 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
 		if (mp->format == FORMAT_RAW)
@@ -3053,6 +3160,111 @@ void do_sleep(void)
 	}
 }
 
+int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
+{
+	int ret, idx;
+	unsigned long long msr_cur, msr_last;
+
+	if (!per_cpu_msr_sum)
+		return 1;
+
+	idx = offset_to_idx(offset);
+	if (idx < 0)
+		return idx;
+	/* get_msr_sum() = sum + (get_msr() - last) */
+	ret = get_msr(cpu, offset, &msr_cur);
+	if (ret)
+		return ret;
+	msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
+	DELTA_WRAP32(msr_cur, msr_last);
+	*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
+
+	return 0;
+}
+
+timer_t timerid;
+
+/* Timer callback, update the sum of MSRs periodically. */
+static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+	int i, ret;
+	int cpu = t->cpu_id;
+
+	for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
+		unsigned long long msr_cur, msr_last;
+		int offset;
+
+		if (!idx_valid(i))
+			continue;
+		offset = idx_to_offset(i);
+		if (offset < 0)
+			continue;
+		ret = get_msr(cpu, offset, &msr_cur);
+		if (ret) {
+			fprintf(outf, "Can not update msr(0x%x)\n", offset);
+			continue;
+		}
+
+		msr_last = per_cpu_msr_sum[cpu].entries[i].last;
+		per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
+
+		DELTA_WRAP32(msr_cur, msr_last);
+		per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
+	}
+	return 0;
+}
+
+static void
+msr_record_handler(union sigval v)
+{
+	for_all_cpus(update_msr_sum, EVEN_COUNTERS);
+}
+
+void msr_sum_record(void)
+{
+	struct itimerspec its;
+	struct sigevent sev;
+
+	per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
+	if (!per_cpu_msr_sum) {
+		fprintf(outf, "Can not allocate memory for long time MSR.\n");
+		return;
+	}
+	/*
+	 * Signal handler might be restricted, so use thread notifier instead.
+	 */
+	memset(&sev, 0, sizeof(struct sigevent));
+	sev.sigev_notify = SIGEV_THREAD;
+	sev.sigev_notify_function = msr_record_handler;
+
+	sev.sigev_value.sival_ptr = &timerid;
+	if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
+		fprintf(outf, "Can not create timer.\n");
+		goto release_msr;
+	}
+
+	its.it_value.tv_sec = 0;
+	its.it_value.tv_nsec = 1;
+	/*
+	 * A wraparound time has been calculated early.
+	 * Some sources state that the peak power for a
+	 * microprocessor is usually 1.5 times the TDP rating,
+	 * use 2 * TDP for safety.
+	 */
+	its.it_interval.tv_sec = rapl_joule_counter_range / 2;
+	its.it_interval.tv_nsec = 0;
+
+	if (timer_settime(timerid, 0, &its, NULL) == -1) {
+		fprintf(outf, "Can not set timer.\n");
+		goto release_timer;
+	}
+	return;
+
+ release_timer:
+	timer_delete(timerid);
+ release_msr:
+	free(per_cpu_msr_sum);
+}
 
 void turbostat_loop()
 {
-- 
cgit v1.2.3-70-g09d2


From 9972d5d84d76982606806b2ce887f70c2f8ba60a Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Sat, 18 Apr 2020 16:32:05 +0800
Subject: tools/power turbostat: Enable accumulate RAPL display

Enable the accumulated RAPL display by default.

Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 38 +++++++++++++++--------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index c1759f6c84a8..66c468262020 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1169,14 +1169,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		}
 	}
 
-	/*
-	 * If measurement interval exceeds minimum RAPL Joule Counter range,
-	 * indicate that results are suspect by printing "**" in fraction place.
-	 */
-	if (interval_float < rapl_joule_counter_range)
-		fmt8 = "%s%.2f";
-	else
-		fmt8 = "%6.0f**";
+	fmt8 = "%s%.2f";
 
 	if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
 		outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
@@ -2069,39 +2062,39 @@ retry:
 		p->sys_lpi = cpuidle_cur_sys_lpi_us;
 
 	if (do_rapl & RAPL_PKG) {
-		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
+		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
 			return -13;
-		p->energy_pkg = msr & 0xFFFFFFFF;
+		p->energy_pkg = msr;
 	}
 	if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
-		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
+		if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
 			return -14;
-		p->energy_cores = msr & 0xFFFFFFFF;
+		p->energy_cores = msr;
 	}
 	if (do_rapl & RAPL_DRAM) {
-		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
+		if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
 			return -15;
-		p->energy_dram = msr & 0xFFFFFFFF;
+		p->energy_dram = msr;
 	}
 	if (do_rapl & RAPL_GFX) {
-		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
+		if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
 			return -16;
-		p->energy_gfx = msr & 0xFFFFFFFF;
+		p->energy_gfx = msr;
 	}
 	if (do_rapl & RAPL_PKG_PERF_STATUS) {
-		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
+		if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
 			return -16;
-		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
+		p->rapl_pkg_perf_status = msr;
 	}
 	if (do_rapl & RAPL_DRAM_PERF_STATUS) {
-		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
+		if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
 			return -16;
-		p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
+		p->rapl_dram_perf_status = msr;
 	}
 	if (do_rapl & RAPL_AMD_F17H) {
-		if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
+		if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
 			return -13;
-		p->energy_pkg = msr & 0xFFFFFFFF;
+		p->energy_pkg = msr;
 	}
 	if (DO_BIC(BIC_PkgTmp)) {
 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
@@ -6101,6 +6094,7 @@ int main(int argc, char **argv)
 		return 0;
 	}
 
+	msr_sum_record();
 	/*
 	 * if any params left, it must be a command to fork
 	 */
-- 
cgit v1.2.3-70-g09d2


From 8201a0285789fade1c5b031914577e2b27a64f05 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Mon, 29 Jun 2020 15:26:57 -0400
Subject: tools/power turbostat: Use sched_getcpu() instead of hardcoded cpu 0

Disabling cpu 0 results in an error

turbostat: /sys/devices/system/cpu/cpu0/topology/thread_siblings: open failed: No such file or directory

Use sched_getcpu() instead of a hardcoded cpu 0 to get the max cpu number.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 66c468262020..151a70f2311b 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2865,12 +2865,19 @@ void re_initialize(void)
 void set_max_cpu_num(void)
 {
 	FILE *filep;
+	int base_cpu;
 	unsigned long dummy;
+	char pathname[64];
 
+	base_cpu = sched_getcpu();
+	if (base_cpu < 0)
+		err(1, "cannot find calling cpu ID");
+	sprintf(pathname,
+		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings",
+		base_cpu);
+
+	filep = fopen_or_die(pathname, "r");
 	topo.max_cpu_num = 0;
-	filep = fopen_or_die(
-			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
-			"r");
 	while (fscanf(filep, "%lx,", &dummy) == 1)
 		topo.max_cpu_num += BITMASK_SIZE;
 	fclose(filep);
-- 
cgit v1.2.3-70-g09d2


From b88cad57d4d32bb5c53cd8e0ce3a1971062142af Mon Sep 17 00:00:00 2001
From: "Alexander A. Klimov" <grandmaster@al2klimov.de>
Date: Wed, 8 Jul 2020 12:55:30 +0200
Subject: tools/power turbostat: Replace HTTP links with HTTPS ones: TURBOSTAT
 UTILITY

Rationale:
Reduces attack surface on kernel devs opening the links for MITM
as HTTPS traffic is much harder to manipulate.

Deterministic algorithm:
For each file:
  If not .svg:
    For each line:
      If doesn't contain `\bxmlns\b`:
        For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
	  If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`:
            If both the HTTP and HTTPS versions
            return 200 OK and serve the same content:
              Replace HTTP with HTTPS.

Signed-off-by: Alexander A. Klimov <grandmaster@al2klimov.de>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index a6db83a88e85..f6b7e85b121c 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -335,7 +335,7 @@ that they count at TSC rate, which is true on all processors tested to date.
 
 .SH REFERENCES
 Volume 3B: System Programming Guide"
-http://www.intel.com/products/processor/manuals/
+https://www.intel.com/products/processor/manuals/
 
 .SH FILES
 .ta
-- 
cgit v1.2.3-70-g09d2


From fecb3bc839df64761cc63c9ee9b45c1cad36aee8 Mon Sep 17 00:00:00 2001
From: David Arcari <darcari@redhat.com>
Date: Mon, 10 Aug 2020 10:43:30 -0400
Subject: tools/power turbostat: Fix output formatting for ACPI CST enumeration

turbostat formatting is broken with ACPI CST for enumeration.  The
problem is that the CX_ACPI% is eight characters long which does not
work with tab formatting.  One simple solution is to remove the underbar
from the state name such that C1_ACPI will be displayed as C1ACPI.

Signed-off-by: David Arcari <darcari@redhat.com>
Cc: Len Brown <lenb@kernel.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 151a70f2311b..56b93e0d9415 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3682,6 +3682,20 @@ int has_config_tdp(unsigned int family, unsigned int model)
 	}
 }
 
+static void
+remove_underbar(char *s)
+{
+	char *to = s;
+
+	while (*s) {
+		if (*s != '_')
+			*to++ = *s;
+		s++;
+	}
+
+	*to = 0;
+}
+
 static void
 dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
 {
@@ -3764,6 +3778,8 @@ dump_sysfs_cstate_config(void)
 		*sp = '\0';
 		fclose(input);
 
+		remove_underbar(name_buf);
+
 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
 			base_cpu, state);
 		input = fopen(path, "r");
@@ -5830,6 +5846,8 @@ void probe_sysfs(void)
 		*sp = '%';
 		*(sp + 1) = '\0';
 
+		remove_underbar(name_buf);
+
 		fclose(input);
 
 		sprintf(path, "cpuidle/state%d/time", state);
@@ -5857,6 +5875,8 @@ void probe_sysfs(void)
 		*sp = '\0';
 		fclose(input);
 
+		remove_underbar(name_buf);
+
 		sprintf(path, "cpuidle/state%d/usage", state);
 
 		if (is_deferred_skip(name_buf))
-- 
cgit v1.2.3-70-g09d2


From e7af1ed3fa4756e8df8270a8635d852a94266061 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Thu, 13 Aug 2020 19:06:03 -0400
Subject: tools/power turbostat: Support additional CPU model numbers

Initial support for models recently added to intel-family.h.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 56b93e0d9415..4ee4e3067681 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4906,6 +4906,9 @@ unsigned int intel_model_duplicates(unsigned int model)
 	case INTEL_FAM6_ICELAKE_NNPI:
 	case INTEL_FAM6_TIGERLAKE_L:
 	case INTEL_FAM6_TIGERLAKE:
+	case INTEL_FAM6_ROCKETLAKE:
+	case INTEL_FAM6_LAKEFIELD:
+	case INTEL_FAM6_ALDERLAKE:
 		return INTEL_FAM6_CANNONLAKE_L;
 
 	case INTEL_FAM6_ATOM_TREMONT_D:
@@ -4915,6 +4918,7 @@ unsigned int intel_model_duplicates(unsigned int model)
 		return INTEL_FAM6_ATOM_TREMONT;
 
 	case INTEL_FAM6_ICELAKE_X:
+	case INTEL_FAM6_SAPPHIRERAPIDS_X:
 		return INTEL_FAM6_SKYLAKE_X;
 	}
 	return model;
-- 
cgit v1.2.3-70-g09d2


From c315a09b1b0f491c27d46e9d05f397023a44fb81 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Thu, 13 Aug 2020 19:18:22 -0400
Subject: tools/power turbostat: Skip pc8, pc9, pc10 columns, if they are
 disabled

Like we skip PC3 and PC6 columns when the package C-state limit
disables them, skip PC8/PC9/CP10 under analogous conditions.

Reported-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 4ee4e3067681..72c0b19db36e 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5186,9 +5186,12 @@ void process_cpuid()
 		BIC_NOT_PRESENT(BIC_Pkgpc7);
 	}
 	if (has_c8910_msrs(family, model)) {
-		BIC_PRESENT(BIC_Pkgpc8);
-		BIC_PRESENT(BIC_Pkgpc9);
-		BIC_PRESENT(BIC_Pkgpc10);
+		if (pkg_cstate_limit >= PCL__8)
+			BIC_PRESENT(BIC_Pkgpc8);
+		if (pkg_cstate_limit >= PCL__9)
+			BIC_PRESENT(BIC_Pkgpc9);
+		if (pkg_cstate_limit >= PCL_10)
+			BIC_PRESENT(BIC_Pkgpc10);
 	}
 	do_irtl_hsw = has_c8910_msrs(family, model);
 	if (has_skl_msrs(family, model)) {
-- 
cgit v1.2.3-70-g09d2


From 0936cdfbb527a4fa2559292069ebff2e8cf2c843 Mon Sep 17 00:00:00 2001
From: Ondřej Lysoněk <olysonek@redhat.com>
Date: Fri, 27 Mar 2020 08:27:12 +0100
Subject: tools/power x86_energy_perf_policy: Input/output error in a VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've encountered an issue with x86_energy_perf_policy. If I run it on a
machine that I'm told is a qemu-kvm virtual machine running inside a
privileged container, I get the following error:

x86_energy_perf_policy: /dev/cpu/0/msr offset 0x1ad read failed: Input/output error

I get the same error in a Digital Ocean droplet, so that might be a
similar environment.

I created the following patch which is intended to give a more
user-friendly message. It's based on a patch for turbostat from Prarit
Bhargava that was posted some time ago. The patch is "[v2] turbostat:
Running on virtual machine is not supported" [1].

Given my limited knowledge of the topic, I can't say with confidence
that this is the right solution, though (that's why this is not an
official patch submission). Also, I'm not sure what the convention with
exit codes is in this tool. Also, instead of the error message, perhaps
the tool should just not print anything in this case, which is how it
behaves in a "regular" VM?

[1] https://patchwork.kernel.org/patch/9868587/

Signed-off-by: Ondřej Lysoněk <olysonek@redhat.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 .../x86_energy_perf_policy.c                       | 67 +++++++++++++++++-----
 1 file changed, 54 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 3fe1eed900d4..ff6c6661f075 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -622,6 +622,57 @@ void cmdline(int argc, char **argv)
 	}
 }
 
+/*
+ * Open a file, and exit on failure
+ */
+FILE *fopen_or_die(const char *path, const char *mode)
+{
+	FILE *filep = fopen(path, "r");
+
+	if (!filep)
+		err(1, "%s: open failed", path);
+	return filep;
+}
+
+void err_on_hypervisor(void)
+{
+	FILE *cpuinfo;
+	char *flags, *hypervisor;
+	char *buffer;
+
+	/* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */
+	cpuinfo = fopen_or_die("/proc/cpuinfo", "ro");
+
+	buffer = malloc(4096);
+	if (!buffer) {
+		fclose(cpuinfo);
+		err(-ENOMEM, "buffer malloc fail");
+	}
+
+	if (!fread(buffer, 1024, 1, cpuinfo)) {
+		fclose(cpuinfo);
+		free(buffer);
+		err(1, "Reading /proc/cpuinfo failed");
+	}
+
+	flags = strstr(buffer, "flags");
+	rewind(cpuinfo);
+	fseek(cpuinfo, flags - buffer, SEEK_SET);
+	if (!fgets(buffer, 4096, cpuinfo)) {
+		fclose(cpuinfo);
+		free(buffer);
+		err(1, "Reading /proc/cpuinfo failed");
+	}
+	fclose(cpuinfo);
+
+	hypervisor = strstr(buffer, "hypervisor");
+
+	free(buffer);
+
+	if (hypervisor)
+		err(-1,
+		    "not supported on this virtual machine");
+}
 
 int get_msr(int cpu, int offset, unsigned long long *msr)
 {
@@ -635,8 +686,10 @@ int get_msr(int cpu, int offset, unsigned long long *msr)
 		err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
 
 	retval = pread(fd, msr, sizeof(*msr), offset);
-	if (retval != sizeof(*msr))
+	if (retval != sizeof(*msr)) {
+		err_on_hypervisor();
 		err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
+	}
 
 	if (debug > 1)
 		fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr);
@@ -1086,18 +1139,6 @@ int update_cpu_msrs(int cpu)
 	return 0;
 }
 
-/*
- * Open a file, and exit on failure
- */
-FILE *fopen_or_die(const char *path, const char *mode)
-{
-	FILE *filep = fopen(path, "r");
-
-	if (!filep)
-		err(1, "%s: open failed", path);
-	return filep;
-}
-
 unsigned int get_pkg_num(int cpu)
 {
 	FILE *fp;
-- 
cgit v1.2.3-70-g09d2


From b4b9156953fea108a9540c262e48eafeeff99ab0 Mon Sep 17 00:00:00 2001
From: Rafael Antognolli <rafael.antognolli@intel.com>
Date: Wed, 22 Apr 2020 15:02:07 -0700
Subject: tools/power turbostat: Add a new GFXAMHz column that exposes
 gt_act_freq_mhz.

The column already present called GFXMHz reads from gt_cur_freq_mhz,
which represents the GT frequency that was requested, but power
management might not be able to do that. So the new column will display
what the actual frequency GT is running at.

Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 50 +++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 72c0b19db36e..7a6e91aedf0f 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -79,6 +79,7 @@ unsigned long long  gfx_cur_rc6_ms;
 unsigned long long cpuidle_cur_cpu_lpi_us;
 unsigned long long cpuidle_cur_sys_lpi_us;
 unsigned int gfx_cur_mhz;
+unsigned int gfx_act_mhz;
 unsigned int tcc_activation_temp;
 unsigned int tcc_activation_temp_override;
 double rapl_power_units, rapl_time_units;
@@ -210,6 +211,7 @@ struct pkg_data {
 	unsigned long long pkg_both_core_gfxe_c0;
 	long long gfx_rc6_ms;
 	unsigned int gfx_mhz;
+	unsigned int gfx_act_mhz;
 	unsigned int package_id;
 	unsigned long long energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
 	unsigned long long energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
@@ -558,6 +560,7 @@ struct msr_counter bic[] = {
 	{ 0x0, "APIC" },
 	{ 0x0, "X2APIC" },
 	{ 0x0, "Die" },
+	{ 0x0, "GFXAMHz" },
 };
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -612,6 +615,7 @@ struct msr_counter bic[] = {
 #define	BIC_APIC	(1ULL << 48)
 #define	BIC_X2APIC	(1ULL << 49)
 #define	BIC_Die		(1ULL << 50)
+#define	BIC_GFXACTMHz	(1ULL << 51)
 
 #define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
@@ -831,6 +835,9 @@ void print_header(char *delim)
 	if (DO_BIC(BIC_GFXMHz))
 		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
 
+	if (DO_BIC(BIC_GFXACTMHz))
+		outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+
 	if (DO_BIC(BIC_Totl_c0))
 		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Any_c0))
@@ -1198,6 +1205,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	if (DO_BIC(BIC_GFXMHz))
 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
 
+	/* GFXACTMHz */
+	if (DO_BIC(BIC_GFXACTMHz))
+		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+
 	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
 	if (DO_BIC(BIC_Totl_c0))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
@@ -1349,6 +1360,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
 		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
 
 	old->gfx_mhz = new->gfx_mhz;
+	old->gfx_act_mhz = new->gfx_act_mhz;
 
 	old->energy_pkg = new->energy_pkg - old->energy_pkg;
 	old->energy_cores = new->energy_cores - old->energy_cores;
@@ -1565,6 +1577,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	p->gfx_rc6_ms = 0;
 	p->gfx_mhz = 0;
+	p->gfx_act_mhz = 0;
 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
 		t->counter[i] = 0;
 
@@ -1660,6 +1673,7 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 
 	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
 	average.packages.gfx_mhz = p->gfx_mhz;
+	average.packages.gfx_act_mhz = p->gfx_act_mhz;
 
 	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
 
@@ -2108,6 +2122,9 @@ retry:
 	if (DO_BIC(BIC_GFXMHz))
 		p->gfx_mhz = gfx_cur_mhz;
 
+	if (DO_BIC(BIC_GFXACTMHz))
+		p->gfx_act_mhz = gfx_act_mhz;
+
 	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
 		if (get_mp(cpu, mp, &p->counter[i]))
 			return -10;
@@ -3018,6 +3035,33 @@ int snapshot_gfx_mhz(void)
 	return 0;
 }
 
+/*
+ * snapshot_gfx_cur_mhz()
+ *
+ * record snapshot of
+ * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_act_mhz(void)
+{
+	static FILE *fp;
+	int retval;
+
+	if (fp == NULL)
+		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
+	else {
+		rewind(fp);
+		fflush(fp);
+	}
+
+	retval = fscanf(fp, "%d", &gfx_act_mhz);
+	if (retval != 1)
+		err(1, "GFX ACT MHz");
+
+	return 0;
+}
+
 /*
  * snapshot_cpu_lpi()
  *
@@ -3083,6 +3127,9 @@ int snapshot_proc_sysfs_files(void)
 	if (DO_BIC(BIC_GFXMHz))
 		snapshot_gfx_mhz();
 
+	if (DO_BIC(BIC_GFXACTMHz))
+		snapshot_gfx_act_mhz();
+
 	if (DO_BIC(BIC_CPU_LPI))
 		snapshot_cpu_lpi_us();
 
@@ -5236,6 +5283,9 @@ void process_cpuid()
 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
 		BIC_PRESENT(BIC_GFXMHz);
 
+	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+		BIC_PRESENT(BIC_GFXACTMHz);
+
 	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
 		BIC_PRESENT(BIC_CPU_LPI);
 	else
-- 
cgit v1.2.3-70-g09d2


From 20de0dab238849414d33c81bc96e2db68cc61467 Mon Sep 17 00:00:00 2001
From: Antti Laakso <antti.laakso@linux.intel.com>
Date: Mon, 17 Aug 2020 18:03:48 +0300
Subject: tools/power turbostat: Remove empty columns for Jacobsville

Jacobsville doesn't have Package C2 and C6. Also
Core and DRAM RAPL are not available. Adjust output
accordingly.

Signed-off-by: Antti Laakso <antti.laakso@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7a6e91aedf0f..629b809075c1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2286,6 +2286,7 @@ int has_turbo_ratio_group_limits(int family, int model)
 	case INTEL_FAM6_ATOM_GOLDMONT:
 	case INTEL_FAM6_SKYLAKE_X:
 	case INTEL_FAM6_ATOM_GOLDMONT_D:
+	case INTEL_FAM6_ATOM_TREMONT_D:
 		return 1;
 	}
 	return 0;
@@ -3534,6 +3535,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
 	case INTEL_FAM6_ATOM_TREMONT:	/* EHL */
+	case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
 		pkg_cstate_limits = glm_pkg_cstate_limits;
 		break;
 	default:
@@ -3616,6 +3618,17 @@ int is_ehl(unsigned int family, unsigned int model)
 	}
 	return 0;
 }
+int is_jvl(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)
+		return 0;
+
+	switch (model) {
+	case INTEL_FAM6_ATOM_TREMONT_D:
+		return 1;
+	}
+	return 0;
+}
 
 int has_turbo_ratio_limit(unsigned int family, unsigned int model)
 {
@@ -4227,6 +4240,14 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
 			BIC_PRESENT(BIC_GFXWatt);
 		}
 		break;
+	case INTEL_FAM6_ATOM_TREMONT_D:	/* JVL */
+		do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+		BIC_PRESENT(BIC_PKG__);
+		if (rapl_joules)
+			BIC_PRESENT(BIC_Pkg_J);
+		else
+			BIC_PRESENT(BIC_PkgWatt);
+		break;
 	case INTEL_FAM6_SKYLAKE_L:	/* SKL */
 	case INTEL_FAM6_CANNONLAKE_L:	/* CNL */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
@@ -4629,6 +4650,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
 	case INTEL_FAM6_ATOM_GOLDMONT_D:	/* DNV */
 	case INTEL_FAM6_ATOM_TREMONT:		/* EHL */
+	case INTEL_FAM6_ATOM_TREMONT_D:		/* JVL */
 		return 1;
 	}
 	return 0;
@@ -4958,9 +4980,6 @@ unsigned int intel_model_duplicates(unsigned int model)
 	case INTEL_FAM6_ALDERLAKE:
 		return INTEL_FAM6_CANNONLAKE_L;
 
-	case INTEL_FAM6_ATOM_TREMONT_D:
-		return INTEL_FAM6_ATOM_GOLDMONT_D;
-
 	case INTEL_FAM6_ATOM_TREMONT_L:
 		return INTEL_FAM6_ATOM_TREMONT;
 
@@ -5214,6 +5233,14 @@ void process_cpuid()
 		BIC_PRESENT(BIC_Mod_c6);
 		use_c1_residency_msr = 1;
 	}
+	if (is_jvl(family, model)) {
+		BIC_NOT_PRESENT(BIC_CPU_c3);
+		BIC_NOT_PRESENT(BIC_CPU_c7);
+		BIC_NOT_PRESENT(BIC_Pkgpc2);
+		BIC_NOT_PRESENT(BIC_Pkgpc3);
+		BIC_NOT_PRESENT(BIC_Pkgpc6);
+		BIC_NOT_PRESENT(BIC_Pkgpc7);
+	}
 	if (is_dnv(family, model)) {
 		BIC_PRESENT(BIC_CPU_c1);
 		BIC_NOT_PRESENT(BIC_CPU_c3);
-- 
cgit v1.2.3-70-g09d2


From 33eb82251af9be47a625ca1578f44e596a3a0ca9 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@amd.com>
Date: Mon, 17 Aug 2020 17:42:15 -0500
Subject: tools/power turbostat: Support AMD Family 19h

Family 19h processors have the same RAPL (Running average power limit)
hardware register interface as Family 17h processors.

Change the family checks to succeed for Family 17h and above to enable
core and package energy measurement on Family 19h machines.

Also update the TDP to the largest found at the bottom of the page at
amd.com->processors->servers->epyc->2nd-gen-epyc, i.e., the EPYC 7H12.

Signed-off-by: Kim Phillips <kim.phillips@amd.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Len Brown <lenb@kernel.org>
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 629b809075c1..0869f791ed14 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4162,13 +4162,8 @@ double get_tdp_intel(unsigned int model)
 
 double get_tdp_amd(unsigned int family)
 {
-	switch (family) {
-	case 0x17:
-	case 0x18:
-	default:
-		/* This is the max stock TDP of HEDT/Server Fam17h chips */
-		return 250.0;
-	}
+	/* This is the max stock TDP of HEDT/Server Fam17h+ chips */
+	return 280.0;
 }
 
 /*
@@ -4358,27 +4353,20 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
 
 	if (max_extended_level >= 0x80000007) {
 		__cpuid(0x80000007, eax, ebx, ecx, edx);
-		/* RAPL (Fam 17h) */
+		/* RAPL (Fam 17h+) */
 		has_rapl = edx & (1 << 14);
 	}
 
-	if (!has_rapl)
+	if (!has_rapl || family < 0x17)
 		return;
 
-	switch (family) {
-	case 0x17: /* Zen, Zen+ */
-	case 0x18: /* Hygon Dhyana */
-		do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
-		if (rapl_joules) {
-			BIC_PRESENT(BIC_Pkg_J);
-			BIC_PRESENT(BIC_Cor_J);
-		} else {
-			BIC_PRESENT(BIC_PkgWatt);
-			BIC_PRESENT(BIC_CorWatt);
-		}
-		break;
-	default:
-		return;
+	do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
+	if (rapl_joules) {
+		BIC_PRESENT(BIC_Pkg_J);
+		BIC_PRESENT(BIC_Cor_J);
+	} else {
+		BIC_PRESENT(BIC_PkgWatt);
+		BIC_PRESENT(BIC_CorWatt);
 	}
 
 	if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
-- 
cgit v1.2.3-70-g09d2


From 4be61e6b769fc3f97b58870aa4258e27968f07e1 Mon Sep 17 00:00:00 2001
From: Alexander Monakov <amonakov@ispras.ru>
Date: Sun, 23 Aug 2020 23:27:02 +0300
Subject: tools/power turbostat: Build with _FILE_OFFSET_BITS=64

For compatibility reasons, Glibc off_t is a 32-bit type on 32-bit x86
unless _FILE_OFFSET_BITS=64 is defined. Add this define, as otherwise
reading MSRs with index 0x80000000 and above attempts a pread with a
negative offset, which fails.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
Tested-by: Liwei Song <liwei.song@windriver.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index d08765531bcb..f3e3c94ab9bd 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -12,6 +12,7 @@ turbostat : turbostat.c
 override CFLAGS +=	-O2 -Wall -I../../../include
 override CFLAGS +=	-DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
 override CFLAGS +=	-DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
+override CFLAGS +=	-D_FILE_OFFSET_BITS=64
 override CFLAGS +=	-D_FORTIFY_SOURCE=2
 
 %: %.c
-- 
cgit v1.2.3-70-g09d2


From 6ff7cb371c4bea3dba03a56d774da925e78a5087 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Tue, 29 Sep 2020 17:28:42 -0400
Subject: tools/power turbostat: adjust for temperature offset

cpu1: MSR_IA32_TEMPERATURE_TARGET: 0x05640000 (95 C) (100 default - 5 offset)

Account for the new "offset" field in MSR_TEMPERATURE_TARGET.
While this field is usually zero, ignoring it results in over-stating
the current temperature, both per-core and per-package.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 62 ++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 33 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0869f791ed14..4122468fb73b 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4785,12 +4785,33 @@ double discover_bclk(unsigned int family, unsigned int model)
  * below this value, including the Digital Thermal Sensor (DTS),
  * Package Thermal Management Sensor (PTM), and thermal event thresholds.
  */
-int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+int read_tcc_activation_temp()
 {
 	unsigned long long msr;
-	unsigned int target_c_local;
-	int cpu;
+	unsigned int tcc, target_c, offset_c;
+
+	/* Temperature Target MSR is Nehalem and newer only */
+	if (!do_nhm_platform_info)
+		return 0;
+
+	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
+		return 0;
 
+	target_c = (msr >> 16) & 0xFF;
+
+	offset_c = (msr >> 24) & 0xF;
+
+	tcc = target_c - offset_c;
+
+	if (!quiet)
+		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
+			base_cpu, msr, tcc, target_c, offset_c);
+
+	return tcc;
+}
+
+int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
 	/* tcc_activation_temp is used only for dts or ptm */
 	if (!(do_dts || do_ptm))
 		return 0;
@@ -4799,43 +4820,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;
 
-	cpu = t->cpu_id;
-	if (cpu_migrate(cpu)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
-		return -1;
-	}
-
 	if (tcc_activation_temp_override != 0) {
 		tcc_activation_temp = tcc_activation_temp_override;
-		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
-			cpu, tcc_activation_temp);
+		fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp);
 		return 0;
 	}
 
-	/* Temperature Target MSR is Nehalem and newer only */
-	if (!do_nhm_platform_info)
-		goto guess;
-
-	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
-		goto guess;
-
-	target_c_local = (msr >> 16) & 0xFF;
-
-	if (!quiet)
-		fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
-			cpu, msr, target_c_local);
-
-	if (!target_c_local)
-		goto guess;
-
-	tcc_activation_temp = target_c_local;
-
-	return 0;
+	tcc_activation_temp = read_tcc_activation_temp();
+	if (tcc_activation_temp)
+		return 0;
 
-guess:
 	tcc_activation_temp = TJMAX_DEFAULT;
-	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
-		cpu, tcc_activation_temp);
+	fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 3d7772ea5602b88c7c7f0a50d512171a2eed6659 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 30 Sep 2020 20:58:15 -0400
Subject: tools/power turbostat: harden against cpu hotplug

turbostat tends to get confused when CPUs are added and removed
while it is running.

There are races, such as checking the current cpu, and then
reading a sysfs file that depends on that cpu number.

Close the two issues that seem to come up the most.
First, there is an infinite reset loop detector --
change that to allow more resets before giving up.
Secondly, one of those file reads didn't really need
to exit the program on failure...

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 4122468fb73b..74d2fc6fc78a 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1894,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	int i;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -2764,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
 
 	sprintf(path,
 		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
-	filep = fopen_or_die(path, "r");
+	filep = fopen(path, "r");
+
+	if (!filep) {
+		warnx("%s: open failed", path);
+		return -1;
+	}
 	do {
 		offset -= BITMASK_SIZE;
 		if (fscanf(filep, "%lx%c", &map, &character) != 2)
@@ -2877,7 +2882,7 @@ void re_initialize(void)
 {
 	free_all_buffers();
 	setup_all_buffers();
-	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+	fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
 }
 
 void set_max_cpu_num(void)
@@ -3331,7 +3336,7 @@ restart:
 	if (retval < -1) {
 		exit(retval);
 	} else if (retval == -1) {
-		if (restarted > 1) {
+		if (restarted > 10) {
 			exit(retval);
 		}
 		re_initialize();
@@ -3926,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -3970,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -4058,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -4439,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 		return 0;
 
 	if (cpu_migrate(cpu)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
@@ -4511,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	cpu = t->cpu_id;
 	if (cpu_migrate(cpu)) {
-		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
 		return -1;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From e722a295cf493388dae474745d30e91e1a2ec549 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 27 Aug 2020 14:36:27 +0200
Subject: staging: ion: remove from the tree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ION android code has long been marked to be removed, now that we
dma-buf support merged into the real part of the kernel.

It was thought that we could wait to remove the ion kernel at a later
time, but as the out-of-tree Android fork of the ion code has diverged
quite a bit, and any Android device using the ion interface uses that
forked version and not this in-tree version, the in-tree copy of the
code is abandonded and not used by anyone.

Combine this abandoned codebase with the need to make changes to it in
order to keep the kernel building properly, which then causes merge
issues when merging those changes into the out-of-tree Android code, and
you end up with two different groups of people (the in-kernel-tree
developers, and the Android kernel developers) who are both annoyed at
the current situation.  Because of this problem, just drop the in-kernel
copy of the ion code now, as it's not used, and is only causing problems
for everyone involved.

Cc: "Arve Hjønnevåg" <arve@android.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Christian Brauner <christian@brauner.io>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hridya Valsaraju <hridya@google.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Laura Abbott <laura@labbott.name>
Cc: Martijn Coenen <maco@android.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Todd Kjos <tkjos@android.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200827123627.538189-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS                                        |  10 -
 drivers/staging/android/Kconfig                    |   2 -
 drivers/staging/android/Makefile                   |   2 -
 drivers/staging/android/TODO                       |   5 -
 drivers/staging/android/ion/Kconfig                |  27 -
 drivers/staging/android/ion/Makefile               |   4 -
 drivers/staging/android/ion/ion.c                  | 649 ---------------------
 drivers/staging/android/ion/ion.h                  | 302 ----------
 drivers/staging/android/ion/ion_cma_heap.c         | 138 -----
 drivers/staging/android/ion/ion_heap.c             | 286 ---------
 drivers/staging/android/ion/ion_page_pool.c        | 155 -----
 drivers/staging/android/ion/ion_system_heap.c      | 377 ------------
 drivers/staging/android/uapi/ion.h                 | 127 ----
 tools/testing/selftests/Makefile                   |   3 +-
 tools/testing/selftests/android/Makefile           |  39 --
 tools/testing/selftests/android/config             |   5 -
 tools/testing/selftests/android/ion/.gitignore     |   4 -
 tools/testing/selftests/android/ion/Makefile       |  20 -
 tools/testing/selftests/android/ion/README         | 101 ----
 tools/testing/selftests/android/ion/ion.h          | 134 -----
 tools/testing/selftests/android/ion/ion_test.sh    |  58 --
 .../testing/selftests/android/ion/ionapp_export.c  | 127 ----
 .../testing/selftests/android/ion/ionapp_import.c  |  79 ---
 tools/testing/selftests/android/ion/ionmap_test.c  | 136 -----
 tools/testing/selftests/android/ion/ionutils.c     | 253 --------
 tools/testing/selftests/android/ion/ionutils.h     |  55 --
 tools/testing/selftests/android/ion/ipcsocket.c    | 227 -------
 tools/testing/selftests/android/ion/ipcsocket.h    |  35 --
 tools/testing/selftests/android/run.sh             |   3 -
 29 files changed, 1 insertion(+), 3362 deletions(-)
 delete mode 100644 drivers/staging/android/ion/Kconfig
 delete mode 100644 drivers/staging/android/ion/Makefile
 delete mode 100644 drivers/staging/android/ion/ion.c
 delete mode 100644 drivers/staging/android/ion/ion.h
 delete mode 100644 drivers/staging/android/ion/ion_cma_heap.c
 delete mode 100644 drivers/staging/android/ion/ion_heap.c
 delete mode 100644 drivers/staging/android/ion/ion_page_pool.c
 delete mode 100644 drivers/staging/android/ion/ion_system_heap.c
 delete mode 100644 drivers/staging/android/uapi/ion.h
 delete mode 100644 tools/testing/selftests/android/Makefile
 delete mode 100644 tools/testing/selftests/android/config
 delete mode 100644 tools/testing/selftests/android/ion/.gitignore
 delete mode 100644 tools/testing/selftests/android/ion/Makefile
 delete mode 100644 tools/testing/selftests/android/ion/README
 delete mode 100644 tools/testing/selftests/android/ion/ion.h
 delete mode 100755 tools/testing/selftests/android/ion/ion_test.sh
 delete mode 100644 tools/testing/selftests/android/ion/ionapp_export.c
 delete mode 100644 tools/testing/selftests/android/ion/ionapp_import.c
 delete mode 100644 tools/testing/selftests/android/ion/ionmap_test.c
 delete mode 100644 tools/testing/selftests/android/ion/ionutils.c
 delete mode 100644 tools/testing/selftests/android/ion/ionutils.h
 delete mode 100644 tools/testing/selftests/android/ion/ipcsocket.c
 delete mode 100644 tools/testing/selftests/android/ion/ipcsocket.h
 delete mode 100755 tools/testing/selftests/android/run.sh

(limited to 'tools')

diff --git a/MAINTAINERS b/MAINTAINERS
index e73636b75f29..b7980e6033f8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1173,16 +1173,6 @@ S:	Supported
 F:	Documentation/devicetree/bindings/rtc/google,goldfish-rtc.txt
 F:	drivers/rtc/rtc-goldfish.c
 
-ANDROID ION DRIVER
-M:	Laura Abbott <labbott@redhat.com>
-M:	Sumit Semwal <sumit.semwal@linaro.org>
-L:	devel@driverdev.osuosl.org
-L:	dri-devel@lists.freedesktop.org
-L:	linaro-mm-sig@lists.linaro.org (moderated for non-subscribers)
-S:	Supported
-F:	drivers/staging/android/ion
-F:	drivers/staging/android/uapi/ion.h
-
 AOA (Apple Onboard Audio) ALSA DRIVER
 M:	Johannes Berg <johannes@sipsolutions.net>
 L:	linuxppc-dev@lists.ozlabs.org
diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig
index 8d8fd5c29349..70498adb1575 100644
--- a/drivers/staging/android/Kconfig
+++ b/drivers/staging/android/Kconfig
@@ -14,8 +14,6 @@ config ASHMEM
 	  It is, in theory, a good memory allocator for low-memory devices,
 	  because it can discard shared memory units when under memory pressure.
 
-source "drivers/staging/android/ion/Kconfig"
-
 endif # if ANDROID
 
 endmenu
diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile
index 3b66cd0b0ec5..e9a55a5e6529 100644
--- a/drivers/staging/android/Makefile
+++ b/drivers/staging/android/Makefile
@@ -1,6 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 ccflags-y += -I$(src)			# needed for trace events
 
-obj-y					+= ion/
-
 obj-$(CONFIG_ASHMEM)			+= ashmem.o
diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO
index 80eccfaf6db5..f74eb44d8e45 100644
--- a/drivers/staging/android/TODO
+++ b/drivers/staging/android/TODO
@@ -4,10 +4,5 @@ TODO:
 	- add proper arch dependencies as needed
 	- audit userspace interfaces to make sure they are sane
 
-
-ion/
- - Split /dev/ion up into multiple nodes (e.g. /dev/ion/heap0)
- - Better test framework (integration with VGEM was suggested)
-
 Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc:
 Arve Hjønnevåg <arve@android.com> and Riley Andrews <riandrews@android.com>
diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig
deleted file mode 100644
index 989fe84a9f9d..000000000000
--- a/drivers/staging/android/ion/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-menuconfig ION
-	bool "Ion Memory Manager"
-	depends on HAS_DMA && MMU
-	select GENERIC_ALLOCATOR
-	select DMA_SHARED_BUFFER
-	help
-	  Choose this option to enable the ION Memory Manager,
-	  used by Android to efficiently allocate buffers
-	  from userspace that can be shared between drivers.
-	  If you're not using Android its probably safe to
-	  say N here.
-
-config ION_SYSTEM_HEAP
-	bool "Ion system heap"
-	depends on ION
-	help
-	  Choose this option to enable the Ion system heap. The system heap
-	  is backed by pages from the buddy allocator. If in doubt, say Y.
-
-config ION_CMA_HEAP
-	bool "Ion CMA heap support"
-	depends on ION && DMA_CMA
-	help
-	  Choose this option to enable CMA heaps with Ion. This heap is backed
-	  by the Contiguous Memory Allocator (CMA). If your system has these
-	  regions, you should say Y here.
diff --git a/drivers/staging/android/ion/Makefile b/drivers/staging/android/ion/Makefile
deleted file mode 100644
index 5f4487b1a224..000000000000
--- a/drivers/staging/android/ion/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_ION) += ion.o ion_heap.o
-obj-$(CONFIG_ION_SYSTEM_HEAP) += ion_system_heap.o ion_page_pool.o
-obj-$(CONFIG_ION_CMA_HEAP) += ion_cma_heap.o
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
deleted file mode 100644
index e1fe03ceb7f1..000000000000
--- a/drivers/staging/android/ion/ion.c
+++ /dev/null
@@ -1,649 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ION Memory Allocator
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-#include <linux/debugfs.h>
-#include <linux/device.h>
-#include <linux/dma-buf.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/file.h>
-#include <linux/freezer.h>
-#include <linux/fs.h>
-#include <linux/kthread.h>
-#include <linux/list.h>
-#include <linux/miscdevice.h>
-#include <linux/mm.h>
-#include <linux/mm_types.h>
-#include <linux/rbtree.h>
-#include <linux/sched/task.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-
-#include "ion.h"
-
-static struct ion_device *internal_dev;
-static int heap_id;
-
-/* this function should only be called while dev->lock is held */
-static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
-					    struct ion_device *dev,
-					    unsigned long len,
-					    unsigned long flags)
-{
-	struct ion_buffer *buffer;
-	int ret;
-
-	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
-	if (!buffer)
-		return ERR_PTR(-ENOMEM);
-
-	buffer->heap = heap;
-	buffer->flags = flags;
-	buffer->dev = dev;
-	buffer->size = len;
-
-	ret = heap->ops->allocate(heap, buffer, len, flags);
-
-	if (ret) {
-		if (!(heap->flags & ION_HEAP_FLAG_DEFER_FREE))
-			goto err2;
-
-		ion_heap_freelist_drain(heap, 0);
-		ret = heap->ops->allocate(heap, buffer, len, flags);
-		if (ret)
-			goto err2;
-	}
-
-	if (!buffer->sg_table) {
-		WARN_ONCE(1, "This heap needs to set the sgtable");
-		ret = -EINVAL;
-		goto err1;
-	}
-
-	spin_lock(&heap->stat_lock);
-	heap->num_of_buffers++;
-	heap->num_of_alloc_bytes += len;
-	if (heap->num_of_alloc_bytes > heap->alloc_bytes_wm)
-		heap->alloc_bytes_wm = heap->num_of_alloc_bytes;
-	spin_unlock(&heap->stat_lock);
-
-	INIT_LIST_HEAD(&buffer->attachments);
-	mutex_init(&buffer->lock);
-	return buffer;
-
-err1:
-	heap->ops->free(buffer);
-err2:
-	kfree(buffer);
-	return ERR_PTR(ret);
-}
-
-void ion_buffer_destroy(struct ion_buffer *buffer)
-{
-	if (buffer->kmap_cnt > 0) {
-		pr_warn_once("%s: buffer still mapped in the kernel\n",
-			     __func__);
-		buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
-	}
-	buffer->heap->ops->free(buffer);
-	spin_lock(&buffer->heap->stat_lock);
-	buffer->heap->num_of_buffers--;
-	buffer->heap->num_of_alloc_bytes -= buffer->size;
-	spin_unlock(&buffer->heap->stat_lock);
-
-	kfree(buffer);
-}
-
-static void _ion_buffer_destroy(struct ion_buffer *buffer)
-{
-	struct ion_heap *heap = buffer->heap;
-
-	if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
-		ion_heap_freelist_add(heap, buffer);
-	else
-		ion_buffer_destroy(buffer);
-}
-
-static void *ion_buffer_kmap_get(struct ion_buffer *buffer)
-{
-	void *vaddr;
-
-	if (buffer->kmap_cnt) {
-		buffer->kmap_cnt++;
-		return buffer->vaddr;
-	}
-	vaddr = buffer->heap->ops->map_kernel(buffer->heap, buffer);
-	if (WARN_ONCE(!vaddr,
-		      "heap->ops->map_kernel should return ERR_PTR on error"))
-		return ERR_PTR(-EINVAL);
-	if (IS_ERR(vaddr))
-		return vaddr;
-	buffer->vaddr = vaddr;
-	buffer->kmap_cnt++;
-	return vaddr;
-}
-
-static void ion_buffer_kmap_put(struct ion_buffer *buffer)
-{
-	buffer->kmap_cnt--;
-	if (!buffer->kmap_cnt) {
-		buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
-		buffer->vaddr = NULL;
-	}
-}
-
-static struct sg_table *dup_sg_table(struct sg_table *table)
-{
-	struct sg_table *new_table;
-	int ret, i;
-	struct scatterlist *sg, *new_sg;
-
-	new_table = kzalloc(sizeof(*new_table), GFP_KERNEL);
-	if (!new_table)
-		return ERR_PTR(-ENOMEM);
-
-	ret = sg_alloc_table(new_table, table->orig_nents, GFP_KERNEL);
-	if (ret) {
-		kfree(new_table);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	new_sg = new_table->sgl;
-	for_each_sgtable_sg(table, sg, i) {
-		memcpy(new_sg, sg, sizeof(*sg));
-		new_sg->dma_address = 0;
-		new_sg = sg_next(new_sg);
-	}
-
-	return new_table;
-}
-
-static void free_duped_table(struct sg_table *table)
-{
-	sg_free_table(table);
-	kfree(table);
-}
-
-struct ion_dma_buf_attachment {
-	struct device *dev;
-	struct sg_table *table;
-	struct list_head list;
-};
-
-static int ion_dma_buf_attach(struct dma_buf *dmabuf,
-			      struct dma_buf_attachment *attachment)
-{
-	struct ion_dma_buf_attachment *a;
-	struct sg_table *table;
-	struct ion_buffer *buffer = dmabuf->priv;
-
-	a = kzalloc(sizeof(*a), GFP_KERNEL);
-	if (!a)
-		return -ENOMEM;
-
-	table = dup_sg_table(buffer->sg_table);
-	if (IS_ERR(table)) {
-		kfree(a);
-		return -ENOMEM;
-	}
-
-	a->table = table;
-	a->dev = attachment->dev;
-	INIT_LIST_HEAD(&a->list);
-
-	attachment->priv = a;
-
-	mutex_lock(&buffer->lock);
-	list_add(&a->list, &buffer->attachments);
-	mutex_unlock(&buffer->lock);
-
-	return 0;
-}
-
-static void ion_dma_buf_detach(struct dma_buf *dmabuf,
-			       struct dma_buf_attachment *attachment)
-{
-	struct ion_dma_buf_attachment *a = attachment->priv;
-	struct ion_buffer *buffer = dmabuf->priv;
-
-	mutex_lock(&buffer->lock);
-	list_del(&a->list);
-	mutex_unlock(&buffer->lock);
-	free_duped_table(a->table);
-
-	kfree(a);
-}
-
-static struct sg_table *ion_map_dma_buf(struct dma_buf_attachment *attachment,
-					enum dma_data_direction direction)
-{
-	struct ion_dma_buf_attachment *a = attachment->priv;
-	struct sg_table *table;
-	int ret;
-
-	table = a->table;
-
-	ret = dma_map_sgtable(attachment->dev, table, direction, 0);
-	if (ret)
-		return ERR_PTR(ret);
-
-	return table;
-}
-
-static void ion_unmap_dma_buf(struct dma_buf_attachment *attachment,
-			      struct sg_table *table,
-			      enum dma_data_direction direction)
-{
-	dma_unmap_sgtable(attachment->dev, table, direction, 0);
-}
-
-static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
-{
-	struct ion_buffer *buffer = dmabuf->priv;
-	int ret = 0;
-
-	if (!buffer->heap->ops->map_user) {
-		pr_err("%s: this heap does not define a method for mapping to userspace\n",
-		       __func__);
-		return -EINVAL;
-	}
-
-	if (!(buffer->flags & ION_FLAG_CACHED))
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
-	mutex_lock(&buffer->lock);
-	/* now map it to userspace */
-	ret = buffer->heap->ops->map_user(buffer->heap, buffer, vma);
-	mutex_unlock(&buffer->lock);
-
-	if (ret)
-		pr_err("%s: failure mapping buffer to userspace\n",
-		       __func__);
-
-	return ret;
-}
-
-static void ion_dma_buf_release(struct dma_buf *dmabuf)
-{
-	struct ion_buffer *buffer = dmabuf->priv;
-
-	_ion_buffer_destroy(buffer);
-}
-
-static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
-					enum dma_data_direction direction)
-{
-	struct ion_buffer *buffer = dmabuf->priv;
-	void *vaddr;
-	struct ion_dma_buf_attachment *a;
-	int ret = 0;
-
-	/*
-	 * TODO: Move this elsewhere because we don't always need a vaddr
-	 */
-	if (buffer->heap->ops->map_kernel) {
-		mutex_lock(&buffer->lock);
-		vaddr = ion_buffer_kmap_get(buffer);
-		if (IS_ERR(vaddr)) {
-			ret = PTR_ERR(vaddr);
-			goto unlock;
-		}
-		mutex_unlock(&buffer->lock);
-	}
-
-	mutex_lock(&buffer->lock);
-	list_for_each_entry(a, &buffer->attachments, list)
-		dma_sync_sgtable_for_cpu(a->dev, a->table, direction);
-
-unlock:
-	mutex_unlock(&buffer->lock);
-	return ret;
-}
-
-static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
-				      enum dma_data_direction direction)
-{
-	struct ion_buffer *buffer = dmabuf->priv;
-	struct ion_dma_buf_attachment *a;
-
-	if (buffer->heap->ops->map_kernel) {
-		mutex_lock(&buffer->lock);
-		ion_buffer_kmap_put(buffer);
-		mutex_unlock(&buffer->lock);
-	}
-
-	mutex_lock(&buffer->lock);
-	list_for_each_entry(a, &buffer->attachments, list)
-		dma_sync_sgtable_for_device(a->dev, a->table, direction);
-	mutex_unlock(&buffer->lock);
-
-	return 0;
-}
-
-static const struct dma_buf_ops dma_buf_ops = {
-	.map_dma_buf = ion_map_dma_buf,
-	.unmap_dma_buf = ion_unmap_dma_buf,
-	.mmap = ion_mmap,
-	.release = ion_dma_buf_release,
-	.attach = ion_dma_buf_attach,
-	.detach = ion_dma_buf_detach,
-	.begin_cpu_access = ion_dma_buf_begin_cpu_access,
-	.end_cpu_access = ion_dma_buf_end_cpu_access,
-};
-
-static int ion_alloc(size_t len, unsigned int heap_id_mask, unsigned int flags)
-{
-	struct ion_device *dev = internal_dev;
-	struct ion_buffer *buffer = NULL;
-	struct ion_heap *heap;
-	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-	int fd;
-	struct dma_buf *dmabuf;
-
-	pr_debug("%s: len %zu heap_id_mask %u flags %x\n", __func__,
-		 len, heap_id_mask, flags);
-	/*
-	 * traverse the list of heaps available in this system in priority
-	 * order.  If the heap type is supported by the client, and matches the
-	 * request of the caller allocate from it.  Repeat until allocate has
-	 * succeeded or all heaps have been tried
-	 */
-	len = PAGE_ALIGN(len);
-
-	if (!len)
-		return -EINVAL;
-
-	down_read(&dev->lock);
-	plist_for_each_entry(heap, &dev->heaps, node) {
-		/* if the caller didn't specify this heap id */
-		if (!((1 << heap->id) & heap_id_mask))
-			continue;
-		buffer = ion_buffer_create(heap, dev, len, flags);
-		if (!IS_ERR(buffer))
-			break;
-	}
-	up_read(&dev->lock);
-
-	if (!buffer)
-		return -ENODEV;
-
-	if (IS_ERR(buffer))
-		return PTR_ERR(buffer);
-
-	exp_info.ops = &dma_buf_ops;
-	exp_info.size = buffer->size;
-	exp_info.flags = O_RDWR;
-	exp_info.priv = buffer;
-
-	dmabuf = dma_buf_export(&exp_info);
-	if (IS_ERR(dmabuf)) {
-		_ion_buffer_destroy(buffer);
-		return PTR_ERR(dmabuf);
-	}
-
-	fd = dma_buf_fd(dmabuf, O_CLOEXEC);
-	if (fd < 0)
-		dma_buf_put(dmabuf);
-
-	return fd;
-}
-
-static int ion_query_heaps(struct ion_heap_query *query)
-{
-	struct ion_device *dev = internal_dev;
-	struct ion_heap_data __user *buffer = u64_to_user_ptr(query->heaps);
-	int ret = -EINVAL, cnt = 0, max_cnt;
-	struct ion_heap *heap;
-	struct ion_heap_data hdata;
-
-	memset(&hdata, 0, sizeof(hdata));
-
-	down_read(&dev->lock);
-	if (!buffer) {
-		query->cnt = dev->heap_cnt;
-		ret = 0;
-		goto out;
-	}
-
-	if (query->cnt <= 0)
-		goto out;
-
-	max_cnt = query->cnt;
-
-	plist_for_each_entry(heap, &dev->heaps, node) {
-		strncpy(hdata.name, heap->name, MAX_HEAP_NAME);
-		hdata.name[sizeof(hdata.name) - 1] = '\0';
-		hdata.type = heap->type;
-		hdata.heap_id = heap->id;
-
-		if (copy_to_user(&buffer[cnt], &hdata, sizeof(hdata))) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		cnt++;
-		if (cnt >= max_cnt)
-			break;
-	}
-
-	query->cnt = cnt;
-	ret = 0;
-out:
-	up_read(&dev->lock);
-	return ret;
-}
-
-union ion_ioctl_arg {
-	struct ion_allocation_data allocation;
-	struct ion_heap_query query;
-};
-
-static int validate_ioctl_arg(unsigned int cmd, union ion_ioctl_arg *arg)
-{
-	switch (cmd) {
-	case ION_IOC_HEAP_QUERY:
-		if (arg->query.reserved0 ||
-		    arg->query.reserved1 ||
-		    arg->query.reserved2)
-			return -EINVAL;
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	int ret = 0;
-	union ion_ioctl_arg data;
-
-	if (_IOC_SIZE(cmd) > sizeof(data))
-		return -EINVAL;
-
-	/*
-	 * The copy_from_user is unconditional here for both read and write
-	 * to do the validate. If there is no write for the ioctl, the
-	 * buffer is cleared
-	 */
-	if (copy_from_user(&data, (void __user *)arg, _IOC_SIZE(cmd)))
-		return -EFAULT;
-
-	ret = validate_ioctl_arg(cmd, &data);
-	if (ret) {
-		pr_warn_once("%s: ioctl validate failed\n", __func__);
-		return ret;
-	}
-
-	if (!(_IOC_DIR(cmd) & _IOC_WRITE))
-		memset(&data, 0, sizeof(data));
-
-	switch (cmd) {
-	case ION_IOC_ALLOC:
-	{
-		int fd;
-
-		fd = ion_alloc(data.allocation.len,
-			       data.allocation.heap_id_mask,
-			       data.allocation.flags);
-		if (fd < 0)
-			return fd;
-
-		data.allocation.fd = fd;
-
-		break;
-	}
-	case ION_IOC_HEAP_QUERY:
-		ret = ion_query_heaps(&data.query);
-		break;
-	default:
-		return -ENOTTY;
-	}
-
-	if (_IOC_DIR(cmd) & _IOC_READ) {
-		if (copy_to_user((void __user *)arg, &data, _IOC_SIZE(cmd)))
-			return -EFAULT;
-	}
-	return ret;
-}
-
-static const struct file_operations ion_fops = {
-	.owner          = THIS_MODULE,
-	.unlocked_ioctl = ion_ioctl,
-	.compat_ioctl	= compat_ptr_ioctl,
-};
-
-static int debug_shrink_set(void *data, u64 val)
-{
-	struct ion_heap *heap = data;
-	struct shrink_control sc;
-	int objs;
-
-	sc.gfp_mask = GFP_HIGHUSER;
-	sc.nr_to_scan = val;
-
-	if (!val) {
-		objs = heap->shrinker.count_objects(&heap->shrinker, &sc);
-		sc.nr_to_scan = objs;
-	}
-
-	heap->shrinker.scan_objects(&heap->shrinker, &sc);
-	return 0;
-}
-
-static int debug_shrink_get(void *data, u64 *val)
-{
-	struct ion_heap *heap = data;
-	struct shrink_control sc;
-	int objs;
-
-	sc.gfp_mask = GFP_HIGHUSER;
-	sc.nr_to_scan = 0;
-
-	objs = heap->shrinker.count_objects(&heap->shrinker, &sc);
-	*val = objs;
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(debug_shrink_fops, debug_shrink_get,
-			debug_shrink_set, "%llu\n");
-
-void ion_device_add_heap(struct ion_heap *heap)
-{
-	struct ion_device *dev = internal_dev;
-	int ret;
-	struct dentry *heap_root;
-	char debug_name[64];
-
-	if (!heap->ops->allocate || !heap->ops->free)
-		pr_err("%s: can not add heap with invalid ops struct.\n",
-		       __func__);
-
-	spin_lock_init(&heap->free_lock);
-	spin_lock_init(&heap->stat_lock);
-	heap->free_list_size = 0;
-
-	if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
-		ion_heap_init_deferred_free(heap);
-
-	if ((heap->flags & ION_HEAP_FLAG_DEFER_FREE) || heap->ops->shrink) {
-		ret = ion_heap_init_shrinker(heap);
-		if (ret)
-			pr_err("%s: Failed to register shrinker\n", __func__);
-	}
-
-	heap->dev = dev;
-	heap->num_of_buffers = 0;
-	heap->num_of_alloc_bytes = 0;
-	heap->alloc_bytes_wm = 0;
-
-	heap_root = debugfs_create_dir(heap->name, dev->debug_root);
-	debugfs_create_u64("num_of_buffers",
-			   0444, heap_root,
-			   &heap->num_of_buffers);
-	debugfs_create_u64("num_of_alloc_bytes",
-			   0444,
-			   heap_root,
-			   &heap->num_of_alloc_bytes);
-	debugfs_create_u64("alloc_bytes_wm",
-			   0444,
-			   heap_root,
-			   &heap->alloc_bytes_wm);
-
-	if (heap->shrinker.count_objects &&
-	    heap->shrinker.scan_objects) {
-		snprintf(debug_name, 64, "%s_shrink", heap->name);
-		debugfs_create_file(debug_name,
-				    0644,
-				    heap_root,
-				    heap,
-				    &debug_shrink_fops);
-	}
-
-	down_write(&dev->lock);
-	heap->id = heap_id++;
-	/*
-	 * use negative heap->id to reverse the priority -- when traversing
-	 * the list later attempt higher id numbers first
-	 */
-	plist_node_init(&heap->node, -heap->id);
-	plist_add(&heap->node, &dev->heaps);
-
-	dev->heap_cnt++;
-	up_write(&dev->lock);
-}
-EXPORT_SYMBOL(ion_device_add_heap);
-
-static int ion_device_create(void)
-{
-	struct ion_device *idev;
-	int ret;
-
-	idev = kzalloc(sizeof(*idev), GFP_KERNEL);
-	if (!idev)
-		return -ENOMEM;
-
-	idev->dev.minor = MISC_DYNAMIC_MINOR;
-	idev->dev.name = "ion";
-	idev->dev.fops = &ion_fops;
-	idev->dev.parent = NULL;
-	ret = misc_register(&idev->dev);
-	if (ret) {
-		pr_err("ion: failed to register misc device.\n");
-		kfree(idev);
-		return ret;
-	}
-
-	idev->debug_root = debugfs_create_dir("ion", NULL);
-	init_rwsem(&idev->lock);
-	plist_head_init(&idev->heaps);
-	internal_dev = idev;
-	return 0;
-}
-subsys_initcall(ion_device_create);
diff --git a/drivers/staging/android/ion/ion.h b/drivers/staging/android/ion/ion.h
deleted file mode 100644
index c199e88afc6c..000000000000
--- a/drivers/staging/android/ion/ion.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ION Memory Allocator kernel interface header
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-#ifndef _ION_H
-#define _ION_H
-
-#include <linux/device.h>
-#include <linux/dma-direction.h>
-#include <linux/kref.h>
-#include <linux/mm_types.h>
-#include <linux/mutex.h>
-#include <linux/rbtree.h>
-#include <linux/sched.h>
-#include <linux/shrinker.h>
-#include <linux/types.h>
-#include <linux/miscdevice.h>
-
-#include "../uapi/ion.h"
-
-/**
- * struct ion_buffer - metadata for a particular buffer
- * @list:		element in list of deferred freeable buffers
- * @dev:		back pointer to the ion_device
- * @heap:		back pointer to the heap the buffer came from
- * @flags:		buffer specific flags
- * @private_flags:	internal buffer specific flags
- * @size:		size of the buffer
- * @priv_virt:		private data to the buffer representable as
- *			a void *
- * @lock:		protects the buffers cnt fields
- * @kmap_cnt:		number of times the buffer is mapped to the kernel
- * @vaddr:		the kernel mapping if kmap_cnt is not zero
- * @sg_table:		the sg table for the buffer
- * @attachments:	list of devices attached to this buffer
- */
-struct ion_buffer {
-	struct list_head list;
-	struct ion_device *dev;
-	struct ion_heap *heap;
-	unsigned long flags;
-	unsigned long private_flags;
-	size_t size;
-	void *priv_virt;
-	struct mutex lock;
-	int kmap_cnt;
-	void *vaddr;
-	struct sg_table *sg_table;
-	struct list_head attachments;
-};
-
-void ion_buffer_destroy(struct ion_buffer *buffer);
-
-/**
- * struct ion_device - the metadata of the ion device node
- * @dev:		the actual misc device
- * @lock:		rwsem protecting the tree of heaps and clients
- */
-struct ion_device {
-	struct miscdevice dev;
-	struct rw_semaphore lock;
-	struct plist_head heaps;
-	struct dentry *debug_root;
-	int heap_cnt;
-};
-
-/**
- * struct ion_heap_ops - ops to operate on a given heap
- * @allocate:		allocate memory
- * @free:		free memory
- * @map_kernel		map memory to the kernel
- * @unmap_kernel	unmap memory to the kernel
- * @map_user		map memory to userspace
- *
- * allocate, phys, and map_user return 0 on success, -errno on error.
- * map_dma and map_kernel return pointer on success, ERR_PTR on
- * error. @free will be called with ION_PRIV_FLAG_SHRINKER_FREE set in
- * the buffer's private_flags when called from a shrinker. In that
- * case, the pages being free'd must be truly free'd back to the
- * system, not put in a page pool or otherwise cached.
- */
-struct ion_heap_ops {
-	int (*allocate)(struct ion_heap *heap,
-			struct ion_buffer *buffer, unsigned long len,
-			unsigned long flags);
-	void (*free)(struct ion_buffer *buffer);
-	void * (*map_kernel)(struct ion_heap *heap, struct ion_buffer *buffer);
-	void (*unmap_kernel)(struct ion_heap *heap, struct ion_buffer *buffer);
-	int (*map_user)(struct ion_heap *mapper, struct ion_buffer *buffer,
-			struct vm_area_struct *vma);
-	int (*shrink)(struct ion_heap *heap, gfp_t gfp_mask, int nr_to_scan);
-};
-
-/**
- * heap flags - flags between the heaps and core ion code
- */
-#define ION_HEAP_FLAG_DEFER_FREE BIT(0)
-
-/**
- * private flags - flags internal to ion
- */
-/*
- * Buffer is being freed from a shrinker function. Skip any possible
- * heap-specific caching mechanism (e.g. page pools). Guarantees that
- * any buffer storage that came from the system allocator will be
- * returned to the system allocator.
- */
-#define ION_PRIV_FLAG_SHRINKER_FREE BIT(0)
-
-/**
- * struct ion_heap - represents a heap in the system
- * @node:		rb node to put the heap on the device's tree of heaps
- * @dev:		back pointer to the ion_device
- * @type:		type of heap
- * @ops:		ops struct as above
- * @flags:		flags
- * @id:			id of heap, also indicates priority of this heap when
- *			allocating.  These are specified by platform data and
- *			MUST be unique
- * @name:		used for debugging
- * @shrinker:		a shrinker for the heap
- * @free_list:		free list head if deferred free is used
- * @free_list_size	size of the deferred free list in bytes
- * @lock:		protects the free list
- * @waitqueue:		queue to wait on from deferred free thread
- * @task:		task struct of deferred free thread
- * @num_of_buffers	the number of currently allocated buffers
- * @num_of_alloc_bytes	the number of allocated bytes
- * @alloc_bytes_wm	the number of allocated bytes watermark
- *
- * Represents a pool of memory from which buffers can be made.  In some
- * systems the only heap is regular system memory allocated via vmalloc.
- * On others, some blocks might require large physically contiguous buffers
- * that are allocated from a specially reserved heap.
- */
-struct ion_heap {
-	struct plist_node node;
-	struct ion_device *dev;
-	enum ion_heap_type type;
-	struct ion_heap_ops *ops;
-	unsigned long flags;
-	unsigned int id;
-	const char *name;
-
-	/* deferred free support */
-	struct shrinker shrinker;
-	struct list_head free_list;
-	size_t free_list_size;
-	spinlock_t free_lock;
-	wait_queue_head_t waitqueue;
-	struct task_struct *task;
-
-	/* heap statistics */
-	u64 num_of_buffers;
-	u64 num_of_alloc_bytes;
-	u64 alloc_bytes_wm;
-
-	/* protect heap statistics */
-	spinlock_t stat_lock;
-};
-
-/**
- * ion_device_add_heap - adds a heap to the ion device
- * @heap:		the heap to add
- */
-void ion_device_add_heap(struct ion_heap *heap);
-
-/**
- * some helpers for common operations on buffers using the sg_table
- * and vaddr fields
- */
-void *ion_heap_map_kernel(struct ion_heap *heap, struct ion_buffer *buffer);
-void ion_heap_unmap_kernel(struct ion_heap *heap, struct ion_buffer *buffer);
-int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
-		      struct vm_area_struct *vma);
-int ion_heap_buffer_zero(struct ion_buffer *buffer);
-
-/**
- * ion_heap_init_shrinker
- * @heap:		the heap
- *
- * If a heap sets the ION_HEAP_FLAG_DEFER_FREE flag or defines the shrink op
- * this function will be called to setup a shrinker to shrink the freelists
- * and call the heap's shrink op.
- */
-int ion_heap_init_shrinker(struct ion_heap *heap);
-
-/**
- * ion_heap_init_deferred_free -- initialize deferred free functionality
- * @heap:		the heap
- *
- * If a heap sets the ION_HEAP_FLAG_DEFER_FREE flag this function will
- * be called to setup deferred frees. Calls to free the buffer will
- * return immediately and the actual free will occur some time later
- */
-int ion_heap_init_deferred_free(struct ion_heap *heap);
-
-/**
- * ion_heap_freelist_add - add a buffer to the deferred free list
- * @heap:		the heap
- * @buffer:		the buffer
- *
- * Adds an item to the deferred freelist.
- */
-void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer);
-
-/**
- * ion_heap_freelist_drain - drain the deferred free list
- * @heap:		the heap
- * @size:		amount of memory to drain in bytes
- *
- * Drains the indicated amount of memory from the deferred freelist immediately.
- * Returns the total amount freed.  The total freed may be higher depending
- * on the size of the items in the list, or lower if there is insufficient
- * total memory on the freelist.
- */
-size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size);
-
-/**
- * ion_heap_freelist_shrink - drain the deferred free
- *				list, skipping any heap-specific
- *				pooling or caching mechanisms
- *
- * @heap:		the heap
- * @size:		amount of memory to drain in bytes
- *
- * Drains the indicated amount of memory from the deferred freelist immediately.
- * Returns the total amount freed.  The total freed may be higher depending
- * on the size of the items in the list, or lower if there is insufficient
- * total memory on the freelist.
- *
- * Unlike with @ion_heap_freelist_drain, don't put any pages back into
- * page pools or otherwise cache the pages. Everything must be
- * genuinely free'd back to the system. If you're free'ing from a
- * shrinker you probably want to use this. Note that this relies on
- * the heap.ops.free callback honoring the ION_PRIV_FLAG_SHRINKER_FREE
- * flag.
- */
-size_t ion_heap_freelist_shrink(struct ion_heap *heap,
-				size_t size);
-
-/**
- * ion_heap_freelist_size - returns the size of the freelist in bytes
- * @heap:		the heap
- */
-size_t ion_heap_freelist_size(struct ion_heap *heap);
-
-/**
- * functions for creating and destroying a heap pool -- allows you
- * to keep a pool of pre allocated memory to use from your heap.  Keeping
- * a pool of memory that is ready for dma, ie any cached mapping have been
- * invalidated from the cache, provides a significant performance benefit on
- * many systems
- */
-
-/**
- * struct ion_page_pool - pagepool struct
- * @high_count:		number of highmem items in the pool
- * @low_count:		number of lowmem items in the pool
- * @high_items:		list of highmem items
- * @low_items:		list of lowmem items
- * @mutex:		lock protecting this struct and especially the count
- *			item list
- * @gfp_mask:		gfp_mask to use from alloc
- * @order:		order of pages in the pool
- * @list:		plist node for list of pools
- *
- * Allows you to keep a pool of pre allocated pages to use from your heap.
- * Keeping a pool of pages that is ready for dma, ie any cached mapping have
- * been invalidated from the cache, provides a significant performance benefit
- * on many systems
- */
-struct ion_page_pool {
-	int high_count;
-	int low_count;
-	struct list_head high_items;
-	struct list_head low_items;
-	struct mutex mutex;
-	gfp_t gfp_mask;
-	unsigned int order;
-	struct plist_node list;
-};
-
-struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order);
-void ion_page_pool_destroy(struct ion_page_pool *pool);
-struct page *ion_page_pool_alloc(struct ion_page_pool *pool);
-void ion_page_pool_free(struct ion_page_pool *pool, struct page *page);
-
-/** ion_page_pool_shrink - shrinks the size of the memory cached in the pool
- * @pool:		the pool
- * @gfp_mask:		the memory type to reclaim
- * @nr_to_scan:		number of items to shrink in pages
- *
- * returns the number of items freed in pages
- */
-int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask,
-			 int nr_to_scan);
-
-#endif /* _ION_H */
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
deleted file mode 100644
index bf65e67ef9d8..000000000000
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ION Memory Allocator CMA heap exporter
- *
- * Copyright (C) Linaro 2012
- * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
- */
-
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/cma.h>
-#include <linux/scatterlist.h>
-#include <linux/highmem.h>
-
-#include "ion.h"
-
-struct ion_cma_heap {
-	struct ion_heap heap;
-	struct cma *cma;
-};
-
-#define to_cma_heap(x) container_of(x, struct ion_cma_heap, heap)
-
-/* ION CMA heap operations functions */
-static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
-			    unsigned long len,
-			    unsigned long flags)
-{
-	struct ion_cma_heap *cma_heap = to_cma_heap(heap);
-	struct sg_table *table;
-	struct page *pages;
-	unsigned long size = PAGE_ALIGN(len);
-	unsigned long nr_pages = size >> PAGE_SHIFT;
-	unsigned long align = get_order(size);
-	int ret;
-
-	if (align > CONFIG_CMA_ALIGNMENT)
-		align = CONFIG_CMA_ALIGNMENT;
-
-	pages = cma_alloc(cma_heap->cma, nr_pages, align, false);
-	if (!pages)
-		return -ENOMEM;
-
-	if (PageHighMem(pages)) {
-		unsigned long nr_clear_pages = nr_pages;
-		struct page *page = pages;
-
-		while (nr_clear_pages > 0) {
-			void *vaddr = kmap_atomic(page);
-
-			memset(vaddr, 0, PAGE_SIZE);
-			kunmap_atomic(vaddr);
-			page++;
-			nr_clear_pages--;
-		}
-	} else {
-		memset(page_address(pages), 0, size);
-	}
-
-	table = kmalloc(sizeof(*table), GFP_KERNEL);
-	if (!table)
-		goto err;
-
-	ret = sg_alloc_table(table, 1, GFP_KERNEL);
-	if (ret)
-		goto free_mem;
-
-	sg_set_page(table->sgl, pages, size, 0);
-
-	buffer->priv_virt = pages;
-	buffer->sg_table = table;
-	return 0;
-
-free_mem:
-	kfree(table);
-err:
-	cma_release(cma_heap->cma, pages, nr_pages);
-	return -ENOMEM;
-}
-
-static void ion_cma_free(struct ion_buffer *buffer)
-{
-	struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
-	struct page *pages = buffer->priv_virt;
-	unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
-
-	/* release memory */
-	cma_release(cma_heap->cma, pages, nr_pages);
-	/* release sg table */
-	sg_free_table(buffer->sg_table);
-	kfree(buffer->sg_table);
-}
-
-static struct ion_heap_ops ion_cma_ops = {
-	.allocate = ion_cma_allocate,
-	.free = ion_cma_free,
-	.map_user = ion_heap_map_user,
-	.map_kernel = ion_heap_map_kernel,
-	.unmap_kernel = ion_heap_unmap_kernel,
-};
-
-static struct ion_heap *__ion_cma_heap_create(struct cma *cma)
-{
-	struct ion_cma_heap *cma_heap;
-
-	cma_heap = kzalloc(sizeof(*cma_heap), GFP_KERNEL);
-
-	if (!cma_heap)
-		return ERR_PTR(-ENOMEM);
-
-	cma_heap->heap.ops = &ion_cma_ops;
-	cma_heap->cma = cma;
-	cma_heap->heap.type = ION_HEAP_TYPE_DMA;
-	return &cma_heap->heap;
-}
-
-static int __ion_add_cma_heaps(struct cma *cma, void *data)
-{
-	struct ion_heap *heap;
-
-	heap = __ion_cma_heap_create(cma);
-	if (IS_ERR(heap))
-		return PTR_ERR(heap);
-
-	heap->name = cma_get_name(cma);
-
-	ion_device_add_heap(heap);
-	return 0;
-}
-
-static int ion_add_cma_heaps(void)
-{
-	cma_for_each_area(__ion_add_cma_heaps, NULL);
-	return 0;
-}
-device_initcall(ion_add_cma_heaps);
diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c
deleted file mode 100644
index ea7e0a244ffc..000000000000
--- a/drivers/staging/android/ion/ion_heap.c
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ION Memory Allocator generic heap helpers
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-#include <linux/err.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/mm.h>
-#include <linux/rtmutex.h>
-#include <linux/sched.h>
-#include <uapi/linux/sched/types.h>
-#include <linux/scatterlist.h>
-#include <linux/vmalloc.h>
-
-#include "ion.h"
-
-void *ion_heap_map_kernel(struct ion_heap *heap,
-			  struct ion_buffer *buffer)
-{
-	struct sg_page_iter piter;
-	void *vaddr;
-	pgprot_t pgprot;
-	struct sg_table *table = buffer->sg_table;
-	int npages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
-	struct page **pages = vmalloc(array_size(npages,
-						 sizeof(struct page *)));
-	struct page **tmp = pages;
-
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-
-	if (buffer->flags & ION_FLAG_CACHED)
-		pgprot = PAGE_KERNEL;
-	else
-		pgprot = pgprot_writecombine(PAGE_KERNEL);
-
-	for_each_sgtable_page(table, &piter, 0) {
-		BUG_ON(tmp - pages >= npages);
-		*tmp++ = sg_page_iter_page(&piter);
-	}
-
-	vaddr = vmap(pages, npages, VM_MAP, pgprot);
-	vfree(pages);
-
-	if (!vaddr)
-		return ERR_PTR(-ENOMEM);
-
-	return vaddr;
-}
-
-void ion_heap_unmap_kernel(struct ion_heap *heap,
-			   struct ion_buffer *buffer)
-{
-	vunmap(buffer->vaddr);
-}
-
-int ion_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
-		      struct vm_area_struct *vma)
-{
-	struct sg_page_iter piter;
-	struct sg_table *table = buffer->sg_table;
-	unsigned long addr = vma->vm_start;
-	int ret;
-
-	for_each_sgtable_page(table, &piter, vma->vm_pgoff) {
-		struct page *page = sg_page_iter_page(&piter);
-
-		ret = remap_pfn_range(vma, addr, page_to_pfn(page), PAGE_SIZE,
-				      vma->vm_page_prot);
-		if (ret)
-			return ret;
-		addr += PAGE_SIZE;
-		if (addr >= vma->vm_end)
-			return 0;
-	}
-
-	return 0;
-}
-
-static int ion_heap_clear_pages(struct page **pages, int num, pgprot_t pgprot)
-{
-	void *addr = vmap(pages, num, VM_MAP, pgprot);
-
-	if (!addr)
-		return -ENOMEM;
-	memset(addr, 0, PAGE_SIZE * num);
-	vunmap(addr);
-
-	return 0;
-}
-
-static int ion_heap_sglist_zero(struct sg_table *sgt, pgprot_t pgprot)
-{
-	int p = 0;
-	int ret = 0;
-	struct sg_page_iter piter;
-	struct page *pages[32];
-
-	for_each_sgtable_page(sgt, &piter, 0) {
-		pages[p++] = sg_page_iter_page(&piter);
-		if (p == ARRAY_SIZE(pages)) {
-			ret = ion_heap_clear_pages(pages, p, pgprot);
-			if (ret)
-				return ret;
-			p = 0;
-		}
-	}
-	if (p)
-		ret = ion_heap_clear_pages(pages, p, pgprot);
-
-	return ret;
-}
-
-int ion_heap_buffer_zero(struct ion_buffer *buffer)
-{
-	struct sg_table *table = buffer->sg_table;
-	pgprot_t pgprot;
-
-	if (buffer->flags & ION_FLAG_CACHED)
-		pgprot = PAGE_KERNEL;
-	else
-		pgprot = pgprot_writecombine(PAGE_KERNEL);
-
-	return ion_heap_sglist_zero(table, pgprot);
-}
-
-void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer)
-{
-	spin_lock(&heap->free_lock);
-	list_add(&buffer->list, &heap->free_list);
-	heap->free_list_size += buffer->size;
-	spin_unlock(&heap->free_lock);
-	wake_up(&heap->waitqueue);
-}
-
-size_t ion_heap_freelist_size(struct ion_heap *heap)
-{
-	size_t size;
-
-	spin_lock(&heap->free_lock);
-	size = heap->free_list_size;
-	spin_unlock(&heap->free_lock);
-
-	return size;
-}
-
-static size_t _ion_heap_freelist_drain(struct ion_heap *heap, size_t size,
-				       bool skip_pools)
-{
-	struct ion_buffer *buffer;
-	size_t total_drained = 0;
-
-	if (ion_heap_freelist_size(heap) == 0)
-		return 0;
-
-	spin_lock(&heap->free_lock);
-	if (size == 0)
-		size = heap->free_list_size;
-
-	while (!list_empty(&heap->free_list)) {
-		if (total_drained >= size)
-			break;
-		buffer = list_first_entry(&heap->free_list, struct ion_buffer,
-					  list);
-		list_del(&buffer->list);
-		heap->free_list_size -= buffer->size;
-		if (skip_pools)
-			buffer->private_flags |= ION_PRIV_FLAG_SHRINKER_FREE;
-		total_drained += buffer->size;
-		spin_unlock(&heap->free_lock);
-		ion_buffer_destroy(buffer);
-		spin_lock(&heap->free_lock);
-	}
-	spin_unlock(&heap->free_lock);
-
-	return total_drained;
-}
-
-size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size)
-{
-	return _ion_heap_freelist_drain(heap, size, false);
-}
-
-size_t ion_heap_freelist_shrink(struct ion_heap *heap, size_t size)
-{
-	return _ion_heap_freelist_drain(heap, size, true);
-}
-
-static int ion_heap_deferred_free(void *data)
-{
-	struct ion_heap *heap = data;
-
-	while (true) {
-		struct ion_buffer *buffer;
-
-		wait_event_freezable(heap->waitqueue,
-				     ion_heap_freelist_size(heap) > 0);
-
-		spin_lock(&heap->free_lock);
-		if (list_empty(&heap->free_list)) {
-			spin_unlock(&heap->free_lock);
-			continue;
-		}
-		buffer = list_first_entry(&heap->free_list, struct ion_buffer,
-					  list);
-		list_del(&buffer->list);
-		heap->free_list_size -= buffer->size;
-		spin_unlock(&heap->free_lock);
-		ion_buffer_destroy(buffer);
-	}
-
-	return 0;
-}
-
-int ion_heap_init_deferred_free(struct ion_heap *heap)
-{
-	INIT_LIST_HEAD(&heap->free_list);
-	init_waitqueue_head(&heap->waitqueue);
-	heap->task = kthread_run(ion_heap_deferred_free, heap,
-				 "%s", heap->name);
-	if (IS_ERR(heap->task)) {
-		pr_err("%s: creating thread for deferred free failed\n",
-		       __func__);
-		return PTR_ERR_OR_ZERO(heap->task);
-	}
-	sched_set_normal(heap->task, 19);
-
-	return 0;
-}
-
-static unsigned long ion_heap_shrink_count(struct shrinker *shrinker,
-					   struct shrink_control *sc)
-{
-	struct ion_heap *heap = container_of(shrinker, struct ion_heap,
-					     shrinker);
-	int total = 0;
-
-	total = ion_heap_freelist_size(heap) / PAGE_SIZE;
-
-	if (heap->ops->shrink)
-		total += heap->ops->shrink(heap, sc->gfp_mask, 0);
-
-	return total;
-}
-
-static unsigned long ion_heap_shrink_scan(struct shrinker *shrinker,
-					  struct shrink_control *sc)
-{
-	struct ion_heap *heap = container_of(shrinker, struct ion_heap,
-					     shrinker);
-	int freed = 0;
-	int to_scan = sc->nr_to_scan;
-
-	if (to_scan == 0)
-		return 0;
-
-	/*
-	 * shrink the free list first, no point in zeroing the memory if we're
-	 * just going to reclaim it. Also, skip any possible page pooling.
-	 */
-	if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
-		freed = ion_heap_freelist_shrink(heap, to_scan * PAGE_SIZE) /
-				PAGE_SIZE;
-
-	to_scan -= freed;
-	if (to_scan <= 0)
-		return freed;
-
-	if (heap->ops->shrink)
-		freed += heap->ops->shrink(heap, sc->gfp_mask, to_scan);
-
-	return freed;
-}
-
-int ion_heap_init_shrinker(struct ion_heap *heap)
-{
-	heap->shrinker.count_objects = ion_heap_shrink_count;
-	heap->shrinker.scan_objects = ion_heap_shrink_scan;
-	heap->shrinker.seeks = DEFAULT_SEEKS;
-	heap->shrinker.batch = 0;
-
-	return register_shrinker(&heap->shrinker);
-}
diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c
deleted file mode 100644
index 0198b886d906..000000000000
--- a/drivers/staging/android/ion/ion_page_pool.c
+++ /dev/null
@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ION Memory Allocator page pool helpers
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/sched/signal.h>
-
-#include "ion.h"
-
-static inline struct page *ion_page_pool_alloc_pages(struct ion_page_pool *pool)
-{
-	if (fatal_signal_pending(current))
-		return NULL;
-	return alloc_pages(pool->gfp_mask, pool->order);
-}
-
-static void ion_page_pool_free_pages(struct ion_page_pool *pool,
-				     struct page *page)
-{
-	__free_pages(page, pool->order);
-}
-
-static void ion_page_pool_add(struct ion_page_pool *pool, struct page *page)
-{
-	mutex_lock(&pool->mutex);
-	if (PageHighMem(page)) {
-		list_add_tail(&page->lru, &pool->high_items);
-		pool->high_count++;
-	} else {
-		list_add_tail(&page->lru, &pool->low_items);
-		pool->low_count++;
-	}
-
-	mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
-			    1 << pool->order);
-	mutex_unlock(&pool->mutex);
-}
-
-static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high)
-{
-	struct page *page;
-
-	if (high) {
-		BUG_ON(!pool->high_count);
-		page = list_first_entry(&pool->high_items, struct page, lru);
-		pool->high_count--;
-	} else {
-		BUG_ON(!pool->low_count);
-		page = list_first_entry(&pool->low_items, struct page, lru);
-		pool->low_count--;
-	}
-
-	list_del(&page->lru);
-	mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
-			    -(1 << pool->order));
-	return page;
-}
-
-struct page *ion_page_pool_alloc(struct ion_page_pool *pool)
-{
-	struct page *page = NULL;
-
-	BUG_ON(!pool);
-
-	mutex_lock(&pool->mutex);
-	if (pool->high_count)
-		page = ion_page_pool_remove(pool, true);
-	else if (pool->low_count)
-		page = ion_page_pool_remove(pool, false);
-	mutex_unlock(&pool->mutex);
-
-	if (!page)
-		page = ion_page_pool_alloc_pages(pool);
-
-	return page;
-}
-
-void ion_page_pool_free(struct ion_page_pool *pool, struct page *page)
-{
-	BUG_ON(pool->order != compound_order(page));
-
-	ion_page_pool_add(pool, page);
-}
-
-static int ion_page_pool_total(struct ion_page_pool *pool, bool high)
-{
-	int count = pool->low_count;
-
-	if (high)
-		count += pool->high_count;
-
-	return count << pool->order;
-}
-
-int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask,
-			 int nr_to_scan)
-{
-	int freed = 0;
-	bool high;
-
-	if (current_is_kswapd())
-		high = true;
-	else
-		high = !!(gfp_mask & __GFP_HIGHMEM);
-
-	if (nr_to_scan == 0)
-		return ion_page_pool_total(pool, high);
-
-	while (freed < nr_to_scan) {
-		struct page *page;
-
-		mutex_lock(&pool->mutex);
-		if (pool->low_count) {
-			page = ion_page_pool_remove(pool, false);
-		} else if (high && pool->high_count) {
-			page = ion_page_pool_remove(pool, true);
-		} else {
-			mutex_unlock(&pool->mutex);
-			break;
-		}
-		mutex_unlock(&pool->mutex);
-		ion_page_pool_free_pages(pool, page);
-		freed += (1 << pool->order);
-	}
-
-	return freed;
-}
-
-struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order)
-{
-	struct ion_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL);
-
-	if (!pool)
-		return NULL;
-	pool->high_count = 0;
-	pool->low_count = 0;
-	INIT_LIST_HEAD(&pool->low_items);
-	INIT_LIST_HEAD(&pool->high_items);
-	pool->gfp_mask = gfp_mask | __GFP_COMP;
-	pool->order = order;
-	mutex_init(&pool->mutex);
-	plist_node_init(&pool->list, order);
-
-	return pool;
-}
-
-void ion_page_pool_destroy(struct ion_page_pool *pool)
-{
-	kfree(pool);
-}
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
deleted file mode 100644
index eac0632ab4e8..000000000000
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ /dev/null
@@ -1,377 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ION Memory Allocator system heap exporter
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-#include <asm/page.h>
-#include <linux/dma-mapping.h>
-#include <linux/err.h>
-#include <linux/highmem.h>
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "ion.h"
-
-#define NUM_ORDERS ARRAY_SIZE(orders)
-
-static gfp_t high_order_gfp_flags = (GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN |
-				     __GFP_NORETRY) & ~__GFP_RECLAIM;
-static gfp_t low_order_gfp_flags  = GFP_HIGHUSER | __GFP_ZERO;
-static const unsigned int orders[] = {8, 4, 0};
-
-static int order_to_index(unsigned int order)
-{
-	int i;
-
-	for (i = 0; i < NUM_ORDERS; i++)
-		if (order == orders[i])
-			return i;
-	BUG();
-	return -1;
-}
-
-static inline unsigned int order_to_size(int order)
-{
-	return PAGE_SIZE << order;
-}
-
-struct ion_system_heap {
-	struct ion_heap heap;
-	struct ion_page_pool *pools[NUM_ORDERS];
-};
-
-static struct page *alloc_buffer_page(struct ion_system_heap *heap,
-				      struct ion_buffer *buffer,
-				      unsigned long order)
-{
-	struct ion_page_pool *pool = heap->pools[order_to_index(order)];
-
-	return ion_page_pool_alloc(pool);
-}
-
-static void free_buffer_page(struct ion_system_heap *heap,
-			     struct ion_buffer *buffer, struct page *page)
-{
-	struct ion_page_pool *pool;
-	unsigned int order = compound_order(page);
-
-	/* go to system */
-	if (buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE) {
-		__free_pages(page, order);
-		return;
-	}
-
-	pool = heap->pools[order_to_index(order)];
-
-	ion_page_pool_free(pool, page);
-}
-
-static struct page *alloc_largest_available(struct ion_system_heap *heap,
-					    struct ion_buffer *buffer,
-					    unsigned long size,
-					    unsigned int max_order)
-{
-	struct page *page;
-	int i;
-
-	for (i = 0; i < NUM_ORDERS; i++) {
-		if (size < order_to_size(orders[i]))
-			continue;
-		if (max_order < orders[i])
-			continue;
-
-		page = alloc_buffer_page(heap, buffer, orders[i]);
-		if (!page)
-			continue;
-
-		return page;
-	}
-
-	return NULL;
-}
-
-static int ion_system_heap_allocate(struct ion_heap *heap,
-				    struct ion_buffer *buffer,
-				    unsigned long size,
-				    unsigned long flags)
-{
-	struct ion_system_heap *sys_heap = container_of(heap,
-							struct ion_system_heap,
-							heap);
-	struct sg_table *table;
-	struct scatterlist *sg;
-	struct list_head pages;
-	struct page *page, *tmp_page;
-	int i = 0;
-	unsigned long size_remaining = PAGE_ALIGN(size);
-	unsigned int max_order = orders[0];
-
-	if (size / PAGE_SIZE > totalram_pages() / 2)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&pages);
-	while (size_remaining > 0) {
-		page = alloc_largest_available(sys_heap, buffer, size_remaining,
-					       max_order);
-		if (!page)
-			goto free_pages;
-		list_add_tail(&page->lru, &pages);
-		size_remaining -= page_size(page);
-		max_order = compound_order(page);
-		i++;
-	}
-	table = kmalloc(sizeof(*table), GFP_KERNEL);
-	if (!table)
-		goto free_pages;
-
-	if (sg_alloc_table(table, i, GFP_KERNEL))
-		goto free_table;
-
-	sg = table->sgl;
-	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
-		sg_set_page(sg, page, page_size(page), 0);
-		sg = sg_next(sg);
-		list_del(&page->lru);
-	}
-
-	buffer->sg_table = table;
-	return 0;
-
-free_table:
-	kfree(table);
-free_pages:
-	list_for_each_entry_safe(page, tmp_page, &pages, lru)
-		free_buffer_page(sys_heap, buffer, page);
-	return -ENOMEM;
-}
-
-static void ion_system_heap_free(struct ion_buffer *buffer)
-{
-	struct ion_system_heap *sys_heap = container_of(buffer->heap,
-							struct ion_system_heap,
-							heap);
-	struct sg_table *table = buffer->sg_table;
-	struct scatterlist *sg;
-	int i;
-
-	/* zero the buffer before goto page pool */
-	if (!(buffer->private_flags & ION_PRIV_FLAG_SHRINKER_FREE))
-		ion_heap_buffer_zero(buffer);
-
-	for_each_sgtable_sg(table, sg, i)
-		free_buffer_page(sys_heap, buffer, sg_page(sg));
-	sg_free_table(table);
-	kfree(table);
-}
-
-static int ion_system_heap_shrink(struct ion_heap *heap, gfp_t gfp_mask,
-				  int nr_to_scan)
-{
-	struct ion_page_pool *pool;
-	struct ion_system_heap *sys_heap;
-	int nr_total = 0;
-	int i, nr_freed;
-	int only_scan = 0;
-
-	sys_heap = container_of(heap, struct ion_system_heap, heap);
-
-	if (!nr_to_scan)
-		only_scan = 1;
-
-	for (i = 0; i < NUM_ORDERS; i++) {
-		pool = sys_heap->pools[i];
-
-		if (only_scan) {
-			nr_total += ion_page_pool_shrink(pool,
-							 gfp_mask,
-							 nr_to_scan);
-
-		} else {
-			nr_freed = ion_page_pool_shrink(pool,
-							gfp_mask,
-							nr_to_scan);
-			nr_to_scan -= nr_freed;
-			nr_total += nr_freed;
-			if (nr_to_scan <= 0)
-				break;
-		}
-	}
-	return nr_total;
-}
-
-static struct ion_heap_ops system_heap_ops = {
-	.allocate = ion_system_heap_allocate,
-	.free = ion_system_heap_free,
-	.map_kernel = ion_heap_map_kernel,
-	.unmap_kernel = ion_heap_unmap_kernel,
-	.map_user = ion_heap_map_user,
-	.shrink = ion_system_heap_shrink,
-};
-
-static void ion_system_heap_destroy_pools(struct ion_page_pool **pools)
-{
-	int i;
-
-	for (i = 0; i < NUM_ORDERS; i++)
-		if (pools[i])
-			ion_page_pool_destroy(pools[i]);
-}
-
-static int ion_system_heap_create_pools(struct ion_page_pool **pools)
-{
-	int i;
-
-	for (i = 0; i < NUM_ORDERS; i++) {
-		struct ion_page_pool *pool;
-		gfp_t gfp_flags = low_order_gfp_flags;
-
-		if (orders[i] > 4)
-			gfp_flags = high_order_gfp_flags;
-
-		pool = ion_page_pool_create(gfp_flags, orders[i]);
-		if (!pool)
-			goto err_create_pool;
-		pools[i] = pool;
-	}
-
-	return 0;
-
-err_create_pool:
-	ion_system_heap_destroy_pools(pools);
-	return -ENOMEM;
-}
-
-static struct ion_heap *__ion_system_heap_create(void)
-{
-	struct ion_system_heap *heap;
-
-	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
-	if (!heap)
-		return ERR_PTR(-ENOMEM);
-	heap->heap.ops = &system_heap_ops;
-	heap->heap.type = ION_HEAP_TYPE_SYSTEM;
-	heap->heap.flags = ION_HEAP_FLAG_DEFER_FREE;
-
-	if (ion_system_heap_create_pools(heap->pools))
-		goto free_heap;
-
-	return &heap->heap;
-
-free_heap:
-	kfree(heap);
-	return ERR_PTR(-ENOMEM);
-}
-
-static int ion_system_heap_create(void)
-{
-	struct ion_heap *heap;
-
-	heap = __ion_system_heap_create();
-	if (IS_ERR(heap))
-		return PTR_ERR(heap);
-	heap->name = "ion_system_heap";
-
-	ion_device_add_heap(heap);
-
-	return 0;
-}
-device_initcall(ion_system_heap_create);
-
-static int ion_system_contig_heap_allocate(struct ion_heap *heap,
-					   struct ion_buffer *buffer,
-					   unsigned long len,
-					   unsigned long flags)
-{
-	int order = get_order(len);
-	struct page *page;
-	struct sg_table *table;
-	unsigned long i;
-	int ret;
-
-	page = alloc_pages(low_order_gfp_flags | __GFP_NOWARN, order);
-	if (!page)
-		return -ENOMEM;
-
-	split_page(page, order);
-
-	len = PAGE_ALIGN(len);
-	for (i = len >> PAGE_SHIFT; i < (1 << order); i++)
-		__free_page(page + i);
-
-	table = kmalloc(sizeof(*table), GFP_KERNEL);
-	if (!table) {
-		ret = -ENOMEM;
-		goto free_pages;
-	}
-
-	ret = sg_alloc_table(table, 1, GFP_KERNEL);
-	if (ret)
-		goto free_table;
-
-	sg_set_page(table->sgl, page, len, 0);
-
-	buffer->sg_table = table;
-
-	return 0;
-
-free_table:
-	kfree(table);
-free_pages:
-	for (i = 0; i < len >> PAGE_SHIFT; i++)
-		__free_page(page + i);
-
-	return ret;
-}
-
-static void ion_system_contig_heap_free(struct ion_buffer *buffer)
-{
-	struct sg_table *table = buffer->sg_table;
-	struct page *page = sg_page(table->sgl);
-	unsigned long pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
-	unsigned long i;
-
-	for (i = 0; i < pages; i++)
-		__free_page(page + i);
-	sg_free_table(table);
-	kfree(table);
-}
-
-static struct ion_heap_ops kmalloc_ops = {
-	.allocate = ion_system_contig_heap_allocate,
-	.free = ion_system_contig_heap_free,
-	.map_kernel = ion_heap_map_kernel,
-	.unmap_kernel = ion_heap_unmap_kernel,
-	.map_user = ion_heap_map_user,
-};
-
-static struct ion_heap *__ion_system_contig_heap_create(void)
-{
-	struct ion_heap *heap;
-
-	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
-	if (!heap)
-		return ERR_PTR(-ENOMEM);
-	heap->ops = &kmalloc_ops;
-	heap->type = ION_HEAP_TYPE_SYSTEM_CONTIG;
-	heap->name = "ion_system_contig_heap";
-
-	return heap;
-}
-
-static int ion_system_contig_heap_create(void)
-{
-	struct ion_heap *heap;
-
-	heap = __ion_system_contig_heap_create();
-	if (IS_ERR(heap))
-		return PTR_ERR(heap);
-
-	ion_device_add_heap(heap);
-
-	return 0;
-}
-device_initcall(ion_system_contig_heap_create);
diff --git a/drivers/staging/android/uapi/ion.h b/drivers/staging/android/uapi/ion.h
deleted file mode 100644
index 46c93fcb46d6..000000000000
--- a/drivers/staging/android/uapi/ion.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * drivers/staging/android/uapi/ion.h
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-#ifndef _UAPI_LINUX_ION_H
-#define _UAPI_LINUX_ION_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-
-/**
- * enum ion_heap_types - list of all possible types of heaps
- * @ION_HEAP_TYPE_SYSTEM:	 memory allocated via vmalloc
- * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
- * @ION_HEAP_TYPE_CARVEOUT:	 memory allocated from a prereserved
- *				 carveout heap, allocations are physically
- *				 contiguous
- * @ION_HEAP_TYPE_DMA:		 memory allocated via DMA API
- * @ION_NUM_HEAPS:		 helper for iterating over heaps, a bit mask
- *				 is used to identify the heaps, so only 32
- *				 total heap types are supported
- */
-enum ion_heap_type {
-	ION_HEAP_TYPE_SYSTEM,
-	ION_HEAP_TYPE_SYSTEM_CONTIG,
-	ION_HEAP_TYPE_CARVEOUT,
-	ION_HEAP_TYPE_CHUNK,
-	ION_HEAP_TYPE_DMA,
-	ION_HEAP_TYPE_CUSTOM, /*
-			       * must be last so device specific heaps always
-			       * are at the end of this enum
-			       */
-};
-
-#define ION_NUM_HEAP_IDS		(sizeof(unsigned int) * 8)
-
-/**
- * allocation flags - the lower 16 bits are used by core ion, the upper 16
- * bits are reserved for use by the heaps themselves.
- */
-
-/*
- * mappings of this buffer should be cached, ion will do cache maintenance
- * when the buffer is mapped for dma
- */
-#define ION_FLAG_CACHED 1
-
-/**
- * DOC: Ion Userspace API
- *
- * create a client by opening /dev/ion
- * most operations handled via following ioctls
- *
- */
-
-/**
- * struct ion_allocation_data - metadata passed from userspace for allocations
- * @len:		size of the allocation
- * @heap_id_mask:	mask of heap ids to allocate from
- * @flags:		flags passed to heap
- * @handle:		pointer that will be populated with a cookie to use to
- *			refer to this allocation
- *
- * Provided by userspace as an argument to the ioctl
- */
-struct ion_allocation_data {
-	__u64 len;
-	__u32 heap_id_mask;
-	__u32 flags;
-	__u32 fd;
-	__u32 unused;
-};
-
-#define MAX_HEAP_NAME			32
-
-/**
- * struct ion_heap_data - data about a heap
- * @name - first 32 characters of the heap name
- * @type - heap type
- * @heap_id - heap id for the heap
- */
-struct ion_heap_data {
-	char name[MAX_HEAP_NAME];
-	__u32 type;
-	__u32 heap_id;
-	__u32 reserved0;
-	__u32 reserved1;
-	__u32 reserved2;
-};
-
-/**
- * struct ion_heap_query - collection of data about all heaps
- * @cnt - total number of heaps to be copied
- * @heaps - buffer to copy heap data
- */
-struct ion_heap_query {
-	__u32 cnt; /* Total number of heaps to be copied */
-	__u32 reserved0; /* align to 64bits */
-	__u64 heaps; /* buffer to be populated */
-	__u32 reserved1;
-	__u32 reserved2;
-};
-
-#define ION_IOC_MAGIC		'I'
-
-/**
- * DOC: ION_IOC_ALLOC - allocate memory
- *
- * Takes an ion_allocation_data struct and returns it with the handle field
- * populated with the opaque handle for the allocation.
- */
-#define ION_IOC_ALLOC		_IOWR(ION_IOC_MAGIC, 0, \
-				      struct ion_allocation_data)
-
-/**
- * DOC: ION_IOC_HEAP_QUERY - information about available heaps
- *
- * Takes an ion_heap_query structure and populates information about
- * available Ion heaps.
- */
-#define ION_IOC_HEAP_QUERY     _IOWR(ION_IOC_MAGIC, 8, \
-					struct ion_heap_query)
-
-#endif /* _UAPI_LINUX_ION_H */
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..924cbcc22e0d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TARGETS = android
-TARGETS += arm64
+TARGETS = arm64
 TARGETS += bpf
 TARGETS += breakpoints
 TARGETS += capabilities
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
deleted file mode 100644
index 9258306cafe9..000000000000
--- a/tools/testing/selftests/android/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-SUBDIRS := ion
-
-TEST_PROGS := run.sh
-
-.PHONY: all clean
-
-include ../lib.mk
-
-all:
-	@for DIR in $(SUBDIRS); do		\
-		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
-		mkdir $$BUILD_TARGET  -p;	\
-		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-		#SUBDIR test prog name should be in the form: SUBDIR_test.sh \
-		TEST=$$DIR"_test.sh"; \
-		if [ -e $$DIR/$$TEST ]; then \
-			rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
-		fi \
-	done
-
-override define INSTALL_RULE
-	mkdir -p $(INSTALL_PATH)
-install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)  $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-
-	@for SUBDIR in $(SUBDIRS); do \
-		BUILD_TARGET=$(OUTPUT)/$$SUBDIR;	\
-		mkdir $$BUILD_TARGET  -p;	\
-		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
-	done;
-endef
-
-override define CLEAN
-	@for DIR in $(SUBDIRS); do		\
-		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
-		mkdir $$BUILD_TARGET  -p;	\
-		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
-	done
-endef
diff --git a/tools/testing/selftests/android/config b/tools/testing/selftests/android/config
deleted file mode 100644
index b4ad748a9dd9..000000000000
--- a/tools/testing/selftests/android/config
+++ /dev/null
@@ -1,5 +0,0 @@
-CONFIG_ANDROID=y
-CONFIG_STAGING=y
-CONFIG_ION=y
-CONFIG_ION_SYSTEM_HEAP=y
-CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
deleted file mode 100644
index 78eae9972bb1..000000000000
--- a/tools/testing/selftests/android/ion/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ionapp_export
-ionapp_import
-ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
deleted file mode 100644
index 42b71f005332..000000000000
--- a/tools/testing/selftests/android/ion/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
-CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
-
-TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
-
-all: $(TEST_GEN_FILES)
-
-$(TEST_GEN_FILES): ipcsocket.c ionutils.c
-
-TEST_PROGS := ion_test.sh
-
-KSFT_KHDR_INSTALL := 1
-top_srcdir = ../../../../..
-include ../../lib.mk
-
-$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c ipcsocket.c
diff --git a/tools/testing/selftests/android/ion/README b/tools/testing/selftests/android/ion/README
deleted file mode 100644
index 21783e9c451e..000000000000
--- a/tools/testing/selftests/android/ion/README
+++ /dev/null
@@ -1,101 +0,0 @@
-ION BUFFER SHARING UTILITY
-==========================
-File: ion_test.sh : Utility to test ION driver buffer sharing mechanism.
-Author: Pintu Kumar <pintu.ping@gmail.com>
-
-Introduction:
--------------
-This is a test utility to verify ION buffer sharing in user space
-between 2 independent processes.
-It uses unix domain socket (with SCM_RIGHTS) as IPC to transfer an FD to
-another process to share the same buffer.
-This utility demonstrates how ION buffer sharing can be implemented between
-two user space processes, using various heap types.
-The following heap types are supported by ION driver.
-ION_HEAP_TYPE_SYSTEM (0)
-ION_HEAP_TYPE_SYSTEM_CONTIG (1)
-ION_HEAP_TYPE_CARVEOUT (2)
-ION_HEAP_TYPE_CHUNK (3)
-ION_HEAP_TYPE_DMA (4)
-
-By default only the SYSTEM and SYSTEM_CONTIG heaps are supported.
-Each heap is associated with the respective heap id.
-This utility is designed in the form of client/server program.
-The server part (ionapp_export) is the exporter of the buffer.
-It is responsible for creating an ION client, allocating the buffer based on
-the heap id, writing some data to this buffer and then exporting the FD
-(associated with this buffer) to another process using socket IPC.
-This FD is called as buffer FD (which is different than the ION client FD).
-
-The client part (ionapp_import) is the importer of the buffer.
-It retrives the FD from the socket data and installs into its address space.
-This new FD internally points to the same kernel buffer.
-So first it reads the data that is stored in this buffer and prints it.
-Then it writes the different size of data (it could be different data) to the
-same buffer.
-Finally the buffer FD must be closed by both the exporter and importer.
-Thus the same kernel buffer is shared among two user space processes using
-ION driver and only one time allocation.
-
-Prerequisite:
--------------
-This utility works only if /dev/ion interface is present.
-The following configs needs to be enabled in kernel to include ion driver.
-CONFIG_ANDROID=y
-CONFIG_STAGING=y
-CONFIG_ION=y
-CONFIG_ION_SYSTEM_HEAP=y
-
-This utility requires to be run as root user.
-
-
-Compile and test:
------------------
-This utility is made to be run as part of kselftest framework in kernel.
-To compile and run using kselftest you can simply do the following from the
-kernel top directory.
-linux$ make TARGETS=android kselftest
-Or you can also use:
-linux$ make -C tools/testing/selftests TARGETS=android run_tests
-Using the selftest it can directly execute the ion_test.sh script to test the
-buffer sharing using ion system heap.
-Currently the heap size is hard coded as just 10 bytes inside this script.
-You need to be a root user to run under selftest.
-
-You can also compile and test manually using the following steps:
-ion$ make
-These will generate 2 executable: ionapp_export, ionapp_import
-Now you can run the export and import manually by specifying the heap type
-and the heap size.
-You can also directly execute the shell script to run the test automatically.
-Simply use the following command to run the test.
-ion$ sudo ./ion_test.sh
-
-Test Results:
--------------
-The utility is verified on Ubuntu-32 bit system with Linux Kernel 4.14.
-Here is the snapshot of the test result using kselftest.
-
-linux# make TARGETS=android kselftest
-heap_type: 0, heap_size: 10
---------------------------------------
-heap type: 0
-  heap id: 1
-heap name: ion_system_heap
---------------------------------------
-Fill buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-Sharing fd: 6, Client fd: 5
-<ion_close_buffer_fd>: buffer release successfully....
-Received buffer fd: 4
-Read buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0x0 0x0 0x0 0x0 0x0 0x0
-0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
-Fill buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-0xfd 0xfd
-<ion_close_buffer_fd>: buffer release successfully....
-ion_test.sh: heap_type: 0 - [PASS]
-
-ion_test.sh: done
diff --git a/tools/testing/selftests/android/ion/ion.h b/tools/testing/selftests/android/ion/ion.h
deleted file mode 100644
index 33db23018abf..000000000000
--- a/tools/testing/selftests/android/ion/ion.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * ion.h
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-/* This file is copied from drivers/staging/android/uapi/ion.h
- * This local copy is required for the selftest to pass, when build
- * outside the kernel source tree.
- * Please keep this file in sync with its original file until the
- * ion driver is moved outside the staging tree.
- */
-
-#ifndef _UAPI_LINUX_ION_H
-#define _UAPI_LINUX_ION_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-
-/**
- * enum ion_heap_types - list of all possible types of heaps
- * @ION_HEAP_TYPE_SYSTEM:	 memory allocated via vmalloc
- * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
- * @ION_HEAP_TYPE_CARVEOUT:	 memory allocated from a prereserved
- *				 carveout heap, allocations are physically
- *				 contiguous
- * @ION_HEAP_TYPE_DMA:		 memory allocated via DMA API
- * @ION_NUM_HEAPS:		 helper for iterating over heaps, a bit mask
- *				 is used to identify the heaps, so only 32
- *				 total heap types are supported
- */
-enum ion_heap_type {
-	ION_HEAP_TYPE_SYSTEM,
-	ION_HEAP_TYPE_SYSTEM_CONTIG,
-	ION_HEAP_TYPE_CARVEOUT,
-	ION_HEAP_TYPE_CHUNK,
-	ION_HEAP_TYPE_DMA,
-	ION_HEAP_TYPE_CUSTOM, /*
-			       * must be last so device specific heaps always
-			       * are at the end of this enum
-			       */
-};
-
-#define ION_NUM_HEAP_IDS		(sizeof(unsigned int) * 8)
-
-/**
- * allocation flags - the lower 16 bits are used by core ion, the upper 16
- * bits are reserved for use by the heaps themselves.
- */
-
-/*
- * mappings of this buffer should be cached, ion will do cache maintenance
- * when the buffer is mapped for dma
- */
-#define ION_FLAG_CACHED 1
-
-/**
- * DOC: Ion Userspace API
- *
- * create a client by opening /dev/ion
- * most operations handled via following ioctls
- *
- */
-
-/**
- * struct ion_allocation_data - metadata passed from userspace for allocations
- * @len:		size of the allocation
- * @heap_id_mask:	mask of heap ids to allocate from
- * @flags:		flags passed to heap
- * @handle:		pointer that will be populated with a cookie to use to
- *			refer to this allocation
- *
- * Provided by userspace as an argument to the ioctl
- */
-struct ion_allocation_data {
-	__u64 len;
-	__u32 heap_id_mask;
-	__u32 flags;
-	__u32 fd;
-	__u32 unused;
-};
-
-#define MAX_HEAP_NAME			32
-
-/**
- * struct ion_heap_data - data about a heap
- * @name - first 32 characters of the heap name
- * @type - heap type
- * @heap_id - heap id for the heap
- */
-struct ion_heap_data {
-	char name[MAX_HEAP_NAME];
-	__u32 type;
-	__u32 heap_id;
-	__u32 reserved0;
-	__u32 reserved1;
-	__u32 reserved2;
-};
-
-/**
- * struct ion_heap_query - collection of data about all heaps
- * @cnt - total number of heaps to be copied
- * @heaps - buffer to copy heap data
- */
-struct ion_heap_query {
-	__u32 cnt; /* Total number of heaps to be copied */
-	__u32 reserved0; /* align to 64bits */
-	__u64 heaps; /* buffer to be populated */
-	__u32 reserved1;
-	__u32 reserved2;
-};
-
-#define ION_IOC_MAGIC		'I'
-
-/**
- * DOC: ION_IOC_ALLOC - allocate memory
- *
- * Takes an ion_allocation_data struct and returns it with the handle field
- * populated with the opaque handle for the allocation.
- */
-#define ION_IOC_ALLOC		_IOWR(ION_IOC_MAGIC, 0, \
-				      struct ion_allocation_data)
-
-/**
- * DOC: ION_IOC_HEAP_QUERY - information about available heaps
- *
- * Takes an ion_heap_query structure and populates information about
- * available Ion heaps.
- */
-#define ION_IOC_HEAP_QUERY     _IOWR(ION_IOC_MAGIC, 8, \
-					struct ion_heap_query)
-
-#endif /* _UAPI_LINUX_ION_H */
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
deleted file mode 100755
index 69e676cfc94e..000000000000
--- a/tools/testing/selftests/android/ion/ion_test.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-heapsize=4096
-TCID="ion_test.sh"
-errcode=0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-run_test()
-{
-	heaptype=$1
-	./ionapp_export -i $heaptype -s $heapsize &
-	sleep 1
-	./ionapp_import
-	if [ $? -ne 0 ]; then
-		echo "$TCID: heap_type: $heaptype - [FAIL]"
-		errcode=1
-	else
-		echo "$TCID: heap_type: $heaptype - [PASS]"
-	fi
-	sleep 1
-	echo ""
-}
-
-check_root()
-{
-	uid=$(id -u)
-	if [ $uid -ne 0 ]; then
-		echo $TCID: must be run as root >&2
-		exit $ksft_skip
-	fi
-}
-
-check_device()
-{
-	DEVICE=/dev/ion
-	if [ ! -e $DEVICE ]; then
-		echo $TCID: No $DEVICE device found >&2
-		echo $TCID: May be CONFIG_ION is not set >&2
-		exit $ksft_skip
-	fi
-}
-
-main_function()
-{
-	check_device
-	check_root
-
-	# ION_SYSTEM_HEAP TEST
-	run_test 0
-	# ION_SYSTEM_CONTIG_HEAP TEST
-	run_test 1
-}
-
-main_function
-echo "$TCID: done"
-exit $errcode
diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c
deleted file mode 100644
index 063b7830d1bd..000000000000
--- a/tools/testing/selftests/android/ion/ionapp_export.c
+++ /dev/null
@@ -1,127 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ionapp_export.c
- *
- * It is a user space utility to create and export android
- * ion memory buffer fd to another process using unix domain socket as IPC.
- * This acts like a server for ionapp_import(client).
- * So, this server has to be started first before the client.
- *
- * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/time.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-void print_usage(int argc, char *argv[])
-{
-	printf("Usage: %s [-h <help>] [-i <heap id>] [-s <size in bytes>]\n",
-		argv[0]);
-}
-
-int main(int argc, char *argv[])
-{
-	int opt, ret, status, heapid;
-	int sockfd, client_fd, shared_fd;
-	unsigned char *map_buf;
-	unsigned long map_len, heap_type, heap_size, flags;
-	struct ion_buffer_info info;
-	struct socket_info skinfo;
-
-	if (argc < 2) {
-		print_usage(argc, argv);
-		return -1;
-	}
-
-	heap_size = 0;
-	flags = 0;
-	heap_type = ION_HEAP_TYPE_SYSTEM;
-
-	while ((opt = getopt(argc, argv, "hi:s:")) != -1) {
-		switch (opt) {
-		case 'h':
-			print_usage(argc, argv);
-			exit(0);
-			break;
-		case 'i':
-			heapid = atoi(optarg);
-			switch (heapid) {
-			case 0:
-				heap_type = ION_HEAP_TYPE_SYSTEM;
-				break;
-			case 1:
-				heap_type = ION_HEAP_TYPE_SYSTEM_CONTIG;
-				break;
-			default:
-				printf("ERROR: heap type not supported\n");
-				exit(1);
-			}
-			break;
-		case 's':
-			heap_size = atoi(optarg);
-			break;
-		default:
-			print_usage(argc, argv);
-			exit(1);
-			break;
-		}
-	}
-
-	if (heap_size <= 0) {
-		printf("heap_size cannot be 0\n");
-		print_usage(argc, argv);
-		exit(1);
-	}
-
-	printf("heap_type: %ld, heap_size: %ld\n", heap_type, heap_size);
-	info.heap_type = heap_type;
-	info.heap_size = heap_size;
-	info.flag_type = flags;
-
-	/* This is server: open the socket connection first */
-	/* Here; 1 indicates server or exporter */
-	status = opensocket(&sockfd, SOCKET_NAME, 1);
-	if (status < 0) {
-		fprintf(stderr, "<%s>: Failed opensocket.\n", __func__);
-		goto err_socket;
-	}
-	skinfo.sockfd = sockfd;
-
-	ret = ion_export_buffer_fd(&info);
-	if (ret < 0) {
-		fprintf(stderr, "FAILED: ion_get_buffer_fd\n");
-		goto err_export;
-	}
-	client_fd = info.ionfd;
-	shared_fd = info.buffd;
-	map_buf = info.buffer;
-	map_len = info.buflen;
-	write_buffer(map_buf, map_len);
-
-	/* share ion buf fd with other user process */
-	printf("Sharing fd: %d, Client fd: %d\n", shared_fd, client_fd);
-	skinfo.datafd = shared_fd;
-	skinfo.buflen = map_len;
-
-	ret = socket_send_fd(&skinfo);
-	if (ret < 0) {
-		fprintf(stderr, "FAILED: socket_send_fd\n");
-		goto err_send;
-	}
-
-err_send:
-err_export:
-	ion_close_buffer_fd(&info);
-
-err_socket:
-	closesocket(sockfd, SOCKET_NAME);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/android/ion/ionapp_import.c b/tools/testing/selftests/android/ion/ionapp_import.c
deleted file mode 100644
index 54b580cb04f6..000000000000
--- a/tools/testing/selftests/android/ion/ionapp_import.c
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ionapp_import.c
- *
- * It is a user space utility to receive android ion memory buffer fd
- * over unix domain socket IPC that can be exported by ionapp_export.
- * This acts like a client for ionapp_export.
- *
- * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-int main(void)
-{
-	int ret, status;
-	int sockfd, shared_fd;
-	unsigned char *map_buf;
-	unsigned long map_len;
-	struct ion_buffer_info info;
-	struct socket_info skinfo;
-
-	/* This is the client part. Here 0 means client or importer */
-	status = opensocket(&sockfd, SOCKET_NAME, 0);
-	if (status < 0) {
-		fprintf(stderr, "No exporter exists...\n");
-		ret = status;
-		goto err_socket;
-	}
-
-	skinfo.sockfd = sockfd;
-
-	ret = socket_receive_fd(&skinfo);
-	if (ret < 0) {
-		fprintf(stderr, "Failed: socket_receive_fd\n");
-		goto err_recv;
-	}
-
-	shared_fd = skinfo.datafd;
-	printf("Received buffer fd: %d\n", shared_fd);
-	if (shared_fd <= 0) {
-		fprintf(stderr, "ERROR: improper buf fd\n");
-		ret = -1;
-		goto err_fd;
-	}
-
-	memset(&info, 0, sizeof(info));
-	info.buffd = shared_fd;
-	info.buflen = ION_BUFFER_LEN;
-
-	ret = ion_import_buffer_fd(&info);
-	if (ret < 0) {
-		fprintf(stderr, "Failed: ion_use_buffer_fd\n");
-		goto err_import;
-	}
-
-	map_buf = info.buffer;
-	map_len = info.buflen;
-	read_buffer(map_buf, map_len);
-
-	/* Write probably new data to the same buffer again */
-	map_len = ION_BUFFER_LEN;
-	write_buffer(map_buf, map_len);
-
-err_import:
-	ion_close_buffer_fd(&info);
-err_fd:
-err_recv:
-err_socket:
-	closesocket(sockfd, SOCKET_NAME);
-
-	return ret;
-}
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
deleted file mode 100644
index dab36b06b37d..000000000000
--- a/tools/testing/selftests/android/ion/ionmap_test.c
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <linux/dma-buf.h>
-
-#include <drm/drm.h>
-
-#include "ion.h"
-#include "ionutils.h"
-
-int check_vgem(int fd)
-{
-	drm_version_t version = { 0 };
-	char name[5];
-	int ret;
-
-	version.name_len = 4;
-	version.name = name;
-
-	ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
-	if (ret)
-		return 1;
-
-	return strcmp(name, "vgem");
-}
-
-int open_vgem(void)
-{
-	int i, fd;
-	const char *drmstr = "/dev/dri/card";
-
-	fd = -1;
-	for (i = 0; i < 16; i++) {
-		char name[80];
-
-		sprintf(name, "%s%u", drmstr, i);
-
-		fd = open(name, O_RDWR);
-		if (fd < 0)
-			continue;
-
-		if (check_vgem(fd)) {
-			close(fd);
-			continue;
-		} else {
-			break;
-		}
-
-	}
-	return fd;
-}
-
-int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
-{
-	struct drm_prime_handle import_handle = { 0 };
-	int ret;
-
-	import_handle.fd = dma_buf_fd;
-	import_handle.flags = 0;
-	import_handle.handle = 0;
-
-	ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
-	if (ret == 0)
-		*handle = import_handle.handle;
-	return ret;
-}
-
-void close_handle(int vgem_fd, uint32_t handle)
-{
-	struct drm_gem_close close = { 0 };
-
-	close.handle = handle;
-	ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
-}
-
-int main()
-{
-	int ret, vgem_fd;
-	struct ion_buffer_info info;
-	uint32_t handle = 0;
-	struct dma_buf_sync sync = { 0 };
-
-	info.heap_type = ION_HEAP_TYPE_SYSTEM;
-	info.heap_size = 4096;
-	info.flag_type = ION_FLAG_CACHED;
-
-	ret = ion_export_buffer_fd(&info);
-	if (ret < 0) {
-		printf("ion buffer alloc failed\n");
-		return -1;
-	}
-
-	vgem_fd = open_vgem();
-	if (vgem_fd < 0) {
-		ret = vgem_fd;
-		printf("Failed to open vgem\n");
-		goto out_ion;
-	}
-
-	ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
-
-	if (ret < 0) {
-		printf("Failed to import buffer\n");
-		goto out_vgem;
-	}
-
-	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
-	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
-	if (ret)
-		printf("sync start failed %d\n", errno);
-
-	memset(info.buffer, 0xff, 4096);
-
-	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
-	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
-	if (ret)
-		printf("sync end failed %d\n", errno);
-
-	close_handle(vgem_fd, handle);
-	ret = 0;
-
-out_vgem:
-	close(vgem_fd);
-out_ion:
-	ion_close_buffer_fd(&info);
-	printf("done.\n");
-	return ret;
-}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
deleted file mode 100644
index 7d1d37c4ef6a..000000000000
--- a/tools/testing/selftests/android/ion/ionutils.c
+++ /dev/null
@@ -1,253 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-//#include <stdint.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-void write_buffer(void *buffer, unsigned long len)
-{
-	int i;
-	unsigned char *ptr = (unsigned char *)buffer;
-
-	if (!ptr) {
-		fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
-		return;
-	}
-
-	printf("Fill buffer content:\n");
-	memset(ptr, 0xfd, len);
-	for (i = 0; i < len; i++)
-		printf("0x%x ", ptr[i]);
-	printf("\n");
-}
-
-void read_buffer(void *buffer, unsigned long len)
-{
-	int i;
-	unsigned char *ptr = (unsigned char *)buffer;
-
-	if (!ptr) {
-		fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
-		return;
-	}
-
-	printf("Read buffer content:\n");
-	for (i = 0; i < len; i++)
-		printf("0x%x ", ptr[i]);
-	printf("\n");
-}
-
-int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
-{
-	int i, ret, ionfd, buffer_fd;
-	unsigned int heap_id;
-	unsigned long maplen;
-	unsigned char *map_buffer;
-	struct ion_allocation_data alloc_data;
-	struct ion_heap_query query;
-	struct ion_heap_data heap_data[MAX_HEAP_COUNT];
-
-	if (!ion_info) {
-		fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
-		return -1;
-	}
-
-	/* Create an ION client */
-	ionfd = open(ION_DEVICE, O_RDWR);
-	if (ionfd < 0) {
-		fprintf(stderr, "<%s>: Failed to open ion client: %s\n",
-			__func__, strerror(errno));
-		return -1;
-	}
-
-	memset(&query, 0, sizeof(query));
-	query.cnt = MAX_HEAP_COUNT;
-	query.heaps = (unsigned long int)&heap_data[0];
-	/* Query ION heap_id_mask from ION heap */
-	ret = ioctl(ionfd, ION_IOC_HEAP_QUERY, &query);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed: ION_IOC_HEAP_QUERY: %s\n",
-			__func__, strerror(errno));
-		goto err_query;
-	}
-
-	heap_id = MAX_HEAP_COUNT + 1;
-	for (i = 0; i < query.cnt; i++) {
-		if (heap_data[i].type == ion_info->heap_type) {
-			heap_id = heap_data[i].heap_id;
-			break;
-		}
-	}
-
-	if (heap_id > MAX_HEAP_COUNT) {
-		fprintf(stderr, "<%s>: ERROR: heap type does not exists\n",
-			__func__);
-		goto err_heap;
-	}
-
-	alloc_data.len = ion_info->heap_size;
-	alloc_data.heap_id_mask = 1 << heap_id;
-	alloc_data.flags = ion_info->flag_type;
-
-	/* Allocate memory for this ION client as per heap_type */
-	ret = ioctl(ionfd, ION_IOC_ALLOC, &alloc_data);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed: ION_IOC_ALLOC: %s\n",
-			__func__, strerror(errno));
-		goto err_alloc;
-	}
-
-	/* This will return a valid buffer fd */
-	buffer_fd = alloc_data.fd;
-	maplen = alloc_data.len;
-
-	if (buffer_fd < 0 || maplen <= 0) {
-		fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
-			__func__, buffer_fd, maplen);
-		goto err_fd_data;
-	}
-
-	/* Create memory mapped buffer for the buffer fd */
-	map_buffer = (unsigned char *)mmap(NULL, maplen, PROT_READ|PROT_WRITE,
-			MAP_SHARED, buffer_fd, 0);
-	if (map_buffer == MAP_FAILED) {
-		fprintf(stderr, "<%s>: Failed: mmap: %s\n",
-			__func__, strerror(errno));
-		goto err_mmap;
-	}
-
-	ion_info->ionfd = ionfd;
-	ion_info->buffd = buffer_fd;
-	ion_info->buffer = map_buffer;
-	ion_info->buflen = maplen;
-
-	return 0;
-
-	munmap(map_buffer, maplen);
-
-err_fd_data:
-err_mmap:
-	/* in case of error: close the buffer fd */
-	if (buffer_fd)
-		close(buffer_fd);
-
-err_query:
-err_heap:
-err_alloc:
-	/* In case of error: close the ion client fd */
-	if (ionfd)
-		close(ionfd);
-
-	return -1;
-}
-
-int ion_import_buffer_fd(struct ion_buffer_info *ion_info)
-{
-	int buffd;
-	unsigned char *map_buf;
-	unsigned long map_len;
-
-	if (!ion_info) {
-		fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
-		return -1;
-	}
-
-	map_len = ion_info->buflen;
-	buffd = ion_info->buffd;
-
-	if (buffd < 0 || map_len <= 0) {
-		fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
-			__func__, buffd, map_len);
-		goto err_buffd;
-	}
-
-	map_buf = (unsigned char *)mmap(NULL, map_len, PROT_READ|PROT_WRITE,
-			MAP_SHARED, buffd, 0);
-	if (map_buf == MAP_FAILED) {
-		printf("<%s>: Failed - mmap: %s\n",
-			__func__, strerror(errno));
-		goto err_mmap;
-	}
-
-	ion_info->buffer = map_buf;
-	ion_info->buflen = map_len;
-
-	return 0;
-
-err_mmap:
-	if (buffd)
-		close(buffd);
-
-err_buffd:
-	return -1;
-}
-
-void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
-{
-	if (ion_info) {
-		/* unmap the buffer properly in the end */
-		munmap(ion_info->buffer, ion_info->buflen);
-		/* close the buffer fd */
-		if (ion_info->buffd > 0)
-			close(ion_info->buffd);
-		/* Finally, close the client fd */
-		if (ion_info->ionfd > 0)
-			close(ion_info->ionfd);
-	}
-}
-
-int socket_send_fd(struct socket_info *info)
-{
-	int status;
-	int fd, sockfd;
-	struct socketdata skdata;
-
-	if (!info) {
-		fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
-		return -1;
-	}
-
-	sockfd = info->sockfd;
-	fd = info->datafd;
-	memset(&skdata, 0, sizeof(skdata));
-	skdata.data = fd;
-	skdata.len = sizeof(skdata.data);
-	status = sendtosocket(sockfd, &skdata);
-	if (status < 0) {
-		fprintf(stderr, "<%s>: Failed: sendtosocket\n", __func__);
-		return -1;
-	}
-
-	return 0;
-}
-
-int socket_receive_fd(struct socket_info *info)
-{
-	int status;
-	int fd, sockfd;
-	struct socketdata skdata;
-
-	if (!info) {
-		fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
-		return -1;
-	}
-
-	sockfd = info->sockfd;
-	memset(&skdata, 0, sizeof(skdata));
-	status = receivefromsocket(sockfd, &skdata);
-	if (status < 0) {
-		fprintf(stderr, "<%s>: Failed: receivefromsocket\n", __func__);
-		return -1;
-	}
-
-	fd = (int)skdata.data;
-	info->datafd = fd;
-
-	return status;
-}
diff --git a/tools/testing/selftests/android/ion/ionutils.h b/tools/testing/selftests/android/ion/ionutils.h
deleted file mode 100644
index 9941eb858576..000000000000
--- a/tools/testing/selftests/android/ion/ionutils.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef __ION_UTILS_H
-#define __ION_UTILS_H
-
-#include "ion.h"
-
-#define SOCKET_NAME "ion_socket"
-#define ION_DEVICE "/dev/ion"
-
-#define ION_BUFFER_LEN	4096
-#define MAX_HEAP_COUNT	ION_HEAP_TYPE_CUSTOM
-
-struct socket_info {
-	int sockfd;
-	int datafd;
-	unsigned long buflen;
-};
-
-struct ion_buffer_info {
-	int ionfd;
-	int buffd;
-	unsigned int heap_type;
-	unsigned int flag_type;
-	unsigned long heap_size;
-	unsigned long buflen;
-	unsigned char *buffer;
-};
-
-
-/* This is used to fill the data into the mapped buffer */
-void write_buffer(void *buffer, unsigned long len);
-
-/* This is used to read the data from the exported buffer */
-void read_buffer(void *buffer, unsigned long len);
-
-/* This is used to create an ION buffer FD for the kernel buffer
- * So you can export this same buffer to others in the form of FD
- */
-int ion_export_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to import or map an exported FD.
- * So we point to same buffer without making a copy. Hence zero-copy.
- */
-int ion_import_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to close all references for the ION client */
-void ion_close_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to send FD to another process using socket IPC */
-int socket_send_fd(struct socket_info *skinfo);
-
-/* This is used to receive FD from another process using socket IPC */
-int socket_receive_fd(struct socket_info *skinfo);
-
-
-#endif
diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c
deleted file mode 100644
index 7dc521002095..000000000000
--- a/tools/testing/selftests/android/ion/ipcsocket.c
+++ /dev/null
@@ -1,227 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/un.h>
-#include <errno.h>
-
-#include "ipcsocket.h"
-
-
-int opensocket(int *sockfd, const char *name, int connecttype)
-{
-	int ret, temp = 1;
-
-	if (!name || strlen(name) > MAX_SOCK_NAME_LEN) {
-		fprintf(stderr, "<%s>: Invalid socket name.\n", __func__);
-		return -1;
-	}
-
-	ret = socket(PF_LOCAL, SOCK_STREAM, 0);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed socket: <%s>\n",
-			__func__, strerror(errno));
-		return ret;
-	}
-
-	*sockfd = ret;
-	if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR,
-		(char *)&temp, sizeof(int)) < 0) {
-		fprintf(stderr, "<%s>: Failed setsockopt: <%s>\n",
-		__func__, strerror(errno));
-		goto err;
-	}
-
-	sprintf(sock_name, "/tmp/%s", name);
-
-	if (connecttype == 1) {
-		/* This is for Server connection */
-		struct sockaddr_un skaddr;
-		int clientfd;
-		socklen_t sklen;
-
-		unlink(sock_name);
-		memset(&skaddr, 0, sizeof(skaddr));
-		skaddr.sun_family = AF_LOCAL;
-		strcpy(skaddr.sun_path, sock_name);
-
-		ret = bind(*sockfd, (struct sockaddr *)&skaddr,
-			SUN_LEN(&skaddr));
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed bind: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-
-		ret = listen(*sockfd, 5);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed listen: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-
-		memset(&skaddr, 0, sizeof(skaddr));
-		sklen = sizeof(skaddr);
-
-		ret = accept(*sockfd, (struct sockaddr *)&skaddr,
-			(socklen_t *)&sklen);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed accept: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-
-		clientfd = ret;
-		*sockfd = clientfd;
-	} else {
-		/* This is for client connection */
-		struct sockaddr_un skaddr;
-
-		memset(&skaddr, 0, sizeof(skaddr));
-		skaddr.sun_family = AF_LOCAL;
-		strcpy(skaddr.sun_path, sock_name);
-
-		ret = connect(*sockfd, (struct sockaddr *)&skaddr,
-			SUN_LEN(&skaddr));
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed connect: <%s>\n",
-			__func__, strerror(errno));
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	if (*sockfd)
-		close(*sockfd);
-
-	return ret;
-}
-
-int sendtosocket(int sockfd, struct socketdata *skdata)
-{
-	int ret, buffd;
-	unsigned int len;
-	char cmsg_b[CMSG_SPACE(sizeof(int))];
-	struct cmsghdr *cmsg;
-	struct msghdr msgh;
-	struct iovec iov;
-	struct timeval timeout;
-	fd_set selFDs;
-
-	if (!skdata) {
-		fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
-		return -1;
-	}
-
-	FD_ZERO(&selFDs);
-	FD_SET(0, &selFDs);
-	FD_SET(sockfd, &selFDs);
-	timeout.tv_sec = 20;
-	timeout.tv_usec = 0;
-
-	ret = select(sockfd+1, NULL, &selFDs, NULL, &timeout);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed select: <%s>\n",
-		__func__, strerror(errno));
-		return -1;
-	}
-
-	if (FD_ISSET(sockfd, &selFDs)) {
-		buffd = skdata->data;
-		len = skdata->len;
-		memset(&msgh, 0, sizeof(msgh));
-		msgh.msg_control = &cmsg_b;
-		msgh.msg_controllen = CMSG_LEN(len);
-		iov.iov_base = "OK";
-		iov.iov_len = 2;
-		msgh.msg_iov = &iov;
-		msgh.msg_iovlen = 1;
-		cmsg = CMSG_FIRSTHDR(&msgh);
-		cmsg->cmsg_level = SOL_SOCKET;
-		cmsg->cmsg_type = SCM_RIGHTS;
-		cmsg->cmsg_len = CMSG_LEN(len);
-		memcpy(CMSG_DATA(cmsg), &buffd, len);
-
-		ret = sendmsg(sockfd, &msgh, MSG_DONTWAIT);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed sendmsg: <%s>\n",
-			__func__, strerror(errno));
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-int receivefromsocket(int sockfd, struct socketdata *skdata)
-{
-	int ret, buffd;
-	unsigned int len = 0;
-	char cmsg_b[CMSG_SPACE(sizeof(int))];
-	struct cmsghdr *cmsg;
-	struct msghdr msgh;
-	struct iovec iov;
-	fd_set recvFDs;
-	char data[32];
-
-	if (!skdata) {
-		fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
-		return -1;
-	}
-
-	FD_ZERO(&recvFDs);
-	FD_SET(0, &recvFDs);
-	FD_SET(sockfd, &recvFDs);
-
-	ret = select(sockfd+1, &recvFDs, NULL, NULL, NULL);
-	if (ret < 0) {
-		fprintf(stderr, "<%s>: Failed select: <%s>\n",
-		__func__, strerror(errno));
-		return -1;
-	}
-
-	if (FD_ISSET(sockfd, &recvFDs)) {
-		len = sizeof(buffd);
-		memset(&msgh, 0, sizeof(msgh));
-		msgh.msg_control = &cmsg_b;
-		msgh.msg_controllen = CMSG_LEN(len);
-		iov.iov_base = data;
-		iov.iov_len = sizeof(data)-1;
-		msgh.msg_iov = &iov;
-		msgh.msg_iovlen = 1;
-		cmsg = CMSG_FIRSTHDR(&msgh);
-		cmsg->cmsg_level = SOL_SOCKET;
-		cmsg->cmsg_type = SCM_RIGHTS;
-		cmsg->cmsg_len = CMSG_LEN(len);
-
-		ret = recvmsg(sockfd, &msgh, MSG_DONTWAIT);
-		if (ret < 0) {
-			fprintf(stderr, "<%s>: Failed recvmsg: <%s>\n",
-			__func__, strerror(errno));
-			return -1;
-		}
-
-		memcpy(&buffd, CMSG_DATA(cmsg), len);
-		skdata->data = buffd;
-		skdata->len = len;
-	}
-	return 0;
-}
-
-int closesocket(int sockfd, char *name)
-{
-	char sockname[MAX_SOCK_NAME_LEN];
-
-	if (sockfd)
-		close(sockfd);
-	sprintf(sockname, "/tmp/%s", name);
-	unlink(sockname);
-	shutdown(sockfd, 2);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h
deleted file mode 100644
index b3e84498a8a1..000000000000
--- a/tools/testing/selftests/android/ion/ipcsocket.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-#ifndef _IPCSOCKET_H
-#define _IPCSOCKET_H
-
-
-#define MAX_SOCK_NAME_LEN	64
-
-char sock_name[MAX_SOCK_NAME_LEN];
-
-/* This structure is responsible for holding the IPC data
- * data: hold the buffer fd
- * len: just the length of 32-bit integer fd
- */
-struct socketdata {
-	int data;
-	unsigned int len;
-};
-
-/* This API is used to open the IPC socket connection
- * name: implies a unique socket name in the system
- * connecttype: implies server(0) or client(1)
- */
-int opensocket(int *sockfd, const char *name, int connecttype);
-
-/* This is the API to send socket data over IPC socket */
-int sendtosocket(int sockfd, struct socketdata *data);
-
-/* This is the API to receive socket data over IPC socket */
-int receivefromsocket(int sockfd, struct socketdata *data);
-
-/* This is the API to close the socket connection */
-int closesocket(int sockfd, char *name);
-
-
-#endif
diff --git a/tools/testing/selftests/android/run.sh b/tools/testing/selftests/android/run.sh
deleted file mode 100755
index dd8edf291454..000000000000
--- a/tools/testing/selftests/android/run.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-
-(cd ion; ./ion_test.sh)
-- 
cgit v1.2.3-70-g09d2


From 472547778de24e2764ab325268dd5b77e6923939 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 22 Oct 2020 13:27:38 -0700
Subject: selftest/bpf: Fix profiler test using CO-RE relocation for enums

Instead of hard-coding invalid pids_cgrp_id, use Kconfig to detect the
presence of that enum value and CO-RE to capture its actual value in the
hosts's kernel.

Fixes: 03d4d13fab3f ("selftests/bpf: Add profiler test")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201022202739.3667367-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/profiler.inc.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 00578311a423..30982a7e4d0f 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node)
 	}
 }
 
-int pids_cgrp_id = 1;
+extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
+enum cgroup_subsys_id___local {
+	pids_cgrp_id___local = 123, /* value doesn't matter */
+};
 
 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 					 struct task_struct* task,
@@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 
-	if (ENABLE_CGROUP_V1_RESOLVER) {
+	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
+		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
+						  pids_cgrp_id___local);
 #ifdef UNROLL
 #pragma unroll
 #endif
@@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 				BPF_CORE_READ(task, cgroups, subsys[i]);
 			if (subsys != NULL) {
 				int subsys_id = BPF_CORE_READ(subsys, ss, id);
-				if (subsys_id == pids_cgrp_id) {
+				if (subsys_id == cgrp_id) {
 					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
 					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
 					break;
-- 
cgit v1.2.3-70-g09d2


From 3adb776384f2042ef6bda876e91a7a7ac2872c5e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 19 Oct 2020 21:08:03 -0700
Subject: x86, libnvdimm/test: Remove COPY_MC_TEST

The COPY_MC_TEST facility has served its purpose for validating the
early termination conditions of the copy_mc_fragile() implementation.
Remove it and the EXPORT_SYMBOL_GPL of copy_mc_fragile().

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/160316688322.3374697.8648308115165836243.stgit@dwillia2-desk3.amr.corp.intel.com
---
 arch/x86/Kconfig.debug              |   3 --
 arch/x86/include/asm/copy_mc_test.h |  75 --------------------------
 arch/x86/lib/copy_mc.c              |   4 --
 arch/x86/lib/copy_mc_64.S           |  10 ----
 tools/testing/nvdimm/test/nfit.c    | 103 ------------------------------------
 5 files changed, 195 deletions(-)
 delete mode 100644 arch/x86/include/asm/copy_mc_test.h

(limited to 'tools')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 27b5e2bc6a01..80b57e7f4947 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,9 +62,6 @@ config EARLY_PRINTK_USB_XDBC
 	  You should normally say N here, unless you want to debug early
 	  crashes or need a very simple printk logging facility.
 
-config COPY_MC_TEST
-	def_bool n
-
 config EFI_PGT_DUMP
 	bool "Dump the EFI pagetable"
 	depends on EFI
diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
deleted file mode 100644
index e4991ba96726..000000000000
--- a/arch/x86/include/asm/copy_mc_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _COPY_MC_TEST_H_
-#define _COPY_MC_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_COPY_MC_TEST
-extern unsigned long copy_mc_test_src;
-extern unsigned long copy_mc_test_dst;
-
-static inline void copy_mc_inject_src(void *addr)
-{
-	if (addr)
-		copy_mc_test_src = (unsigned long) addr;
-	else
-		copy_mc_test_src = ~0UL;
-}
-
-static inline void copy_mc_inject_dst(void *addr)
-{
-	if (addr)
-		copy_mc_test_dst = (unsigned long) addr;
-	else
-		copy_mc_test_dst = ~0UL;
-}
-#else /* CONFIG_COPY_MC_TEST */
-static inline void copy_mc_inject_src(void *addr)
-{
-}
-
-static inline void copy_mc_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_COPY_MC_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_COPY_MC_TEST
-.macro COPY_MC_TEST_CTL
-	.pushsection .data
-	.align 8
-	.globl copy_mc_test_src
-	copy_mc_test_src:
-		.quad 0
-	EXPORT_SYMBOL_GPL(copy_mc_test_src)
-	.globl copy_mc_test_dst
-	copy_mc_test_dst:
-		.quad 0
-	EXPORT_SYMBOL_GPL(copy_mc_test_dst)
-	.popsection
-.endm
-
-.macro COPY_MC_TEST_SRC reg count target
-	leaq \count(\reg), %r9
-	cmp copy_mc_test_src, %r9
-	ja \target
-.endm
-
-.macro COPY_MC_TEST_DST reg count target
-	leaq \count(\reg), %r9
-	cmp copy_mc_test_dst, %r9
-	ja \target
-.endm
-#else
-.macro COPY_MC_TEST_CTL
-.endm
-
-.macro COPY_MC_TEST_SRC reg count target
-.endm
-
-.macro COPY_MC_TEST_DST reg count target
-.endm
-#endif /* CONFIG_COPY_MC_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _COPY_MC_TEST_H_ */
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index c13e8c9ee926..80efd45a7761 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -10,10 +10,6 @@
 #include <asm/mce.h>
 
 #ifdef CONFIG_X86_MCE
-/*
- * See COPY_MC_TEST for self-test of the copy_mc_fragile()
- * implementation.
- */
 static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
 
 void enable_copy_mc_fragile(void)
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
index 892d8915f609..e5f77e293034 100644
--- a/arch/x86/lib/copy_mc_64.S
+++ b/arch/x86/lib/copy_mc_64.S
@@ -2,14 +2,11 @@
 /* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
 
 #include <linux/linkage.h>
-#include <asm/copy_mc_test.h>
-#include <asm/export.h>
 #include <asm/asm.h>
 
 #ifndef CONFIG_UML
 
 #ifdef CONFIG_X86_MCE
-COPY_MC_TEST_CTL
 
 /*
  * copy_mc_fragile - copy memory with indication if an exception / fault happened
@@ -38,8 +35,6 @@ SYM_FUNC_START(copy_mc_fragile)
 	subl %ecx, %edx
 .L_read_leading_bytes:
 	movb (%rsi), %al
-	COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
-	COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
 .L_write_leading_bytes:
 	movb %al, (%rdi)
 	incq %rsi
@@ -55,8 +50,6 @@ SYM_FUNC_START(copy_mc_fragile)
 
 .L_read_words:
 	movq (%rsi), %r8
-	COPY_MC_TEST_SRC %rsi 8 .E_read_words
-	COPY_MC_TEST_DST %rdi 8 .E_write_words
 .L_write_words:
 	movq %r8, (%rdi)
 	addq $8, %rsi
@@ -73,8 +66,6 @@ SYM_FUNC_START(copy_mc_fragile)
 	movl %edx, %ecx
 .L_read_trailing_bytes:
 	movb (%rsi), %al
-	COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
-	COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
 .L_write_trailing_bytes:
 	movb %al, (%rdi)
 	incq %rsi
@@ -88,7 +79,6 @@ SYM_FUNC_START(copy_mc_fragile)
 .L_done:
 	ret
 SYM_FUNC_END(copy_mc_fragile)
-EXPORT_SYMBOL_GPL(copy_mc_fragile)
 
 	.section .fixup, "ax"
 	/*
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 2ac0fff6dad8..9b185bf82da8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,6 @@
 #include "nfit_test.h"
 #include "../watermark.h"
 
-#include <asm/copy_mc_test.h>
 #include <asm/mce.h>
 
 /*
@@ -3284,107 +3283,6 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };
 
-static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
-
-enum INJECT {
-	INJECT_NONE,
-	INJECT_SRC,
-	INJECT_DST,
-};
-
-static void copy_mc_test_init(char *dst, char *src, size_t size)
-{
-	size_t i;
-
-	memset(dst, 0xff, size);
-	for (i = 0; i < size; i++)
-		src[i] = (char) i;
-}
-
-static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
-		size_t size, unsigned long rem)
-{
-	size_t i;
-
-	for (i = 0; i < size - rem; i++)
-		if (dst[i] != (unsigned char) i) {
-			pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
-					__func__, __LINE__, i, dst[i],
-					(unsigned char) i);
-			return false;
-		}
-	for (i = size - rem; i < size; i++)
-		if (dst[i] != 0xffU) {
-			pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
-					__func__, __LINE__, i, dst[i]);
-			return false;
-		}
-	return true;
-}
-
-void copy_mc_test(void)
-{
-	char *inject_desc[] = { "none", "source", "destination" };
-	enum INJECT inj;
-
-	if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
-		pr_info("%s: run...\n", __func__);
-	} else {
-		pr_info("%s: disabled, skip.\n", __func__);
-		return;
-	}
-
-	for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
-		int i;
-
-		pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
-		for (i = 0; i < 512; i++) {
-			unsigned long expect, rem;
-			void *src, *dst;
-			bool valid;
-
-			switch (inj) {
-			case INJECT_NONE:
-				copy_mc_inject_src(NULL);
-				copy_mc_inject_dst(NULL);
-				dst = &copy_mc_buf[2048];
-				src = &copy_mc_buf[1024 - i];
-				expect = 0;
-				break;
-			case INJECT_SRC:
-				copy_mc_inject_src(&copy_mc_buf[1024]);
-				copy_mc_inject_dst(NULL);
-				dst = &copy_mc_buf[2048];
-				src = &copy_mc_buf[1024 - i];
-				expect = 512 - i;
-				break;
-			case INJECT_DST:
-				copy_mc_inject_src(NULL);
-				copy_mc_inject_dst(&copy_mc_buf[2048]);
-				dst = &copy_mc_buf[2048 - i];
-				src = &copy_mc_buf[1024];
-				expect = 512 - i;
-				break;
-			}
-
-			copy_mc_test_init(dst, src, 512);
-			rem = copy_mc_fragile(dst, src, 512);
-			valid = copy_mc_test_validate(dst, src, 512, expect);
-			if (rem == expect && valid)
-				continue;
-			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
-					__func__,
-					((unsigned long) dst) & ~PAGE_MASK,
-					((unsigned long ) src) & ~PAGE_MASK,
-					512, i, rem, valid ? "valid" : "bad",
-					expect);
-		}
-	}
-
-	copy_mc_inject_src(NULL);
-	copy_mc_inject_dst(NULL);
-}
-
 static __init int nfit_test_init(void)
 {
 	int rc, i;
@@ -3393,7 +3291,6 @@ static __init int nfit_test_init(void)
 	libnvdimm_test();
 	acpi_nfit_test();
 	device_dax_test();
-	copy_mc_test();
 	dax_pmem_test();
 	dax_pmem_core_test();
 #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
-- 
cgit v1.2.3-70-g09d2


From 3023d8ff3fc60e5d32dc1d05f99ad6ffa12b0033 Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Wed, 21 Oct 2020 20:04:55 -0700
Subject: kunit: Fix kunit.py --raw_output option

Due to the raw_output() function on kunit_parser.py actually being a
generator, it only runs if something reads the lines it returns. Since
we no-longer do that (parsing doesn't actually happen if raw_output is
enabled), it was not printing anything.

Fixes: 45ba7a893ad8 ("kunit: kunit_tool: Separate out config/build/exec/parse")
Signed-off-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 8019e3dd4c32..744ee9cb0073 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -66,7 +66,6 @@ def isolate_kunit_output(kernel_output):
 def raw_output(kernel_output):
 	for line in kernel_output:
 		print(line)
-		yield line
 
 DIVIDER = '=' * 60
 
-- 
cgit v1.2.3-70-g09d2


From 3fc48259d5250f7a3ee021ad0492b604c428c564 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 15 Oct 2020 19:28:17 +0300
Subject: kunit: Don't fail test suites if one of them is empty

Empty test suite is okay test suite.

Don't fail the rest of the test suites if one of them is empty.

Fixes: 6ebf5866f2e8 ("kunit: tool: add Python wrappers for running KUnit tests")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 744ee9cb0073..84a1af2581f5 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -241,7 +241,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
 		return None
 	test_suite.name = name
 	expected_test_case_num = parse_subtest_plan(lines)
-	if not expected_test_case_num:
+	if expected_test_case_num is None:
 		return None
 	while expected_test_case_num > 0:
 		test_case = parse_test_case(lines)
-- 
cgit v1.2.3-70-g09d2


From 0d0d245104a42e593adcf11396017a6420c08ba8 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Wed, 21 Oct 2020 13:39:14 -0700
Subject: kunit: tools: fix kunit_tool tests for parsing test plans

Some tests logs for kunit_tool tests are missing their test plans
causing their tests to fail; fix this by adding the test plans.

Fixes: 45dcbb6f5ef7 ("kunit: test: add test plan to KUnit TAP format")
Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_tool_test.py             | 32 +++++++++++++++++-----
 .../kunit/test_data/test_config_printk_time.log    |  3 +-
 .../test_data/test_interrupted_tap_output.log      |  3 +-
 .../test_data/test_kernel_panic_interrupt.log      |  3 +-
 .../kunit/test_data/test_multiple_prefixes.log     |  3 +-
 .../kunit/test_data/test_pound_no_prefix.log       |  3 +-
 tools/testing/kunit/test_data/test_pound_sign.log  |  1 +
 7 files changed, 36 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 99c3c5671ea4..0b60855fb819 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -179,7 +179,7 @@ class KUnitParserTest(unittest.TestCase):
 		print_mock = mock.patch('builtins.print').start()
 		result = kunit_parser.parse_run_tests(
 			kunit_parser.isolate_kunit_output(file.readlines()))
-		print_mock.assert_any_call(StrContains("no kunit output detected"))
+		print_mock.assert_any_call(StrContains('no tests run!'))
 		print_mock.stop()
 		file.close()
 
@@ -198,39 +198,57 @@ class KUnitParserTest(unittest.TestCase):
 			'test_data/test_config_printk_time.log')
 		with open(prefix_log) as file:
 			result = kunit_parser.parse_run_tests(file.readlines())
-		self.assertEqual('kunit-resource-test', result.suites[0].name)
+			self.assertEqual(
+				kunit_parser.TestStatus.SUCCESS,
+				result.status)
+			self.assertEqual('kunit-resource-test', result.suites[0].name)
 
 	def test_ignores_multiple_prefixes(self):
 		prefix_log = get_absolute_path(
 			'test_data/test_multiple_prefixes.log')
 		with open(prefix_log) as file:
 			result = kunit_parser.parse_run_tests(file.readlines())
-		self.assertEqual('kunit-resource-test', result.suites[0].name)
+			self.assertEqual(
+				kunit_parser.TestStatus.SUCCESS,
+				result.status)
+			self.assertEqual('kunit-resource-test', result.suites[0].name)
 
 	def test_prefix_mixed_kernel_output(self):
 		mixed_prefix_log = get_absolute_path(
 			'test_data/test_interrupted_tap_output.log')
 		with open(mixed_prefix_log) as file:
 			result = kunit_parser.parse_run_tests(file.readlines())
-		self.assertEqual('kunit-resource-test', result.suites[0].name)
+			self.assertEqual(
+				kunit_parser.TestStatus.SUCCESS,
+				result.status)
+			self.assertEqual('kunit-resource-test', result.suites[0].name)
 
 	def test_prefix_poundsign(self):
 		pound_log = get_absolute_path('test_data/test_pound_sign.log')
 		with open(pound_log) as file:
 			result = kunit_parser.parse_run_tests(file.readlines())
-		self.assertEqual('kunit-resource-test', result.suites[0].name)
+			self.assertEqual(
+				kunit_parser.TestStatus.SUCCESS,
+				result.status)
+			self.assertEqual('kunit-resource-test', result.suites[0].name)
 
 	def test_kernel_panic_end(self):
 		panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log')
 		with open(panic_log) as file:
 			result = kunit_parser.parse_run_tests(file.readlines())
-		self.assertEqual('kunit-resource-test', result.suites[0].name)
+			self.assertEqual(
+				kunit_parser.TestStatus.TEST_CRASHED,
+				result.status)
+			self.assertEqual('kunit-resource-test', result.suites[0].name)
 
 	def test_pound_no_prefix(self):
 		pound_log = get_absolute_path('test_data/test_pound_no_prefix.log')
 		with open(pound_log) as file:
 			result = kunit_parser.parse_run_tests(file.readlines())
-		self.assertEqual('kunit-resource-test', result.suites[0].name)
+			self.assertEqual(
+				kunit_parser.TestStatus.SUCCESS,
+				result.status)
+			self.assertEqual('kunit-resource-test', result.suites[0].name)
 
 class KUnitJsonTest(unittest.TestCase):
 
diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log
index c02ca773946d..6bdb57f76eac 100644
--- a/tools/testing/kunit/test_data/test_config_printk_time.log
+++ b/tools/testing/kunit/test_data/test_config_printk_time.log
@@ -1,6 +1,7 @@
 [    0.060000] printk: console [mc-1] enabled
 [    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
 [    0.060000] TAP version 14
+[    0.060000] 1..3
 [    0.060000] 	# Subtest: kunit-resource-test
 [    0.060000] 	1..5
 [    0.060000] 	ok 1 - kunit_resource_test_init_resources
@@ -28,4 +29,4 @@
 [    0.060000] Stack:
 [    0.060000]  602086f8 601bc260 705c0000 705c0000
 [    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
-[    0.060000]  6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
index 5c73fb3a1c6f..1fb677728abe 100644
--- a/tools/testing/kunit/test_data/test_interrupted_tap_output.log
+++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
@@ -1,6 +1,7 @@
 [    0.060000] printk: console [mc-1] enabled
 [    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
 [    0.060000] TAP version 14
+[    0.060000] 1..3
 [    0.060000] 	# Subtest: kunit-resource-test
 [    0.060000] 	1..5
 [    0.060000] 	ok 1 - kunit_resource_test_init_resources
@@ -34,4 +35,4 @@
 [    0.060000] Stack:
 [    0.060000]  602086f8 601bc260 705c0000 705c0000
 [    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
-[    0.060000]  6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
index c045eee75f27..a014ffe9725e 100644
--- a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
+++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
@@ -1,6 +1,7 @@
 [    0.060000] printk: console [mc-1] enabled
 [    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
 [    0.060000] TAP version 14
+[    0.060000] 1..3
 [    0.060000] 	# Subtest: kunit-resource-test
 [    0.060000] 	1..5
 [    0.060000] 	ok 1 - kunit_resource_test_init_resources
@@ -22,4 +23,4 @@
 [    0.060000] Stack:
 [    0.060000]  602086f8 601bc260 705c0000 705c0000
 [    0.060000]  602086f8 6005fcec 705c0000 6002c6ab
-[    0.060000]  6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file
+[    0.060000]  6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log
index bc48407dcc36..0ad78481a0b4 100644
--- a/tools/testing/kunit/test_data/test_multiple_prefixes.log
+++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log
@@ -1,6 +1,7 @@
 [    0.060000][    T1] printk: console [mc-1] enabled
 [    0.060000][    T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
 [    0.060000][    T1] TAP version 14
+[    0.060000][    T1] 1..3
 [    0.060000][    T1] 	# Subtest: kunit-resource-test
 [    0.060000][    T1] 	1..5
 [    0.060000][    T1] 	ok 1 - kunit_resource_test_init_resources
@@ -28,4 +29,4 @@
 [    0.060000][    T1] Stack:
 [    0.060000][    T1]  602086f8 601bc260 705c0000 705c0000
 [    0.060000][    T1]  602086f8 6005fcec 705c0000 6002c6ab
-[    0.060000][    T1]  6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file
+[    0.060000][    T1]  6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log
index 2ceb360be7d5..dc4cf09a96d0 100644
--- a/tools/testing/kunit/test_data/test_pound_no_prefix.log
+++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log
@@ -1,6 +1,7 @@
  printk: console [mc-1] enabled
  random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
  TAP version 14
+ 1..3
  	# Subtest: kunit-resource-test
  	1..5
  	ok 1 - kunit_resource_test_init_resources
@@ -30,4 +31,4 @@
  Stack:
   602086f8 601bc260 705c0000 705c0000
   602086f8 6005fcec 705c0000 6002c6ab
-  6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file
+  6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log
index 28ffa5ba03bf..3f358e3a7ba0 100644
--- a/tools/testing/kunit/test_data/test_pound_sign.log
+++ b/tools/testing/kunit/test_data/test_pound_sign.log
@@ -1,6 +1,7 @@
 [    0.060000] printk: console [mc-1] enabled
 [    0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
 [    0.060000] TAP version 14
+[    0.060000] 1..3
 [    0.060000] 	# Subtest: kunit-resource-test
 [    0.060000] 	1..5
 [    0.060000] 	ok 1 - kunit_resource_test_init_resources
-- 
cgit v1.2.3-70-g09d2


From 748f0d70087c56226bf1df1f91a00b7ab4c8f883 Mon Sep 17 00:00:00 2001
From: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
Date: Fri, 23 Oct 2020 20:55:27 +0530
Subject: cpupower: Provide online and offline CPU information

When a user tries to modify cpuidle or cpufreq properties on offline
CPUs, the tool returns success (exit status 0) but also does not provide
any warning message regarding offline cpus that may have been specified
but left unchanged. In case of all or a few CPUs being offline, it can be
difficult to keep track of which CPUs didn't get the new frequency or idle
state set. Silent failures are difficult to keep track of when there are a
huge number of CPUs on which the action is performed.

This patch adds helper functions to find both online and offline CPUs and
print them out accordingly.

We use these helper functions in cpuidle-set and cpufreq-set to print an
additional message if the user attempts to modify offline cpus.

Reported-by: Pavithra R. Prakash <pavrampu@in.ibm.com>
Signed-off-by: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/cpufreq-set.c     |  3 ++
 tools/power/cpupower/utils/cpuidle-set.c     |  4 ++
 tools/power/cpupower/utils/cpupower.c        |  8 ++++
 tools/power/cpupower/utils/helpers/helpers.h | 12 +++++
 tools/power/cpupower/utils/helpers/misc.c    | 66 +++++++++++++++++++++++++++-
 5 files changed, 92 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 7b2164e07057..c5e60a39cfa6 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -315,6 +315,7 @@ int cmd_freq_set(int argc, char **argv)
 		}
 	}
 
+	get_cpustate();
 
 	/* loop over CPUs */
 	for (cpu = bitmask_first(cpus_chosen);
@@ -332,5 +333,7 @@ int cmd_freq_set(int argc, char **argv)
 		}
 	}
 
+	print_offline_cpus();
+
 	return 0;
 }
diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
index 569f268f4c7f..46158928f9ad 100644
--- a/tools/power/cpupower/utils/cpuidle-set.c
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -95,6 +95,8 @@ int cmd_idle_set(int argc, char **argv)
 		exit(EXIT_FAILURE);
 	}
 
+	get_cpustate();
+
 	/* Default is: set all CPUs */
 	if (bitmask_isallclear(cpus_chosen))
 		bitmask_setall(cpus_chosen);
@@ -181,5 +183,7 @@ int cmd_idle_set(int argc, char **argv)
 			break;
 		}
 	}
+
+	print_offline_cpus();
 	return EXIT_SUCCESS;
 }
diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c
index 8e3d08042825..8ac3304a9957 100644
--- a/tools/power/cpupower/utils/cpupower.c
+++ b/tools/power/cpupower/utils/cpupower.c
@@ -34,6 +34,8 @@ int run_as_root;
 int base_cpu;
 /* Affected cpus chosen by -c/--cpu param */
 struct bitmask *cpus_chosen;
+struct bitmask *online_cpus;
+struct bitmask *offline_cpus;
 
 #ifdef DEBUG
 int be_verbose;
@@ -178,6 +180,8 @@ int main(int argc, const char *argv[])
 	char pathname[32];
 
 	cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
+	online_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
+	offline_cpus = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
 
 	argc--;
 	argv += 1;
@@ -230,6 +234,10 @@ int main(int argc, const char *argv[])
 		ret = p->main(argc, argv);
 		if (cpus_chosen)
 			bitmask_free(cpus_chosen);
+		if (online_cpus)
+			bitmask_free(online_cpus);
+		if (offline_cpus)
+			bitmask_free(offline_cpus);
 		return ret;
 	}
 	print_help();
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index c258eeccd05f..d5799aa71e1f 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -94,6 +94,8 @@ struct cpupower_cpu_info {
  */
 extern int get_cpu_info(struct cpupower_cpu_info *cpu_info);
 extern struct cpupower_cpu_info cpupower_cpu_info;
+
+
 /* cpuid and cpuinfo helpers  **************************/
 
 /* X86 ONLY ****************************************/
@@ -171,4 +173,14 @@ static inline unsigned int cpuid_ecx(unsigned int op) { return 0; };
 static inline unsigned int cpuid_edx(unsigned int op) { return 0; };
 #endif /* defined(__i386__) || defined(__x86_64__) */
 
+/*
+ * CPU State related functions
+ */
+extern struct bitmask *online_cpus;
+extern struct bitmask *offline_cpus;
+
+void get_cpustate(void);
+void print_online_cpus(void);
+void print_offline_cpus(void);
+
 #endif /* __CPUPOWERUTILS_HELPERS__ */
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index f406adc40bad..2ead98169cf5 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -1,8 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdlib.h>
 
 #include "helpers/helpers.h"
 
+#if defined(__i386__) || defined(__x86_64__)
+
 #define MSR_AMD_HWCR	0xc0010015
 
 int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
@@ -41,3 +45,63 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 	return 0;
 }
 #endif /* #if defined(__i386__) || defined(__x86_64__) */
+
+/* get_cpustate
+ *
+ * Gather the information of all online CPUs into bitmask struct
+ */
+void get_cpustate(void)
+{
+	unsigned int cpu = 0;
+
+	bitmask_clearall(online_cpus);
+	bitmask_clearall(offline_cpus);
+
+	for (cpu = bitmask_first(cpus_chosen);
+		cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+		if (cpupower_is_cpu_online(cpu) == 1)
+			bitmask_setbit(online_cpus, cpu);
+		else
+			bitmask_setbit(offline_cpus, cpu);
+
+		continue;
+	}
+}
+
+/* print_online_cpus
+ *
+ * Print the CPU numbers of all CPUs that are online currently
+ */
+void print_online_cpus(void)
+{
+	int str_len = 0;
+	char *online_cpus_str = NULL;
+
+	str_len = online_cpus->size * 5;
+	online_cpus_str = (void *)malloc(sizeof(char) * str_len);
+
+	if (!bitmask_isallclear(online_cpus)) {
+		bitmask_displaylist(online_cpus_str, str_len, online_cpus);
+		printf(_("Following CPUs are online:\n%s\n"), online_cpus_str);
+	}
+}
+
+/* print_offline_cpus
+ *
+ * Print the CPU numbers of all CPUs that are offline currently
+ */
+void print_offline_cpus(void)
+{
+	int str_len = 0;
+	char *offline_cpus_str = NULL;
+
+	str_len = offline_cpus->size * 5;
+	offline_cpus_str = (void *)malloc(sizeof(char) * str_len);
+
+	if (!bitmask_isallclear(offline_cpus)) {
+		bitmask_displaylist(offline_cpus_str, str_len, offline_cpus);
+		printf(_("Following CPUs are offline:\n%s\n"), offline_cpus_str);
+		printf(_("cpupower set operation was not performed on them\n"));
+	}
+}
-- 
cgit v1.2.3-70-g09d2


From 9270e1a744f8ed953009b0e94b26ed0912d9ec1c Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Sat, 3 Oct 2020 21:40:22 -0400
Subject: tools: memory-model: Document that the LKMM can easily miss control
 dependencies

Add a small section to the litmus-tests.txt documentation file for
the Linux Kernel Memory Model explaining that the memory model often
fails to recognize certain control dependencies.

Suggested-by: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/litmus-tests.txt | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
index 2f840dcd15cf..8a9d5d2787f9 100644
--- a/tools/memory-model/Documentation/litmus-tests.txt
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -946,6 +946,23 @@ Limitations of the Linux-kernel memory model (LKMM) include:
 	carrying a dependency, then the compiler can break that dependency
 	by substituting a constant of that value.
 
+	Conversely, LKMM sometimes doesn't recognize that a particular
+	optimization is not allowed, and as a result, thinks that a
+	dependency is not present (because the optimization would break it).
+	The memory model misses some pretty obvious control dependencies
+	because of this limitation.  A simple example is:
+
+		r1 = READ_ONCE(x);
+		if (r1 == 0)
+			smp_mb();
+		WRITE_ONCE(y, 1);
+
+	There is a control dependency from the READ_ONCE to the WRITE_ONCE,
+	even when r1 is nonzero, but LKMM doesn't realize this and thinks
+	that the write may execute before the read if r1 != 0.  (Yes, that
+	doesn't make sense if you think about it, but the memory model's
+	intelligence is limited.)
+
 2.	Multiple access sizes for a single variable are not supported,
 	and neither are misaligned or partially overlapping accesses.
 
-- 
cgit v1.2.3-70-g09d2


From ab8bcad67bee82e4be290b32f0faaf582d7c3edc Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 4 Aug 2020 14:00:17 -0700
Subject: tools/memory-model: Move Documentation description to
 Documentation/README

This commit moves the descriptions of the files residing in
tools/memory-model/Documentation to a README file in that directory,
leaving behind the description of tools/memory-model/Documentation/README
itself.  After this change, tools/memory-model/Documentation/README
provides a guide to the files in the tools/memory-model/Documentation
directory, guiding people with different skills and needs to the most
appropriate starting point.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/README | 59 +++++++++++++++++++++++++++++++++
 tools/memory-model/README               | 22 ++----------
 2 files changed, 61 insertions(+), 20 deletions(-)
 create mode 100644 tools/memory-model/Documentation/README

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README
new file mode 100644
index 000000000000..2d9539f19912
--- /dev/null
+++ b/tools/memory-model/Documentation/README
@@ -0,0 +1,59 @@
+It has been said that successful communication requires first identifying
+what your audience knows and then building a bridge from their current
+knowledge to what they need to know.  Unfortunately, the expected
+Linux-kernel memory model (LKMM) audience might be anywhere from novice
+to expert both in kernel hacking and in understanding LKMM.
+
+This document therefore points out a number of places to start reading,
+depending on what you know and what you would like to learn.  Please note
+that the documents later in this list assume that the reader understands
+the material provided by documents earlier in this list.
+
+o	You are new to Linux-kernel concurrency: simple.txt
+
+o	You are familiar with the Linux-kernel concurrency primitives
+	that you need, and just want to get started with LKMM litmus
+	tests:  litmus-tests.txt
+
+o	You are familiar with Linux-kernel concurrency, and would
+	like a detailed intuitive understanding of LKMM, including
+	situations involving more than two threads:  recipes.txt
+
+o	You are familiar with Linux-kernel concurrency and the use of
+	LKMM, and would like a quick reference:  cheatsheet.txt
+
+o	You are familiar with Linux-kernel concurrency and the use
+	of LKMM, and would like to learn about LKMM's requirements,
+	rationale, and implementation:	explanation.txt
+
+o	You are interested in the publications related to LKMM, including
+	hardware manuals, academic literature, standards-committee
+	working papers, and LWN articles:  references.txt
+
+
+====================
+DESCRIPTION OF FILES
+====================
+
+README
+	This file.
+
+cheatsheet.txt
+	Quick-reference guide to the Linux-kernel memory model.
+
+explanation.txt
+	Detailed description of the memory model.
+
+litmus-tests.txt
+	The format, features, capabilities, and limitations of the litmus
+	tests that LKMM can evaluate.
+
+recipes.txt
+	Common memory-ordering patterns.
+
+references.txt
+	Background information.
+
+simple.txt
+	Starting point for someone new to Linux-kernel concurrency.
+	And also a reminder of the simpler approaches to concurrency!
diff --git a/tools/memory-model/README b/tools/memory-model/README
index c8144d4aafa0..39d08d1f0443 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -161,26 +161,8 @@ running LKMM litmus tests.
 DESCRIPTION OF FILES
 ====================
 
-Documentation/cheatsheet.txt
-	Quick-reference guide to the Linux-kernel memory model.
-
-Documentation/explanation.txt
-	Describes the memory model in detail.
-
-Documentation/litmus-tests.txt
-	Describes the format, features, capabilities, and limitations
-	of the litmus tests that LKMM can evaluate.
-
-Documentation/recipes.txt
-	Lists common memory-ordering patterns.
-
-Documentation/references.txt
-	Provides background reading.
-
-Documentation/simple.txt
-	Starting point for someone new to Linux-kernel concurrency.
-	And also for those needing a reminder of the simpler approaches
-	to concurrency!
+Documentation/README
+	Guide to the other documents in the Documentation/ directory.
 
 linux-kernel.bell
 	Categorizes the relevant instructions, including memory
-- 
cgit v1.2.3-70-g09d2


From dc6bf4da825aa0301a46f55fec7c0bb706af2aad Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 26 Oct 2020 16:20:32 -0400
Subject: selftests/ftrace: Use $FUNCTION_FORK to reference kernel fork
 function

Commit cad6967ac108 ("fork: introduce kernel_clone()") replaced "_do_fork()"
with "kernel_clone()". The ftrace selftests reference the fork function in
several of the tests. The rename will make the tests break, but if those
names are changed in the tests, they would then break on older kernels. The
same set of tests should pass older kernels if they have previously passed.
Obviously, a new test may not work on older kernels if the test was added
due to a bug or a new feature.

The setup of ftracetest will now create a $FUNCTION_FORK bash variable
that will contain "_do_fork" for older kernels and "kernel_clone" for newer
ones. It figures out the proper name by examining /proc/kallsyms.

Note, available_filter_functions could also be used, but because some tests
should be able to pass without function tracing enabled, it could not be
used.

Fixes: eea11285dab3 ("tracing: switch to kernel_clone()")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 .../selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc  |  2 +-
 .../ftrace/test.d/dynevent/clear_select_events.tc          |  2 +-
 .../ftrace/test.d/dynevent/generic_clear_event.tc          |  2 +-
 .../ftrace/test.d/ftrace/func-filter-notrace-pid.tc        |  2 +-
 .../selftests/ftrace/test.d/ftrace/func-filter-pid.tc      |  2 +-
 .../ftrace/test.d/ftrace/func-filter-stacktrace.tc         |  4 ++--
 tools/testing/selftests/ftrace/test.d/functions            |  7 +++++++
 .../selftests/ftrace/test.d/kprobe/add_and_remove.tc       |  2 +-
 tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc |  2 +-
 .../testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc  |  4 ++--
 .../selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc     |  2 +-
 .../selftests/ftrace/test.d/kprobe/kprobe_args_string.tc   |  4 ++--
 .../selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc   | 10 +++++-----
 .../selftests/ftrace/test.d/kprobe/kprobe_args_type.tc     |  2 +-
 .../selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc        | 14 +++++++-------
 .../selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc    |  2 +-
 .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 12 ++++++------
 .../selftests/ftrace/test.d/kprobe/kretprobe_args.tc       |  4 ++--
 tools/testing/selftests/ftrace/test.d/kprobe/profile.tc    |  2 +-
 19 files changed, 44 insertions(+), 37 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index 3bcd4c3624ee..b4da41d126d5 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=kernel_clone
+PLACE=$FUNCTION_FORK
 
 echo "p:myevent1 $PLACE" >> dynamic_events
 echo "r:myevent2 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index 438961971b7e..3a0e2885fff5 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=kernel_clone
+PLACE=$FUNCTION_FORK
 
 setup_events() {
 echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index a8603bd23e0d..d3e138e8377f 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=kernel_clone
+PLACE=$FUNCTION_FORK
 
 setup_events() {
 echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
index acb17ce543d2..80541964b927 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
@@ -39,7 +39,7 @@ do_test() {
     disable_tracing
 
     echo do_execve* > set_ftrace_filter
-    echo *do_fork >> set_ftrace_filter
+    echo $FUNCTION_FORK >> set_ftrace_filter
 
     echo $PID > set_ftrace_notrace_pid
     echo function > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 9f0a9687c773..2f7211254529 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -39,7 +39,7 @@ do_test() {
     disable_tracing
 
     echo do_execve* > set_ftrace_filter
-    echo *do_fork >> set_ftrace_filter
+    echo $FUNCTION_FORK >> set_ftrace_filter
 
     echo $PID > set_ftrace_pid
     echo function > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 98305d76bd04..191d116b7883 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -4,9 +4,9 @@
 # requires: set_ftrace_filter
 # flags: instance
 
-echo kernel_clone:stacktrace >> set_ftrace_filter
+echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter
 
-grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter
+grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter
 
 (echo "forked"; sleep 1)
 
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index c5dec55b7d95..a6fac927ee82 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -133,6 +133,13 @@ yield() {
     ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
+# The fork function in the kernel was renamed from "_do_fork" to
+# "kernel_fork". As older tests should still work with older kernels
+# as well as newer kernels, check which version of fork is used on this
+# kernel so that the tests can use the fork function for the running kernel.
+FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then
+                echo kernel_clone; else echo '_do_fork'; fi)`
+
 # Since probe event command may include backslash, explicitly use printf "%s"
 # to NOT interpret it.
 ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index 9737cd0578a7..2428a3ed78c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -3,7 +3,7 @@
 # description: Kprobe dynamic event - adding and removing
 # requires: kprobe_events
 
-echo p:myevent kernel_clone > kprobe_events
+echo p:myevent $FUNCTION_FORK > kprobe_events
 grep myevent kprobe_events
 test -d events/kprobes/myevent
 echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index f9a40af76888..010a8b1d6c1d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -3,7 +3,7 @@
 # description: Kprobe dynamic event - busy event check
 # requires: kprobe_events
 
-echo p:myevent kernel_clone > kprobe_events
+echo p:myevent $FUNCTION_FORK > kprobe_events
 test -d events/kprobes/myevent
 echo 1 > events/kprobes/myevent/enable
 echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index eb543d3cfe5f..a96a1dc7014f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -3,13 +3,13 @@
 # description: Kprobe dynamic event with arguments
 # requires: kprobe_events
 
-echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events
+echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events
 grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
 test -d events/kprobes/testprobe
 
 echo 1 > events/kprobes/testprobe/enable
 ( echo "forked")
-grep testprobe trace | grep 'kernel_clone' | \
+grep testprobe trace | grep "$FUNCTION_FORK" | \
   grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
 
 echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 4e5b63be51c9..a053ee2e7d77 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -5,7 +5,7 @@
 
 grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
 
-echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events
+echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events
 grep testprobe kprobe_events | grep -q 'comm=$comm'
 test -d events/kprobes/testprobe
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index a1d70588ab21..84285a6f60b0 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -30,13 +30,13 @@ esac
 : "Test get argument (1)"
 echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-echo "p:test kernel_clone" >> kprobe_events
+echo "p:test $FUNCTION_FORK" >> kprobe_events
 grep -qe "testprobe.* arg1=\"test\"" trace
 
 echo 0 > events/kprobes/testprobe/enable
 : "Test get argument (2)"
 echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-echo "p:test kernel_clone" >> kprobe_events
+echo "p:test $FUNCTION_FORK" >> kprobe_events
 grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index bd25dd0ba0d0..717130ed4feb 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
 fi
 
 : "Test get basic types symbol argument"
-echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
 if grep -q "x8/16/32/64" README; then
-  echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+  echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
 fi
-echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
 echo 1 > events/kprobes/enable
 (echo "forked")
 echo 0 > events/kprobes/enable
@@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
 grep "testprobe_bf:.* arg1=.*" trace
 
 : "Test get string symbol argument"
-echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events
 echo 1 > events/kprobes/enable
 (echo "forked")
 echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 91fcce1c241c..25b7708eb559 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events "x8/16/32/64":README
 
 gen_event() { # Bitsize
-  echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+  echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
 }
 
 check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 0d179094191f..5556292601a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,29 +5,29 @@
 
 # prepare
 echo nop > current_tracer
-echo kernel_clone > set_ftrace_filter
-echo 'p:testprobe kernel_clone' > kprobe_events
+echo $FUNCTION_FORK > set_ftrace_filter
+echo "p:testprobe $FUNCTION_FORK" > kprobe_events
 
 # kprobe on / ftrace off
 echo 1 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep 'kernel_clone <-' trace
+! grep "$FUNCTION_FORK <-" trace
 
 # kprobe on / ftrace on
 echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep 'kernel_clone <-' trace
+grep "$FUNCTION_FORK <-" trace
 
 # kprobe off / ftrace on
 echo 0 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 ! grep testprobe trace
-grep 'kernel_clone <-' trace
+grep "$FUNCTION_FORK <-" trace
 
 # kprobe on / ftrace on
 echo 1 > events/kprobes/testprobe/enable
@@ -35,11 +35,11 @@ echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep 'kernel_clone <-' trace
+grep "$FUNCTION_FORK <-" trace
 
 # kprobe on / ftrace off
 echo nop > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep 'kernel_clone <-' trace
+! grep "$FUNCTION_FORK <-" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 45d90b6c763d..f0d5b7777ed7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events "Create/append/":README
 
 # Choose 2 symbols for target
-SYM1=kernel_clone
+SYM1=$FUNCTION_FORK
 SYM2=do_exit
 EVENT_NAME=kprobes/testevent
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index 1b5550ef8a9b..fa928b431555 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -86,15 +86,15 @@ esac
 
 # multiprobe errors
 if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent kernel_clone' > kprobe_events
+echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events
 check_error '^r:kprobes/testevent do_exit'	# DIFF_PROBE_TYPE
 
 # Explicitly use printf "%s" to not interpret \1
-printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent kernel_clone ^bcd=\1'	# DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8'	# DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"'	# DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent kernel_clone abcd=\1'	# SAME_PROBE
+printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events
+check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1"	# DIFF_ARG_TYPE
+check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8"	# DIFF_ARG_TYPE
+check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1"	# SAME_PROBE
 fi
 
 # %return suffix errors
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 7ae492c204a4..197cc2afd404 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,14 +4,14 @@
 # requires: kprobe_events
 
 # Add new kretprobe event
-echo 'r:testprobe2 kernel_clone $retval' > kprobe_events
+echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events
 grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
 test -d events/kprobes/testprobe2
 
 echo 1 > events/kprobes/testprobe2/enable
 ( echo "forked")
 
-cat trace | grep testprobe2 | grep -q '<- kernel_clone'
+cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK"
 
 echo 0 > events/kprobes/testprobe2/enable
 echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index c4093fc1a773..98166fa3eb91 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events
 
 ! grep -q 'myevent' kprobe_profile
-echo p:myevent kernel_clone > kprobe_events
+echo "p:myevent $FUNCTION_FORK" > kprobe_events
 grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
 echo 1 > events/kprobes/myevent/enable
 ( echo "forked" )
-- 
cgit v1.2.3-70-g09d2


From e3e40312567087fbe6880f316cb2b0e1f3d8a82c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 2 Oct 2020 14:25:01 +0100
Subject: selftests/ftrace: check for do_sys_openat2 in user-memory test

More recent libc implementations are now using openat/openat2 system
calls so also add do_sys_openat2 to the tracing so that the test
passes on these systems because do_sys_open may not be called.

Thanks to Masami Hiramatsu for the help on getting this fix to work
correctly.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
index a30a9c07290d..d25d01a19778 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
@@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported
 :;: "user-memory access syntax and ustring working on user memory";:
 echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \
 	> kprobe_events
+echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \
+	>> kprobe_events
 
 grep myevent kprobe_events | \
 	grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string'
 echo 1 > events/kprobes/myevent/enable
+echo 1 > events/kprobes/myevent2/enable
 echo > /dev/null
 echo 0 > events/kprobes/myevent/enable
+echo 0 > events/kprobes/myevent2/enable
 
 grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"'
 
-- 
cgit v1.2.3-70-g09d2


From f825d3f7ed9305e7dd0a3e0a74673a4257d0cc53 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:21 +0300
Subject: selftests: filter kselftest headers from command in lib.mk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 1056d3d2c97e ("selftests: enforce local header dependency in
lib.mk") added header dependency to the rule, but as the rule uses $^,
the headers are added to the compiler command line.

This can cause unexpected precompiled header files being generated when
compilation fails:

  $ echo { >> openat2_test.c

  $ make
  gcc -Wall -O2 -g -fsanitize=address -fsanitize=undefined  openat2_test.c
    tools/testing/selftests/kselftest_harness.h tools/testing/selftests/kselftest.h helpers.c
    -o tools/testing/selftests/openat2/openat2_test
  openat2_test.c:313:1: error: expected identifier or ‘(’ before ‘{’ token
    313 | {
        | ^
  make: *** [../lib.mk:140: tools/testing/selftests/openat2/openat2_test] Error 1

  $ file openat2_test*
  openat2_test:   GCC precompiled header (version 014) for C
  openat2_test.c: C source, ASCII text

Fix it by filtering out the headers, so that we'll only pass the actual
*.c files in the compiler command line.

Fixes: 1056d3d2c97e ("selftests: enforce local header dependency in lib.mk")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/lib.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 30848ca36555..a5ce26d548e4 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -136,7 +136,7 @@ endif
 ifeq ($(OVERRIDE_TARGETS),)
 LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
 $(OUTPUT)/%:%.c $(LOCAL_HDRS)
-	$(LINK.c) $^ $(LDLIBS) -o $@
+	$(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
 
 $(OUTPUT)/%.o:%.S
 	$(COMPILE.S) $^ -o $@
-- 
cgit v1.2.3-70-g09d2


From 1948172fdba5ad643529ddcd00a601c0caa913ed Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:22 +0300
Subject: selftests: pidfd: fix compilation errors due to wait.h

Drop unneeded <linux/wait.h> header inclusion to fix pidfd compilation
errors seen in Fedora 32:

In file included from pidfd_open_test.c:9:
../../../../usr/include/linux/wait.h:17:16: error: expected identifier before numeric constant
   17 | #define P_ALL  0
      |                ^

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/pidfd/pidfd_open_test.c | 1 -
 tools/testing/selftests/pidfd/pidfd_poll_test.c | 1 -
 2 files changed, 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index b9fe75fc3e51..8a59438ccc78 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -6,7 +6,6 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <linux/types.h>
-#include <linux/wait.h>
 #include <sched.h>
 #include <signal.h>
 #include <stdbool.h>
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c
index 4b115444dfe9..610811275357 100644
--- a/tools/testing/selftests/pidfd/pidfd_poll_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -3,7 +3,6 @@
 #define _GNU_SOURCE
 #include <errno.h>
 #include <linux/types.h>
-#include <linux/wait.h>
 #include <poll.h>
 #include <signal.h>
 #include <stdbool.h>
-- 
cgit v1.2.3-70-g09d2


From ef7086347c82c53a6c5238bd2cf31379f6acadde Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:24 +0300
Subject: selftests/harness: prettify SKIP message whitespace again

Commit 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
replaced XFAIL with SKIP in the output. Add one more space to make the
output aligned and pretty again.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/kselftest_harness.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index f19804df244c..d2cba10acc6d 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -126,7 +126,7 @@
 	snprintf(_metadata->results->reason, \
 		 sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \
 	if (TH_LOG_ENABLED) { \
-		fprintf(TH_LOG_STREAM, "#      SKIP     %s\n", \
+		fprintf(TH_LOG_STREAM, "#      SKIP      %s\n", \
 			_metadata->results->reason); \
 	} \
 	_metadata->passed = 1; \
-- 
cgit v1.2.3-70-g09d2


From 0b18fed98bf96ba5ac14ab7c43c8a3364cb0daf8 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:25 +0300
Subject: selftests: pidfd: use ksft_test_result_skip() when skipping test

There's planned tests != run tests in pidfd_test when some test is
skipped:

  $ ./pidfd_test
  TAP version 13
  1..8
  [...]
  # pidfd_send_signal signal recycled pid test: Skipping test
  # Planned tests != run tests (8 != 7)
  # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:0 error:0

Fix by using ksft_test_result_skip():

  $ ./pidfd_test
  TAP version 13
  1..8
  [...]
  ok 8 # SKIP pidfd_send_signal signal recycled pid test: Unsharing pid namespace not permitted
  # Totals: pass:7 fail:0 xfail:0 xpass:0 skip:1 error:0

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/pidfd/pidfd_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index c585aaa2acd8..529eb700ac26 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void)
 		ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n",
 				   test_name, PID_RECYCLE);
 	case PIDFD_SKIP:
-		ksft_print_msg("%s test: Skipping test\n", test_name);
+		ksft_test_result_skip("%s test: Skipping test\n", test_name);
 		ret = 0;
 		break;
 	case PIDFD_XFAIL:
-- 
cgit v1.2.3-70-g09d2


From b5ec9fe5be5e02e7db9e79aaa9a1ea7a3419d0b5 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:26 +0300
Subject: selftests: pidfd: skip test on kcmp() ENOSYS

Skip test if kcmp() is not available, for example if kernel is compiled
without CONFIG_CHECKPOINT_RESTORE=y.

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/pidfd/pidfd_getfd_test.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 7758c98be015..0930e2411dfb 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -204,7 +204,10 @@ TEST_F(child, fetch_fd)
 	fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
 	ASSERT_GE(fd, 0);
 
-	EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
+	ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd);
+	if (ret < 0 && errno == ENOSYS)
+		SKIP(return, "kcmp() syscall not supported");
+	EXPECT_EQ(ret, 0);
 
 	ret = fcntl(fd, F_GETFD);
 	ASSERT_GE(ret, 0);
-- 
cgit v1.2.3-70-g09d2


From 90da74af349e8a476e1d357da735b8f35b56d4e6 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:27 +0300
Subject: selftests: pidfd: add CONFIG_CHECKPOINT_RESTORE=y to config

kcmp syscall is used in pidfd_getfd_test.c, so add
CONFIG_CHECKPOINT_RESTORE=y to config to ensure kcmp is available.

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/pidfd/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
index bb11de90c0c9..f6f2965e17af 100644
--- a/tools/testing/selftests/pidfd/config
+++ b/tools/testing/selftests/pidfd/config
@@ -4,3 +4,4 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
-- 
cgit v1.2.3-70-g09d2


From 7b9621d4593199aa0268e56081fe730b71c053e6 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:28 +0300
Subject: selftests: pidfd: drop needless linux/kcmp.h inclusion in
 pidfd_setns_test.c

kcmp is not used in pidfd_setns_test.c, so do not include <linux/kcmp.h>

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/pidfd/pidfd_setns_test.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 1f085b922c6e..6e2f2cd400ca 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,7 +16,6 @@
 #include <unistd.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
-#include <linux/kcmp.h>
 
 #include "pidfd.h"
 #include "../clone3/clone3_selftests.h"
-- 
cgit v1.2.3-70-g09d2


From 40723419f4079d0c7de98d0f3149db915557b55a Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:41 +0000
Subject: kselftest: Enable vDSO test on non x86 platforms

Currently the vDSO tests are built only on x86 platforms and cannot be
cross compiled.

Enable vDSO TARGET for all the platforms.

Future patches will extend the tests.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/Makefile      | 1 +
 tools/testing/selftests/vDSO/Makefile | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..2dd60070d775 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -65,6 +65,7 @@ endif
 TARGETS += tmpfs
 TARGETS += tpm2
 TARGETS += user
+TARGETS += vDSO
 TARGETS += vm
 TARGETS += x86
 TARGETS += zram
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 0069f2f83f86..47031f3c5f3a 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -9,7 +9,6 @@ ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
 
-ifndef CROSS_COMPILE
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
 ifeq ($(CONFIG_X86_32),y)
@@ -24,4 +23,3 @@ $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 		vdso_standalone_test_x86.c parse_vdso.c \
 		-o $@
 
-endif
-- 
cgit v1.2.3-70-g09d2


From 693f5ca08ca0767b407b7ca634dbf1b783676ec3 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:42 +0000
Subject: kselftest: Extend vDSO selftest

The current version of the multiarch vDSO selftest verifies only
gettimeofday.

Extend the vDSO selftest to the other library functions:
 - time
 - clock_getres
 - clock_gettime

The extension has been used to verify the unified vdso library on the
supported architectures.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/Makefile        |   2 +
 tools/testing/selftests/vDSO/vdso_config.h   |  90 ++++++++++
 tools/testing/selftests/vDSO/vdso_test_abi.c | 244 +++++++++++++++++++++++++++
 3 files changed, 336 insertions(+)
 create mode 100644 tools/testing/selftests/vDSO/vdso_config.h
 create mode 100644 tools/testing/selftests/vDSO/vdso_test_abi.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 47031f3c5f3a..2ac7448bd414 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -5,6 +5,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
 ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
@@ -18,6 +19,7 @@ endif
 all: $(TEST_GEN_PROGS)
 $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
 $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
+$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
new file mode 100644
index 000000000000..eeb725df6045
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * vdso_config.h: Configuration options for vDSO tests.
+ * Copyright (c) 2019 Arm Ltd.
+ */
+#ifndef __VDSO_CONFIG_H__
+#define __VDSO_CONFIG_H__
+
+/*
+ * Each architecture exports its vDSO implementation with different names
+ * and a different version from the others, so we need to handle it as a
+ * special case.
+ */
+#if defined(__arm__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__aarch64__)
+#define VDSO_VERSION		3
+#define VDSO_NAMES		0
+#elif defined(__powerpc__)
+#define VDSO_VERSION		1
+#define VDSO_NAMES		0
+#define VDSO_32BIT		1
+#elif defined(__powerpc64__)
+#define VDSO_VERSION		1
+#define VDSO_NAMES		0
+#elif defined (__s390__)
+#define VDSO_VERSION		2
+#define VDSO_NAMES		0
+#define VDSO_32BIT		1
+#elif defined (__s390X__)
+#define VDSO_VERSION		2
+#define VDSO_NAMES		0
+#elif defined(__mips__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__sparc__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__i386__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#elif defined(__x86_64__)
+#define VDSO_VERSION		0
+#define VDSO_NAMES		1
+#elif defined(__riscv__)
+#define VDSO_VERSION		5
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#else /* nds32 */
+#define VDSO_VERSION		4
+#define VDSO_NAMES		1
+#define VDSO_32BIT		1
+#endif
+
+static const char *versions[6] = {
+	"LINUX_2.6",
+	"LINUX_2.6.15",
+	"LINUX_2.6.29",
+	"LINUX_2.6.39",
+	"LINUX_4",
+	"LINUX_4.15",
+};
+
+static const char *names[2][5] = {
+	{
+		"__kernel_gettimeofday",
+		"__kernel_clock_gettime",
+		"__kernel_time",
+		"__kernel_clock_getres",
+#if defined(VDSO_32BIT)
+		"__kernel_clock_gettime64",
+#endif
+	},
+	{
+		"__vdso_gettimeofday",
+		"__vdso_clock_gettime",
+		"__vdso_time",
+		"__vdso_clock_getres",
+#if defined(VDSO_32BIT)
+		"__vdso_clock_gettime64",
+#endif
+	},
+};
+
+#endif /* __VDSO_CONFIG_H__ */
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
new file mode 100644
index 000000000000..3d603f1394af
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vdso_full_test.c: Sample code to test all the timers.
+ * Copyright (c) 2019 Arm Ltd.
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_full_test.c parse_vdso.c
+ *
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <time.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest.h"
+#include "vdso_config.h"
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+static const char *version;
+static const char **name;
+
+typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz);
+typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
+typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
+typedef time_t (*vdso_time_t)(time_t *t);
+
+static int vdso_test_gettimeofday(void)
+{
+	/* Find gettimeofday. */
+	vdso_gettimeofday_t vdso_gettimeofday =
+		(vdso_gettimeofday_t)vdso_sym(version, name[0]);
+
+	if (!vdso_gettimeofday) {
+		printf("Could not find %s\n", name[0]);
+		return KSFT_SKIP;
+	}
+
+	struct timeval tv;
+	long ret = vdso_gettimeofday(&tv, 0);
+
+	if (ret == 0) {
+		printf("The time is %lld.%06lld\n",
+		       (long long)tv.tv_sec, (long long)tv.tv_usec);
+	} else {
+		printf("%s failed\n", name[0]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static int vdso_test_clock_gettime(clockid_t clk_id)
+{
+	/* Find clock_gettime. */
+	vdso_clock_gettime_t vdso_clock_gettime =
+		(vdso_clock_gettime_t)vdso_sym(version, name[1]);
+
+	if (!vdso_clock_gettime) {
+		printf("Could not find %s\n", name[1]);
+		return KSFT_SKIP;
+	}
+
+	struct timespec ts;
+	long ret = vdso_clock_gettime(clk_id, &ts);
+
+	if (ret == 0) {
+		printf("The time is %lld.%06lld\n",
+		       (long long)ts.tv_sec, (long long)ts.tv_nsec);
+	} else {
+		printf("%s failed\n", name[1]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static int vdso_test_time(void)
+{
+	/* Find time. */
+	vdso_time_t vdso_time =
+		(vdso_time_t)vdso_sym(version, name[2]);
+
+	if (!vdso_time) {
+		printf("Could not find %s\n", name[2]);
+		return KSFT_SKIP;
+	}
+
+	long ret = vdso_time(NULL);
+
+	if (ret > 0) {
+		printf("The time in hours since January 1, 1970 is %lld\n",
+				(long long)(ret / 3600));
+	} else {
+		printf("%s failed\n", name[2]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static int vdso_test_clock_getres(clockid_t clk_id)
+{
+	/* Find clock_getres. */
+	vdso_clock_getres_t vdso_clock_getres =
+		(vdso_clock_getres_t)vdso_sym(version, name[3]);
+
+	if (!vdso_clock_getres) {
+		printf("Could not find %s\n", name[3]);
+		return KSFT_SKIP;
+	}
+
+	struct timespec ts, sys_ts;
+	long ret = vdso_clock_getres(clk_id, &ts);
+
+	if (ret == 0) {
+		printf("The resolution is %lld %lld\n",
+		       (long long)ts.tv_sec, (long long)ts.tv_nsec);
+	} else {
+		printf("%s failed\n", name[3]);
+		return KSFT_FAIL;
+	}
+
+	ret = syscall(SYS_clock_getres, clk_id, &sys_ts);
+
+	if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) {
+		printf("%s failed\n", name[3]);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+const char *vdso_clock_name[12] = {
+	"CLOCK_REALTIME",
+	"CLOCK_MONOTONIC",
+	"CLOCK_PROCESS_CPUTIME_ID",
+	"CLOCK_THREAD_CPUTIME_ID",
+	"CLOCK_MONOTONIC_RAW",
+	"CLOCK_REALTIME_COARSE",
+	"CLOCK_MONOTONIC_COARSE",
+	"CLOCK_BOOTTIME",
+	"CLOCK_REALTIME_ALARM",
+	"CLOCK_BOOTTIME_ALARM",
+	"CLOCK_SGI_CYCLE",
+	"CLOCK_TAI",
+};
+
+/*
+ * This function calls vdso_test_clock_gettime and vdso_test_clock_getres
+ * with different values for clock_id.
+ */
+static inline int vdso_test_clock(clockid_t clock_id)
+{
+	int ret0, ret1;
+
+	ret0 = vdso_test_clock_gettime(clock_id);
+	/* A skipped test is considered passed */
+	if (ret0 == KSFT_SKIP)
+		ret0 = KSFT_PASS;
+
+	ret1 = vdso_test_clock_getres(clock_id);
+	/* A skipped test is considered passed */
+	if (ret1 == KSFT_SKIP)
+		ret1 = KSFT_PASS;
+
+	ret0 += ret1;
+
+	printf("clock_id: %s", vdso_clock_name[clock_id]);
+
+	if (ret0 > 0)
+		printf(" [FAIL]\n");
+	else
+		printf(" [PASS]\n");
+
+	return ret0;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+	int ret;
+
+	if (!sysinfo_ehdr) {
+		printf("AT_SYSINFO_EHDR is not present!\n");
+		return KSFT_SKIP;
+	}
+
+	version = versions[VDSO_VERSION];
+	name = (const char **)&names[VDSO_NAMES];
+
+	printf("[vDSO kselftest] VDSO_VERSION: %s\n", version);
+
+	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+	ret = vdso_test_gettimeofday();
+
+#if _POSIX_TIMERS > 0
+
+#ifdef CLOCK_REALTIME
+	ret += vdso_test_clock(CLOCK_REALTIME);
+#endif
+
+#ifdef CLOCK_BOOTTIME
+	ret += vdso_test_clock(CLOCK_BOOTTIME);
+#endif
+
+#ifdef CLOCK_TAI
+	ret += vdso_test_clock(CLOCK_TAI);
+#endif
+
+#ifdef CLOCK_REALTIME_COARSE
+	ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+	ret += vdso_test_clock(CLOCK_MONOTONIC);
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW
+	ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
+#endif
+
+#ifdef CLOCK_MONOTONIC_COARSE
+	ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+#endif
+
+#endif
+
+	ret += vdso_test_time();
+
+	if (ret > 0)
+		return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
-- 
cgit v1.2.3-70-g09d2


From 03f55c7952c92d8577d6e9bc695f3fd20032cfd9 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:43 +0000
Subject: kselftest: Extend vDSO selftest to clock_getres

The current version of the multiarch vDSO selftest verifies only
gettimeofday.

Extend the vDSO selftest to clock_getres, to verify that the
syscall and the vDSO library function return the same information.

The extension has been used to verify the hrtimer_resoltion fix.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/Makefile              |   2 +
 .../selftests/vDSO/vdso_test_clock_getres.c        | 124 +++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 tools/testing/selftests/vDSO/vdso_test_clock_getres.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 2ac7448bd414..375d80c2bff5 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -6,6 +6,7 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
 TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
 ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
@@ -20,6 +21,7 @@ all: $(TEST_GEN_PROGS)
 $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
 $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
 $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
+$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
new file mode 100644
index 000000000000..15dcee16ff72
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ * vdso_clock_getres.c: Sample code to test clock_getres.
+ * Copyright (c) 2019 Arm Ltd.
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_clock_getres.c
+ *
+ * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit),
+ * Power (32-bit and 64-bit), S390x (32-bit and 64-bit).
+ * Might work on other architectures.
+ */
+
+#define _GNU_SOURCE
+#include <elf.h>
+#include <err.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest.h"
+
+static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts)
+{
+	long ret;
+
+	ret = syscall(SYS_clock_getres, _clkid, _ts);
+
+	return ret;
+}
+
+const char *vdso_clock_name[12] = {
+	"CLOCK_REALTIME",
+	"CLOCK_MONOTONIC",
+	"CLOCK_PROCESS_CPUTIME_ID",
+	"CLOCK_THREAD_CPUTIME_ID",
+	"CLOCK_MONOTONIC_RAW",
+	"CLOCK_REALTIME_COARSE",
+	"CLOCK_MONOTONIC_COARSE",
+	"CLOCK_BOOTTIME",
+	"CLOCK_REALTIME_ALARM",
+	"CLOCK_BOOTTIME_ALARM",
+	"CLOCK_SGI_CYCLE",
+	"CLOCK_TAI",
+};
+
+/*
+ * This function calls clock_getres in vdso and by system call
+ * with different values for clock_id.
+ *
+ * Example of output:
+ *
+ * clock_id: CLOCK_REALTIME [PASS]
+ * clock_id: CLOCK_BOOTTIME [PASS]
+ * clock_id: CLOCK_TAI [PASS]
+ * clock_id: CLOCK_REALTIME_COARSE [PASS]
+ * clock_id: CLOCK_MONOTONIC [PASS]
+ * clock_id: CLOCK_MONOTONIC_RAW [PASS]
+ * clock_id: CLOCK_MONOTONIC_COARSE [PASS]
+ */
+static inline int vdso_test_clock(unsigned int clock_id)
+{
+	struct timespec x, y;
+
+	printf("clock_id: %s", vdso_clock_name[clock_id]);
+	clock_getres(clock_id, &x);
+	syscall_clock_getres(clock_id, &y);
+
+	if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) {
+		printf(" [FAIL]\n");
+		return KSFT_FAIL;
+	}
+
+	printf(" [PASS]\n");
+	return KSFT_PASS;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+#if _POSIX_TIMERS > 0
+
+#ifdef CLOCK_REALTIME
+	ret = vdso_test_clock(CLOCK_REALTIME);
+#endif
+
+#ifdef CLOCK_BOOTTIME
+	ret += vdso_test_clock(CLOCK_BOOTTIME);
+#endif
+
+#ifdef CLOCK_TAI
+	ret += vdso_test_clock(CLOCK_TAI);
+#endif
+
+#ifdef CLOCK_REALTIME_COARSE
+	ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+	ret += vdso_test_clock(CLOCK_MONOTONIC);
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW
+	ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
+#endif
+
+#ifdef CLOCK_MONOTONIC_COARSE
+	ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+#endif
+
+#endif
+	if (ret > 0)
+		return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
-- 
cgit v1.2.3-70-g09d2


From c7e5789b24d36dd5dddd36ea2b99280a606cac42 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:44 +0000
Subject: kselftest: Move test_vdso to the vDSO test suite

Move test_vdso from x86 to the vDSO test suite.

Suggested-by: Andy Lutomirski <luto@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/Makefile              |  10 +-
 .../testing/selftests/vDSO/vdso_test_correctness.c | 342 +++++++++++++++++++++
 tools/testing/selftests/x86/Makefile               |   2 +-
 tools/testing/selftests/x86/test_vdso.c            | 342 ---------------------
 4 files changed, 351 insertions(+), 345 deletions(-)
 create mode 100644 tools/testing/selftests/vDSO/vdso_test_correctness.c
 delete mode 100644 tools/testing/selftests/x86/test_vdso.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 375d80c2bff5..d53a4d8008f9 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -7,12 +7,14 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
 TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
 TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
-ifeq ($(ARCH),x86)
+ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
 
 CFLAGS := -std=gnu99
 CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+LDFLAGS_vdso_test_correctness := -ldl
 ifeq ($(CONFIG_X86_32),y)
 LDLIBS += -lgcc_s
 endif
@@ -26,4 +28,8 @@ $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
 		-o $@
-
+$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
+	$(CC) $(CFLAGS) \
+		vdso_test_correctness.c \
+		-o $@ \
+		$(LDFLAGS_vdso_test_correctness)
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
new file mode 100644
index 000000000000..42052db0f870
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <limits.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+int nerrs = 0;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
+#ifdef __x86_64__
+	FILE *maps;
+	char line[MAPS_LINE_LEN];
+	bool found = false;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+		return NULL;
+
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
+		char r, x;
+		void *start, *end;
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
+		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+			   &start, &end, &r, &x, name) != 5)
+			continue;
+
+		if (strcmp(name, "[vsyscall]"))
+			continue;
+
+		/* assume entries are OK, as we test vDSO here not vsyscall */
+		found = true;
+		break;
+	}
+
+	fclose(maps);
+
+	if (!found) {
+		printf("Warning: failed to find vsyscall getcpu\n");
+		return NULL;
+	}
+	return (void *) (0xffffffffff600800);
+#else
+	return NULL;
+#endif
+}
+
+
+static void fill_function_pointers()
+{
+	void *vdso = dlopen("linux-vdso.so.1",
+			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-gate.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("[WARN]\tfailed to find vDSO\n");
+		return;
+	}
+
+	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	if (!vdso_getcpu)
+		printf("Warning: failed to find getcpu in vDSO\n");
+
+	vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_clock_gettime)
+		printf("Warning: failed to find clock_gettime in vDSO\n");
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gettimeofday)
+		printf("Warning: failed to find gettimeofday in vDSO\n");
+
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+		       void* cache)
+{
+	return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(__NR_gettimeofday, tv, tz);
+}
+
+static void test_getcpu(void)
+{
+	printf("[RUN]\tTesting getcpu...\n");
+
+	for (int cpu = 0; ; cpu++) {
+		cpu_set_t cpuset;
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu, &cpuset);
+		if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+			return;
+
+		unsigned cpu_sys, cpu_vdso, cpu_vsys,
+			node_sys, node_vdso, node_vsys;
+		long ret_sys, ret_vdso = 1, ret_vsys = 1;
+		unsigned node;
+
+		ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+		if (vdso_getcpu)
+			ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+		if (vgetcpu)
+			ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+		if (!ret_sys)
+			node = node_sys;
+		else if (!ret_vdso)
+			node = node_vdso;
+		else if (!ret_vsys)
+			node = node_vsys;
+
+		bool ok = true;
+		if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+			ok = false;
+		if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+			ok = false;
+		if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+			ok = false;
+
+		printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+		if (!ret_sys)
+			printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+		if (!ret_vdso)
+			printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+		if (!ret_vsys)
+			printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+			       node_vsys);
+		printf("\n");
+
+		if (!ok)
+			nerrs++;
+	}
+}
+
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+	[0] = "CLOCK_REALTIME",
+	[1] = "CLOCK_MONOTONIC",
+	[2] = "CLOCK_PROCESS_CPUTIME_ID",
+	[3] = "CLOCK_THREAD_CPUTIME_ID",
+	[4] = "CLOCK_MONOTONIC_RAW",
+	[5] = "CLOCK_REALTIME_COARSE",
+	[6] = "CLOCK_MONOTONIC_COARSE",
+	[7] = "CLOCK_BOOTTIME",
+	[8] = "CLOCK_REALTIME_ALARM",
+	[9] = "CLOCK_BOOTTIME_ALARM",
+	[10] = "CLOCK_SGI_CYCLE",
+	[11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+	struct timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime(clock, &vdso);
+	end_ret = sys_clock_gettime(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+}
+
+static void test_clock_gettime(void)
+{
+	if (!vdso_clock_gettime) {
+		printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n");
+		return;
+	}
+
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime(-1, "invalid");
+	test_one_clock_gettime(INT_MIN, "invalid");
+	test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+	struct timeval start, vdso, end;
+	struct timezone sys_tz, vdso_tz;
+	int vdso_ret, end_ret;
+
+	if (!vdso_gettimeofday)
+		return;
+
+	printf("[RUN]\tTesting gettimeofday...\n");
+
+	if (sys_gettimeofday(&start, &sys_tz) < 0) {
+		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+		nerrs++;
+		return;
+	}
+
+	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+	end_ret = sys_gettimeofday(&end, NULL);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_usec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+	       (unsigned long long)end.tv_sec, end.tv_usec);
+
+	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+
+	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+	} else {
+		printf("[FAIL]\ttimezones do not match\n");
+		nerrs++;
+	}
+
+	/* And make sure that passing NULL for tz doesn't crash. */
+	vdso_gettimeofday(&vdso, NULL);
+}
+
+int main(int argc, char **argv)
+{
+	fill_function_pointers();
+
+	test_clock_gettime();
+	test_gettimeofday();
+
+	/*
+	 * Test getcpu() last so that, if something goes wrong setting affinity,
+	 * we still run the other tests.
+	 */
+	test_getcpu();
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 6703c7906b71..333980375bc7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl ioperm \
-			test_vdso test_vsyscall mov_ss_trap \
+			test_vsyscall mov_ss_trap \
 			syscall_arg_fault fsgsbase_restore
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
deleted file mode 100644
index 42052db0f870..000000000000
--- a/tools/testing/selftests/x86/test_vdso.c
+++ /dev/null
@@ -1,342 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ldt_gdt.c - Test cases for LDT and GDT access
- * Copyright (c) 2011-2015 Andrew Lutomirski
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <sys/time.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <dlfcn.h>
-#include <string.h>
-#include <errno.h>
-#include <sched.h>
-#include <stdbool.h>
-#include <limits.h>
-
-#ifndef SYS_getcpu
-# ifdef __x86_64__
-#  define SYS_getcpu 309
-# else
-#  define SYS_getcpu 318
-# endif
-#endif
-
-/* max length of lines in /proc/self/maps - anything longer is skipped here */
-#define MAPS_LINE_LEN 128
-
-int nerrs = 0;
-
-typedef int (*vgettime_t)(clockid_t, struct timespec *);
-
-vgettime_t vdso_clock_gettime;
-
-typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
-
-vgtod_t vdso_gettimeofday;
-
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-getcpu_t vgetcpu;
-getcpu_t vdso_getcpu;
-
-static void *vsyscall_getcpu(void)
-{
-#ifdef __x86_64__
-	FILE *maps;
-	char line[MAPS_LINE_LEN];
-	bool found = false;
-
-	maps = fopen("/proc/self/maps", "r");
-	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
-		return NULL;
-
-	while (fgets(line, MAPS_LINE_LEN, maps)) {
-		char r, x;
-		void *start, *end;
-		char name[MAPS_LINE_LEN];
-
-		/* sscanf() is safe here as strlen(name) >= strlen(line) */
-		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
-			   &start, &end, &r, &x, name) != 5)
-			continue;
-
-		if (strcmp(name, "[vsyscall]"))
-			continue;
-
-		/* assume entries are OK, as we test vDSO here not vsyscall */
-		found = true;
-		break;
-	}
-
-	fclose(maps);
-
-	if (!found) {
-		printf("Warning: failed to find vsyscall getcpu\n");
-		return NULL;
-	}
-	return (void *) (0xffffffffff600800);
-#else
-	return NULL;
-#endif
-}
-
-
-static void fill_function_pointers()
-{
-	void *vdso = dlopen("linux-vdso.so.1",
-			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
-	if (!vdso)
-		vdso = dlopen("linux-gate.so.1",
-			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
-	if (!vdso) {
-		printf("[WARN]\tfailed to find vDSO\n");
-		return;
-	}
-
-	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
-	if (!vdso_getcpu)
-		printf("Warning: failed to find getcpu in vDSO\n");
-
-	vgetcpu = (getcpu_t) vsyscall_getcpu();
-
-	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
-	if (!vdso_clock_gettime)
-		printf("Warning: failed to find clock_gettime in vDSO\n");
-
-	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
-	if (!vdso_gettimeofday)
-		printf("Warning: failed to find gettimeofday in vDSO\n");
-
-}
-
-static long sys_getcpu(unsigned * cpu, unsigned * node,
-		       void* cache)
-{
-	return syscall(__NR_getcpu, cpu, node, cache);
-}
-
-static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
-{
-	return syscall(__NR_clock_gettime, id, ts);
-}
-
-static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-	return syscall(__NR_gettimeofday, tv, tz);
-}
-
-static void test_getcpu(void)
-{
-	printf("[RUN]\tTesting getcpu...\n");
-
-	for (int cpu = 0; ; cpu++) {
-		cpu_set_t cpuset;
-		CPU_ZERO(&cpuset);
-		CPU_SET(cpu, &cpuset);
-		if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
-			return;
-
-		unsigned cpu_sys, cpu_vdso, cpu_vsys,
-			node_sys, node_vdso, node_vsys;
-		long ret_sys, ret_vdso = 1, ret_vsys = 1;
-		unsigned node;
-
-		ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
-		if (vdso_getcpu)
-			ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
-		if (vgetcpu)
-			ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
-
-		if (!ret_sys)
-			node = node_sys;
-		else if (!ret_vdso)
-			node = node_vdso;
-		else if (!ret_vsys)
-			node = node_vsys;
-
-		bool ok = true;
-		if (!ret_sys && (cpu_sys != cpu || node_sys != node))
-			ok = false;
-		if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
-			ok = false;
-		if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
-			ok = false;
-
-		printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
-		if (!ret_sys)
-			printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
-		if (!ret_vdso)
-			printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
-		if (!ret_vsys)
-			printf(" vsyscall: cpu %u, node %u", cpu_vsys,
-			       node_vsys);
-		printf("\n");
-
-		if (!ok)
-			nerrs++;
-	}
-}
-
-static bool ts_leq(const struct timespec *a, const struct timespec *b)
-{
-	if (a->tv_sec != b->tv_sec)
-		return a->tv_sec < b->tv_sec;
-	else
-		return a->tv_nsec <= b->tv_nsec;
-}
-
-static bool tv_leq(const struct timeval *a, const struct timeval *b)
-{
-	if (a->tv_sec != b->tv_sec)
-		return a->tv_sec < b->tv_sec;
-	else
-		return a->tv_usec <= b->tv_usec;
-}
-
-static char const * const clocknames[] = {
-	[0] = "CLOCK_REALTIME",
-	[1] = "CLOCK_MONOTONIC",
-	[2] = "CLOCK_PROCESS_CPUTIME_ID",
-	[3] = "CLOCK_THREAD_CPUTIME_ID",
-	[4] = "CLOCK_MONOTONIC_RAW",
-	[5] = "CLOCK_REALTIME_COARSE",
-	[6] = "CLOCK_MONOTONIC_COARSE",
-	[7] = "CLOCK_BOOTTIME",
-	[8] = "CLOCK_REALTIME_ALARM",
-	[9] = "CLOCK_BOOTTIME_ALARM",
-	[10] = "CLOCK_SGI_CYCLE",
-	[11] = "CLOCK_TAI",
-};
-
-static void test_one_clock_gettime(int clock, const char *name)
-{
-	struct timespec start, vdso, end;
-	int vdso_ret, end_ret;
-
-	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
-
-	if (sys_clock_gettime(clock, &start) < 0) {
-		if (errno == EINVAL) {
-			vdso_ret = vdso_clock_gettime(clock, &vdso);
-			if (vdso_ret == -EINVAL) {
-				printf("[OK]\tNo such clock.\n");
-			} else {
-				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
-				nerrs++;
-			}
-		} else {
-			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
-		}
-		return;
-	}
-
-	vdso_ret = vdso_clock_gettime(clock, &vdso);
-	end_ret = sys_clock_gettime(clock, &end);
-
-	if (vdso_ret != 0 || end_ret != 0) {
-		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
-		       vdso_ret, errno);
-		nerrs++;
-		return;
-	}
-
-	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
-	       (unsigned long long)start.tv_sec, start.tv_nsec,
-	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
-	       (unsigned long long)end.tv_sec, end.tv_nsec);
-
-	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
-		printf("[FAIL]\tTimes are out of sequence\n");
-		nerrs++;
-	}
-}
-
-static void test_clock_gettime(void)
-{
-	if (!vdso_clock_gettime) {
-		printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n");
-		return;
-	}
-
-	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
-	     clock++) {
-		test_one_clock_gettime(clock, clocknames[clock]);
-	}
-
-	/* Also test some invalid clock ids */
-	test_one_clock_gettime(-1, "invalid");
-	test_one_clock_gettime(INT_MIN, "invalid");
-	test_one_clock_gettime(INT_MAX, "invalid");
-}
-
-static void test_gettimeofday(void)
-{
-	struct timeval start, vdso, end;
-	struct timezone sys_tz, vdso_tz;
-	int vdso_ret, end_ret;
-
-	if (!vdso_gettimeofday)
-		return;
-
-	printf("[RUN]\tTesting gettimeofday...\n");
-
-	if (sys_gettimeofday(&start, &sys_tz) < 0) {
-		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
-		nerrs++;
-		return;
-	}
-
-	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
-	end_ret = sys_gettimeofday(&end, NULL);
-
-	if (vdso_ret != 0 || end_ret != 0) {
-		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
-		       vdso_ret, errno);
-		nerrs++;
-		return;
-	}
-
-	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
-	       (unsigned long long)start.tv_sec, start.tv_usec,
-	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
-	       (unsigned long long)end.tv_sec, end.tv_usec);
-
-	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
-		printf("[FAIL]\tTimes are out of sequence\n");
-		nerrs++;
-	}
-
-	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
-	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
-		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
-		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
-	} else {
-		printf("[FAIL]\ttimezones do not match\n");
-		nerrs++;
-	}
-
-	/* And make sure that passing NULL for tz doesn't crash. */
-	vdso_gettimeofday(&vdso, NULL);
-}
-
-int main(int argc, char **argv)
-{
-	fill_function_pointers();
-
-	test_clock_gettime();
-	test_gettimeofday();
-
-	/*
-	 * Test getcpu() last so that, if something goes wrong setting affinity,
-	 * we still run the other tests.
-	 */
-	test_getcpu();
-
-	return nerrs ? 1 : 0;
-}
-- 
cgit v1.2.3-70-g09d2


From b2f1c3db28870d88d1a19aa86a8374e7725d62c5 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 26 Oct 2020 11:49:45 +0000
Subject: kselftest: Extend vdso correctness test to clock_gettime64

With the release of Linux 5.1 has been added a new syscall,
clock_gettime64, that provided a 64 bit time value for a specified
clock_ID to make the kernel Y2038 safe on 32 bit architectures.

Extend the vdso correctness test to cover the newly exposed vdso
function.

Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vDSO/vdso_config.h         |   4 +-
 .../testing/selftests/vDSO/vdso_test_correctness.c | 115 ++++++++++++++++++++-
 2 files changed, 115 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index eeb725df6045..6a6fe8d4ff55 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -66,12 +66,13 @@ static const char *versions[6] = {
 	"LINUX_4.15",
 };
 
-static const char *names[2][5] = {
+static const char *names[2][6] = {
 	{
 		"__kernel_gettimeofday",
 		"__kernel_clock_gettime",
 		"__kernel_time",
 		"__kernel_clock_getres",
+		"__kernel_getcpu",
 #if defined(VDSO_32BIT)
 		"__kernel_clock_gettime64",
 #endif
@@ -81,6 +82,7 @@ static const char *names[2][5] = {
 		"__vdso_clock_gettime",
 		"__vdso_time",
 		"__vdso_clock_getres",
+		"__vdso_getcpu",
 #if defined(VDSO_32BIT)
 		"__vdso_clock_gettime64",
 #endif
diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index 42052db0f870..5029ef9b228c 100644
--- a/tools/testing/selftests/vDSO/vdso_test_correctness.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -19,6 +19,10 @@
 #include <stdbool.h>
 #include <limits.h>
 
+#include "vdso_config.h"
+
+static const char **name;
+
 #ifndef SYS_getcpu
 # ifdef __x86_64__
 #  define SYS_getcpu 309
@@ -27,6 +31,17 @@
 # endif
 #endif
 
+#ifndef __NR_clock_gettime64
+#define __NR_clock_gettime64	403
+#endif
+
+#ifndef __kernel_timespec
+struct __kernel_timespec {
+	long long	tv_sec;
+	long long	tv_nsec;
+};
+#endif
+
 /* max length of lines in /proc/self/maps - anything longer is skipped here */
 #define MAPS_LINE_LEN 128
 
@@ -36,6 +51,10 @@ typedef int (*vgettime_t)(clockid_t, struct timespec *);
 
 vgettime_t vdso_clock_gettime;
 
+typedef int (*vgettime64_t)(clockid_t, struct __kernel_timespec *);
+
+vgettime64_t vdso_clock_gettime64;
+
 typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
 
 vgtod_t vdso_gettimeofday;
@@ -99,17 +118,23 @@ static void fill_function_pointers()
 		return;
 	}
 
-	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	vdso_getcpu = (getcpu_t)dlsym(vdso, name[4]);
 	if (!vdso_getcpu)
 		printf("Warning: failed to find getcpu in vDSO\n");
 
 	vgetcpu = (getcpu_t) vsyscall_getcpu();
 
-	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, name[1]);
 	if (!vdso_clock_gettime)
 		printf("Warning: failed to find clock_gettime in vDSO\n");
 
-	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+#if defined(VDSO_32BIT)
+	vdso_clock_gettime64 = (vgettime64_t)dlsym(vdso, name[5]);
+	if (!vdso_clock_gettime64)
+		printf("Warning: failed to find clock_gettime64 in vDSO\n");
+#endif
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, name[0]);
 	if (!vdso_gettimeofday)
 		printf("Warning: failed to find gettimeofday in vDSO\n");
 
@@ -126,6 +151,11 @@ static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
 	return syscall(__NR_clock_gettime, id, ts);
 }
 
+static inline int sys_clock_gettime64(clockid_t id, struct __kernel_timespec *ts)
+{
+	return syscall(__NR_clock_gettime64, id, ts);
+}
+
 static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
 	return syscall(__NR_gettimeofday, tv, tz);
@@ -191,6 +221,15 @@ static bool ts_leq(const struct timespec *a, const struct timespec *b)
 		return a->tv_nsec <= b->tv_nsec;
 }
 
+static bool ts64_leq(const struct __kernel_timespec *a,
+		     const struct __kernel_timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
 static bool tv_leq(const struct timeval *a, const struct timeval *b)
 {
 	if (a->tv_sec != b->tv_sec)
@@ -254,7 +293,10 @@ static void test_one_clock_gettime(int clock, const char *name)
 	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
 		printf("[FAIL]\tTimes are out of sequence\n");
 		nerrs++;
+		return;
 	}
+
+	printf("[OK]\tTest Passed.\n");
 }
 
 static void test_clock_gettime(void)
@@ -275,6 +317,70 @@ static void test_clock_gettime(void)
 	test_one_clock_gettime(INT_MAX, "invalid");
 }
 
+static void test_one_clock_gettime64(int clock, const char *name)
+{
+	struct __kernel_timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime64 for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime64(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime64(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime64 returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime64(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime64(clock, &vdso);
+	end_ret = sys_clock_gettime64(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts64_leq(&start, &vdso) || !ts64_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+		return;
+	}
+
+	printf("[OK]\tTest Passed.\n");
+}
+
+static void test_clock_gettime64(void)
+{
+	if (!vdso_clock_gettime64) {
+		printf("[SKIP]\tNo vDSO, so skipping clock_gettime64() tests\n");
+		return;
+	}
+
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime64(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime64(-1, "invalid");
+	test_one_clock_gettime64(INT_MIN, "invalid");
+	test_one_clock_gettime64(INT_MAX, "invalid");
+}
+
 static void test_gettimeofday(void)
 {
 	struct timeval start, vdso, end;
@@ -327,9 +433,12 @@ static void test_gettimeofday(void)
 
 int main(int argc, char **argv)
 {
+	name = (const char **)&names[VDSO_NAMES];
+
 	fill_function_pointers();
 
 	test_clock_gettime();
+	test_clock_gettime64();
 	test_gettimeofday();
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From d2692abd6fa9866fda3052efa5cbd116b9fec56b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Oct 2020 10:51:33 +0100
Subject: selftests: kselftest_harness.h: fix kernel-doc markups

The kernel-doc markups there is violating the expected
syntax, causing it to not parse the name of the
markup identifier properly, preventing it to check
if the kernel-doc matches the #define below each
markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/697640045663f1366beb15e76e78b420dac5f5a2.1603791716.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 tools/testing/selftests/kselftest_harness.h | 44 ++++++++++++++---------------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index f19804df244c..d747d6b1da1a 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -432,7 +432,7 @@
  */
 
 /**
- * ASSERT_EQ(expected, seen)
+ * ASSERT_EQ()
  *
  * @expected: expected value
  * @seen: measured value
@@ -443,7 +443,7 @@
 	__EXPECT(expected, #expected, seen, #seen, ==, 1)
 
 /**
- * ASSERT_NE(expected, seen)
+ * ASSERT_NE()
  *
  * @expected: expected value
  * @seen: measured value
@@ -454,7 +454,7 @@
 	__EXPECT(expected, #expected, seen, #seen, !=, 1)
 
 /**
- * ASSERT_LT(expected, seen)
+ * ASSERT_LT()
  *
  * @expected: expected value
  * @seen: measured value
@@ -465,7 +465,7 @@
 	__EXPECT(expected, #expected, seen, #seen, <, 1)
 
 /**
- * ASSERT_LE(expected, seen)
+ * ASSERT_LE()
  *
  * @expected: expected value
  * @seen: measured value
@@ -476,7 +476,7 @@
 	__EXPECT(expected, #expected, seen, #seen, <=, 1)
 
 /**
- * ASSERT_GT(expected, seen)
+ * ASSERT_GT()
  *
  * @expected: expected value
  * @seen: measured value
@@ -487,7 +487,7 @@
 	__EXPECT(expected, #expected, seen, #seen, >, 1)
 
 /**
- * ASSERT_GE(expected, seen)
+ * ASSERT_GE()
  *
  * @expected: expected value
  * @seen: measured value
@@ -498,7 +498,7 @@
 	__EXPECT(expected, #expected, seen, #seen, >=, 1)
 
 /**
- * ASSERT_NULL(seen)
+ * ASSERT_NULL()
  *
  * @seen: measured value
  *
@@ -508,7 +508,7 @@
 	__EXPECT(NULL, "NULL", seen, #seen, ==, 1)
 
 /**
- * ASSERT_TRUE(seen)
+ * ASSERT_TRUE()
  *
  * @seen: measured value
  *
@@ -518,7 +518,7 @@
 	__EXPECT(0, "0", seen, #seen, !=, 1)
 
 /**
- * ASSERT_FALSE(seen)
+ * ASSERT_FALSE()
  *
  * @seen: measured value
  *
@@ -528,7 +528,7 @@
 	__EXPECT(0, "0", seen, #seen, ==, 1)
 
 /**
- * ASSERT_STREQ(expected, seen)
+ * ASSERT_STREQ()
  *
  * @expected: expected value
  * @seen: measured value
@@ -539,7 +539,7 @@
 	__EXPECT_STR(expected, seen, ==, 1)
 
 /**
- * ASSERT_STRNE(expected, seen)
+ * ASSERT_STRNE()
  *
  * @expected: expected value
  * @seen: measured value
@@ -550,7 +550,7 @@
 	__EXPECT_STR(expected, seen, !=, 1)
 
 /**
- * EXPECT_EQ(expected, seen)
+ * EXPECT_EQ()
  *
  * @expected: expected value
  * @seen: measured value
@@ -561,7 +561,7 @@
 	__EXPECT(expected, #expected, seen, #seen, ==, 0)
 
 /**
- * EXPECT_NE(expected, seen)
+ * EXPECT_NE()
  *
  * @expected: expected value
  * @seen: measured value
@@ -572,7 +572,7 @@
 	__EXPECT(expected, #expected, seen, #seen, !=, 0)
 
 /**
- * EXPECT_LT(expected, seen)
+ * EXPECT_LT()
  *
  * @expected: expected value
  * @seen: measured value
@@ -583,7 +583,7 @@
 	__EXPECT(expected, #expected, seen, #seen, <, 0)
 
 /**
- * EXPECT_LE(expected, seen)
+ * EXPECT_LE()
  *
  * @expected: expected value
  * @seen: measured value
@@ -594,7 +594,7 @@
 	__EXPECT(expected, #expected, seen, #seen, <=, 0)
 
 /**
- * EXPECT_GT(expected, seen)
+ * EXPECT_GT()
  *
  * @expected: expected value
  * @seen: measured value
@@ -605,7 +605,7 @@
 	__EXPECT(expected, #expected, seen, #seen, >, 0)
 
 /**
- * EXPECT_GE(expected, seen)
+ * EXPECT_GE()
  *
  * @expected: expected value
  * @seen: measured value
@@ -616,7 +616,7 @@
 	__EXPECT(expected, #expected, seen, #seen, >=, 0)
 
 /**
- * EXPECT_NULL(seen)
+ * EXPECT_NULL()
  *
  * @seen: measured value
  *
@@ -626,7 +626,7 @@
 	__EXPECT(NULL, "NULL", seen, #seen, ==, 0)
 
 /**
- * EXPECT_TRUE(seen)
+ * EXPECT_TRUE()
  *
  * @seen: measured value
  *
@@ -636,7 +636,7 @@
 	__EXPECT(0, "0", seen, #seen, !=, 0)
 
 /**
- * EXPECT_FALSE(seen)
+ * EXPECT_FALSE()
  *
  * @seen: measured value
  *
@@ -646,7 +646,7 @@
 	__EXPECT(0, "0", seen, #seen, ==, 0)
 
 /**
- * EXPECT_STREQ(expected, seen)
+ * EXPECT_STREQ()
  *
  * @expected: expected value
  * @seen: measured value
@@ -657,7 +657,7 @@
 	__EXPECT_STR(expected, seen, ==, 0)
 
 /**
- * EXPECT_STRNE(expected, seen)
+ * EXPECT_STRNE()
  *
  * @expected: expected value
  * @seen: measured value
-- 
cgit v1.2.3-70-g09d2


From 1e6f5dcc1b9ec9068f5d38331cec38b35498edf5 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 27 Oct 2020 16:36:45 -0700
Subject: tools, bpftool: Avoid array index warnings.

The bpf_caps array is shorter without CAP_BPF, avoid out of bounds reads
if this isn't defined. Working around this avoids -Wno-array-bounds with
clang.

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201027233646.3434896-1-irogers@google.com
---
 tools/bpf/bpftool/feature.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index a43a6f10b564..359960a8f1de 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -843,9 +843,14 @@ static int handle_perms(void)
 		else
 			p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
 			      capability_msg(bpf_caps, 0),
+#ifdef CAP_BPF
 			      capability_msg(bpf_caps, 1),
 			      capability_msg(bpf_caps, 2),
-			      capability_msg(bpf_caps, 3));
+			      capability_msg(bpf_caps, 3)
+#else
+				"", "", "", "", "", ""
+#endif /* CAP_BPF */
+				);
 		goto exit_free;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 0698ac66e01019528f0db4191ae3aaf9978e67da Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 27 Oct 2020 16:36:46 -0700
Subject: tools, bpftool: Remove two unused variables.

Avoid an unused variable warning.

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201027233646.3434896-2-irogers@google.com
---
 tools/bpf/bpftool/skeleton/profiler.bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 4e3512f700c0..ce5b65e07ab1 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX)
 static inline void
 fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
 {
-	struct bpf_perf_event_value *before, diff, *accum;
+	struct bpf_perf_event_value *before, diff;
 
 	before = bpf_map_lookup_elem(&fentry_readings, &id);
 	/* only account samples with a valid fentry_reading */
@@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX)
 {
 	struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
 	u32 cpu = bpf_get_smp_processor_id();
-	u32 i, one = 1, zero = 0;
+	u32 i, zero = 0;
 	int err;
 	u64 *count;
 
-- 
cgit v1.2.3-70-g09d2


From af8afcf1fdd5f365f70e2386c2d8c7a1abd853d7 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 29 Oct 2020 03:56:05 +0100
Subject: wireguard: selftests: check that route_me_harder packets use the
 right sk

If netfilter changes the packet mark, the packet is rerouted. The
ip_route_me_harder family of functions fails to use the right sk, opting
to instead use skb->sk, resulting in a routing loop when used with
tunnels. With the next change fixing this issue in netfilter, test for
the relevant condition inside our test suite, since wireguard was where
the bug was discovered.

Reported-by: Chen Minqiang <ptpt52@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/wireguard/netns.sh           | 8 ++++++++
 tools/testing/selftests/wireguard/qemu/kernel.config | 2 ++
 2 files changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index d77f4829f1e0..74c69b75f6f5 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -316,6 +316,14 @@ pp sleep 3
 n2 ping -W 1 -c 1 192.168.241.1
 n1 wg set wg0 peer "$pub2" persistent-keepalive 0
 
+# Test that sk_bound_dev_if works
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2
+# What about when the mark changes and the packet must be rerouted?
+n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1
+n1 ping -c 1 -W 1 192.168.241.2 # First the boring case
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case
+n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1
+
 # Test that onion routing works, even when it loops
 n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
 ip1 addr add 192.168.242.1/24 dev wg0
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index d531de13c95b..4eecb432a66c 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -18,10 +18,12 @@ CONFIG_NF_NAT=y
 CONFIG_NETFILTER_XTABLES=y
 CONFIG_NETFILTER_XT_NAT=y
 CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MARK=y
 CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_NF_NAT_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_MANGLE=y
 CONFIG_IP_NF_NAT=y
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IP_MULTIPLE_TABLES=y
-- 
cgit v1.2.3-70-g09d2


From 7afc9d8f82906754e16fce560fb4e9733bb9b75e Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:19 +0200
Subject: selftests: net: bridge: rename current igmp tests to igmpv2

To prepare the bridge_igmp.sh for IGMPv3 we need to rename the
current test to IGMPv2.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/bridge_igmp.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 88d2472ba151..481198300b72 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="reportleave_test"
+ALL_TESTS="v2reportleave_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -110,7 +110,7 @@ mcast_packet_test()
 	return $seen
 }
 
-reportleave_test()
+v2reportleave_test()
 {
 	RET=0
 	ip address add dev $h2 $TEST_GROUP/32 autojoin
@@ -118,12 +118,12 @@ reportleave_test()
 
 	sleep 5
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
-	check_err $? "Report didn't create mdb entry for $TEST_GROUP"
+	check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
 
 	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
 	check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
 
-	log_test "IGMP report $TEST_GROUP"
+	log_test "IGMPv2 report $TEST_GROUP"
 
 	RET=0
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
@@ -139,7 +139,7 @@ reportleave_test()
 	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
 	check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
 
-	log_test "IGMP leave $TEST_GROUP"
+	log_test "IGMPv2 leave $TEST_GROUP"
 }
 
 trap cleanup EXIT
-- 
cgit v1.2.3-70-g09d2


From 79ae3e256aa1cfaa801e23a13b7f9e1a49cacb20 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:20 +0200
Subject: selftests: net: bridge: igmp: add support for packet source address

Add support for one more argument which specifies the source address to
use. It will be later used for IGMPv3 S,G entry testing.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/bridge_igmp.sh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 481198300b72..1c19459dbc58 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -83,9 +83,10 @@ cleanup()
 mcast_packet_test()
 {
 	local mac=$1
-	local ip=$2
-	local host1_if=$3
-	local host2_if=$4
+	local src_ip=$2
+	local ip=$3
+	local host1_if=$4
+	local host2_if=$5
 	local seen=0
 
 	# Add an ACL on `host2_if` which will tell us whether the packet
@@ -94,7 +95,7 @@ mcast_packet_test()
 	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
 		flower dst_mac $mac action drop
 
-	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q
+	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
 	sleep 1
 
 	tc -j -s filter show dev $host2_if ingress \
@@ -120,7 +121,7 @@ v2reportleave_test()
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
 	check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
 
-	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
 	check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
 
 	log_test "IGMPv2 report $TEST_GROUP"
@@ -136,7 +137,7 @@ v2reportleave_test()
 	bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
 	check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP"
 
-	mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+	mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
 	check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
 
 	log_test "IGMPv2 leave $TEST_GROUP"
-- 
cgit v1.2.3-70-g09d2


From f0e260db4c9e0576b2092f30bddd6816f9d37383 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:21 +0200
Subject: selftests: net: bridge: igmp: check for specific udp ip protocol

We have to specifically check for udp protocol in addition to the mac
address because in IGMPv3 tests group-specific queries will use the same
mac address.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/bridge_igmp.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 1c19459dbc58..5562aef14c0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -93,7 +93,7 @@ mcast_packet_test()
 	# was received by it or not.
 	tc qdisc add dev $host2_if ingress
 	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
-		flower dst_mac $mac action drop
+		flower ip_proto udp dst_mac $mac action drop
 
 	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
 	sleep 1
-- 
cgit v1.2.3-70-g09d2


From 68d3163a4b7e50bc8e9bbf689d55174fdcd44fa5 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:22 +0200
Subject: selftests: net: bridge: igmp: add IGMPv3 entries' state helpers

Add helpers which will be used in subsequent tests, they are:
 - check_sg_entries: check for proper source list and S,G entry
   existence
 - check_sg_fwding: check for proper traffic forwarding/blocking
 - check_sg_state: check for proper blocked/forwarding entry state

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 67 ++++++++++++++++++++++
 1 file changed, 67 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 5562aef14c0a..19c1f46d1151 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -143,6 +143,73 @@ v2reportleave_test()
 	log_test "IGMPv2 leave $TEST_GROUP"
 }
 
+check_sg_entries()
+{
+	local report=$1; shift
+	local slist=("$@")
+	local sarg=""
+
+	for src in "${slist[@]}"; do
+		sarg="${sarg} and .source_list[].address == \"$src\""
+	done
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
+	check_err $? "Wrong *,G entry source list after $report report"
+
+	for sgent in "${slist[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
+		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
+	done
+}
+
+check_sg_fwding()
+{
+	local should_fwd=$1; shift
+	local sources=("$@")
+
+	for src in "${sources[@]}"; do
+		local retval=0
+
+		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
+		retval=$?
+		if [ $should_fwd -eq 1 ]; then
+			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
+		else
+			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
+		fi
+	done
+}
+
+check_sg_state()
+{
+	local is_blocked=$1; shift
+	local sources=("$@")
+	local should_fail=1
+
+	if [ $is_blocked -eq 1 ]; then
+		should_fail=0
+	fi
+
+	for src in "${sources[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
+				 .source_list[] |
+				 select(.address == \"$src\") |
+				 select(.timer == \"0.00\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has zero timer"
+
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
+				 .flags[] == \"blocked\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has blocked flag"
+	done
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 98ae11cf8104a27fcd3dddaed4714be0600df419 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:23 +0200
Subject: selftests: net: bridge: add tests for igmpv3 is_include and inc ->
 allow reports

First we test is_include/include mode then we build on that with allow
effectively achieving:
  state          report        result                 action
 INCLUDE (A)    ALLOW (B)    INCLUDE (A+B)           (B)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 82 +++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 19c1f46d1151..e9999e346ea6 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,11 +1,20 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
 TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+ALL_GROUP="224.0.0.1"
+ALL_MAC="01:00:5e:00:00:01"
+
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3
+MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+
 source lib.sh
 
 h1_create()
@@ -210,6 +219,77 @@ check_sg_state()
 	done
 }
 
+v3include_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+	ip link set dev br0 type bridge mcast_igmp_version 3
+	check_err $? "Could not change bridge IGMP version to 3"
+
+	$MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+	check_err $? "Missing *,G entry with source list"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+	check_sg_entries "is_include" "${X[@]}"
+}
+
+v3cleanup()
+{
+	local port=$1
+	local group=$2
+
+	bridge mdb del dev br0 port $port grp $group
+	ip link set dev br0 type bridge mcast_igmp_version 2
+}
+
+v3include_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_allow_test()
+{
+	RET=0
+	local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q
+	sleep 1
+	check_sg_entries "allow" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> allow"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 47021771064cc99fd106783ddc698b76684ec3f0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:24 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> is_include report

The test checks for the following case:
   state          report        result                 action
 INCLUDE (A)    IS_IN (B)     INCLUDE (A+B)            (B)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 25 +++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e9999e346ea6..added5c69e8b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -12,6 +12,8 @@ ALL_MAC="01:00:5e:00:00:01"
 
 # IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3
 MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 
@@ -290,6 +292,27 @@ v3inc_allow_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_is_include_test()
+{
+	RET=0
+	local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q
+	sleep 1
+	check_sg_entries "is_include" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 3c8b9fdad00481dfb0ca4ce81a5fec6c18fd77bc Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:25 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> is_exclude report

The test checks for the following case:
   state          report        result                 action
  INCLUDE (A)    IS_EX (B)     EXCLUDE (A*B,B-A)      (B-A)=0
                                                      Delete (A-B)
                                                      Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 41 +++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index added5c69e8b..34d2c4370fa6 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
+	   v3inc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -16,6 +17,8 @@ MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:
 MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
+MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
 
 source lib.sh
 
@@ -313,6 +316,42 @@ v3inc_is_include_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_is_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> is_exclude"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 735af7bec0f128e67192512854db459f61b1c278 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:26 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> to_exclude report

The test checks for the following case:
  state          report        result                  action
 INCLUDE (A)    TO_EX (B)     EXCLUDE (A*B,B-A)       (B-A)=0
                                                      Delete (A-B)
                                                      Send Q(G,A*B)
                                                      Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 51 +++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 34d2c4370fa6..36f10a3168cc 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -19,6 +19,8 @@ MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
+# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
 
 source lib.sh
 
@@ -352,6 +354,53 @@ v3inc_is_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_to_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1")
+	local Y=("192.0.2.20" "192.0.2.30")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.21\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From eecd8cfdff1b9e437ca9162d058de0cee68c1fe6 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:27 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> allow report

The test checks for the following case:
   state          report        result                  action
  EXCLUDE (X,Y)  ALLOW (A)     EXCLUDE (X+A,Y-A)       (A)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 83 +++++++++++++++-------
 1 file changed, 59 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 36f10a3168cc..d786e75abe2c 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -17,6 +17,8 @@ MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:
 MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30
+MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
 # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
@@ -250,6 +252,38 @@ v3include_prepare()
 	check_sg_entries "is_include" "${X[@]}"
 }
 
+v3exclude_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local pkt=$4
+	local X=("192.0.2.1" "192.0.2.2")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3include_prepare $host1_if $mac $group
+
+	$MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+}
+
 v3cleanup()
 {
 	local port=$1
@@ -321,30 +355,8 @@ v3inc_is_include_test()
 v3inc_is_exclude_test()
 {
 	RET=0
-	local X=("192.0.2.1" "192.0.2.2")
-	local Y=("192.0.2.20" "192.0.2.21")
-
-	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
-
-	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC" -q
-	sleep 1
-	bridge -j -d -s mdb show dev br0 \
-		| jq -e ".[].mdb[] | \
-			 select(.grp == \"$TEST_GROUP\" and \
-				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
-	check_err $? "Wrong *,G entry filter mode"
-
-	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
-
-	bridge -j -d -s mdb show dev br0 \
-		| jq -e ".[].mdb[] | \
-			 select(.grp == \"$TEST_GROUP\" and \
-				.source_list != null and
-				.source_list[].address == \"192.0.2.3\")" &>/dev/null
-	check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
 
 	check_sg_fwding 1 "${X[@]}" 192.0.2.100
 	check_sg_fwding 0 "${Y[@]}"
@@ -401,6 +413,29 @@ v3inc_to_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_allow_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+	local Y=("192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+	sleep 1
+	check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> allow"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From e7e7ab7c00c2ecffa64defb54fe938d61fc19d39 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:28 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> is_include report

The test checks for the following case:
    state          report        result                  action
   EXCLUDE (X,Y)  IS_IN (A)     EXCLUDE (X+A,Y-A)       (A)=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 27 +++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index d786e75abe2c..b2b0f7d7e860 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -15,6 +15,8 @@ ALL_MAC="01:00:5e:00:00:01"
 MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
 # IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30
+MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
 MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
 # IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30
@@ -436,6 +438,29 @@ v3exc_allow_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_is_include_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+	local Y=("192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q
+	sleep 1
+	check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> is_include"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 7b4f7138221a483e7642e582518814d579edf36a Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:29 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> is_exclude report

The test checks for the following case:
     state          report        result                  action
   EXCLUDE (X,Y)  IS_EX (A)     EXCLUDE (A-Y,Y*A)        (A-X-Y)=GMI
                                                         Delete (X-A)
                                                         Delete (Y-A)
                                                         Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 28 +++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index b2b0f7d7e860..91b0b26428f6 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test"
+	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
+	   v3exc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -23,6 +24,8 @@ MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:0
 MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
 MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30
+MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
 MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
 
@@ -461,6 +464,29 @@ v3exc_is_include_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_is_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.30")
+	local Y=("192.0.2.20")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q
+	sleep 1
+	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 65bfc146ab95ad980134fe206467d1d7108e402e Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:30 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> to_exclude report

The test checks for the following case:
    state          report        result                  action
  EXCLUDE (X,Y)  TO_EX (A)     EXCLUDE (A-Y,Y*A)        (A-X-Y)=Group Timer
                                                        Delete (X-A)
                                                        Delete (Y-A)
                                                        Send Q(G,A-Y)
                                                        Group Timer=GMI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 91b0b26428f6..3cfc30b88285 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -487,6 +487,34 @@ v3exc_is_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_to_exclude_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.30")
+	local Y=("192.0.2.20")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+	sleep 1
+	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 80899f1b1c05a07f907bc54d6dc5bdadb37ab4f1 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:31 +0200
Subject: selftests: net: bridge: add test for igmpv3 inc -> block report

The test checks for the following case:
    state          report        result                  action
  INCLUDE (A)    BLOCK (B)     INCLUDE (A)              Send Q(G,A*B)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 33 +++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 3cfc30b88285..3772c7a066c9 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -28,6 +28,8 @@ MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:
 MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
 # IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
 MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
 
 source lib.sh
 
@@ -515,6 +517,35 @@ v3exc_to_exclude_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3inc_block_test()
+{
+	RET=0
+	local X=("192.0.2.2" "192.0.2.3")
+
+	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+	# make sure the lowered timers have expired (by default 2 seconds)
+	sleep 3
+	check_sg_entries "block" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 "192.0.2.100"
+
+	log_test "IGMPv3 report $TEST_GROUP include -> block"
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 9eb58e07470bfb5a1c1d4ae08806b82d662171f7 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:32 +0200
Subject: selftests: net: bridge: add test for igmpv3 exc -> block report

The test checks for the following case:
   state          report        result                  action
 EXCLUDE (X,Y)  BLOCK (A)     EXCLUDE (X+(A-Y),Y)      (A-X-Y)=Group Timer
                                                       Send Q(G,A-Y)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 3772c7a066c9..45c5619666d8 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -546,6 +546,34 @@ v3inc_block_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_block_test()
+{
+	RET=0
+	local X=("192.0.2.1" "192.0.2.2" "192.0.2.30")
+	local Y=("192.0.2.20" "192.0.2.21")
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+	sleep 1
+	check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+	check_sg_state 0 "${X[@]}"
+	check_sg_state 1 "${Y[@]}"
+
+	check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	check_sg_fwding 0 "${Y[@]}"
+
+	log_test "IGMPv3 report $TEST_GROUP exclude -> block"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 18f66c96ea585ca7bdd5c75eae3077566b0d73c0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:33 +0200
Subject: selftests: net: bridge: add test for igmpv3 exclude timeout

Test that when a group in exclude mode expires it changes mode to
include and the blocked entries are deleted.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 49 +++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 45c5619666d8..db0a03e30868 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -3,7 +3,8 @@
 
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test"
+	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
+	   v3exc_timeout_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -574,6 +575,52 @@ v3exc_block_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3exc_timeout_test()
+{
+	RET=0
+	local X=("192.0.2.20" "192.0.2.30")
+
+	# GMI should be 3 seconds
+	ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+	ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+	sleep 3
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"192.0.2.2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+
+	check_sg_entries "allow" "${X[@]}"
+
+	check_sg_state 0 "${X[@]}"
+
+	check_sg_fwding 1 "${X[@]}"
+	check_sg_fwding 0 192.0.2.100
+
+	log_test "IGMPv3 group $TEST_GROUP exclude timeout"
+
+	ip link set dev br0 type bridge mcast_query_interval 12500 \
+					mcast_query_response_interval 1000
+
+	v3cleanup $swp1 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 414ea3754149847ec9491de9e9923750f5447331 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 27 Oct 2020 20:59:34 +0200
Subject: selftests: net: bridge: add test for igmpv3 *,g auto-add

When we have *,G ports in exclude mode and a new S,G,port is added
the kernel has to automatically create an S,G entry for each exclude
port to get proper forwarding.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index db0a03e30868..0e71abdd7a03 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -4,7 +4,7 @@
 ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
 	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
 	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
-	   v3exc_timeout_test"
+	   v3exc_timeout_test v3star_ex_auto_add_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -621,6 +621,35 @@ v3exc_timeout_test()
 	v3cleanup $swp1 $TEST_GROUP
 }
 
+v3star_ex_auto_add_test()
+{
+	RET=0
+
+	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+	$MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+				.port == \"$swp1\")" &>/dev/null
+	check_err $? "S,G entry for *,G port doesn't exist"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+				.port == \"$swp1\" and \
+				.flags[] == \"added_by_star_ex\")" &>/dev/null
+	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+	check_sg_fwding 1 192.0.2.3
+
+	log_test "IGMPv3 S,G port entry automatic add to a *,G port"
+
+	v3cleanup $swp1 $TEST_GROUP
+	v3cleanup $swp2 $TEST_GROUP
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 7a60c2dd0f575ab14a457e99582af0ca1e072a74 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 28 Oct 2020 16:15:26 -0300
Subject: drm: Remove SCATTERLIST_MAX_SEGMENT

Since commit 9a40401cfa13 ("lib/scatterlist: Do not limit max_segment to
PAGE_ALIGNED values") the max_segment input to sg_alloc_table_from_pages()
does not have to be any special value. The new algorithm will always
create something less than what the user provides. Thus eliminate this
confusing constant.

- vmwgfx should use the HW capability, not mix in the OS page size for
  calling dma_set_max_seg_size()

- i915 uses i915_sg_segment_size() both for sg_alloc_table_from_pages
  and for some open coded sgl construction. This doesn't change the value
  since rounddown(size, UINT_MAX) == SCATTERLIST_MAX_SEGMENT

- drm_prime_pages_to_sg uses it as a default if max_segment is zero,
  UINT_MAX is fine to use directly.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Qian Cai <cai@lca.pw>
Cc: "Ursulin, Tvrtko" <tvrtko.ursulin@intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/0-v1-44733fccd781+13d-rm_scatterlist_max_jgg@nvidia.com
---
 drivers/gpu/drm/drm_prime.c             | 4 ++--
 drivers/gpu/drm/i915/i915_scatterlist.h | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     | 3 +--
 include/linux/scatterlist.h             | 6 ------
 tools/testing/scatterlist/main.c        | 2 +-
 5 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 187b55ede62e..a7b61c2d9190 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -820,8 +820,8 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev,
 
 	if (dev)
 		max_segment = dma_max_mapping_size(dev->dev);
-	if (max_segment == 0 || max_segment > SCATTERLIST_MAX_SEGMENT)
-		max_segment = SCATTERLIST_MAX_SEGMENT;
+	if (max_segment == 0)
+		max_segment = UINT_MAX;
 	sge = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0,
 					  nr_pages << PAGE_SHIFT,
 					  max_segment,
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
index b7b59328cb76..883dd8d09d6b 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -112,7 +112,7 @@ static inline unsigned int i915_sg_segment_size(void)
 	unsigned int size = swiotlb_max_segment();
 
 	if (size == 0)
-		return SCATTERLIST_MAX_SEGMENT;
+		size = UINT_MAX;
 
 	size = rounddown(size, PAGE_SIZE);
 	/* swiotlb_max_segment_size can return 1 byte when it means one page. */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index b3a60959b5d5..0c42d2c05f43 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -794,8 +794,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	if (unlikely(ret != 0))
 		goto out_err0;
 
-	dma_set_max_seg_size(dev->dev, min_t(unsigned int, U32_MAX & PAGE_MASK,
-					     SCATTERLIST_MAX_SEGMENT));
+	dma_set_max_seg_size(dev->dev, U32_MAX);
 
 	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
 		DRM_INFO("Max GMR ids is %u\n",
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 36c47e7e66a2..6f70572b2938 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -18,12 +18,6 @@ struct scatterlist {
 #endif
 };
 
-/*
- * Since the above length field is an unsigned int, below we define the maximum
- * length in bytes that can be stored in one scatterlist entry.
- */
-#define SCATTERLIST_MAX_SEGMENT (UINT_MAX & PAGE_MASK)
-
 /*
  * These macros should be used after a dma_map_sg call has been done
  * to get bus addresses of each of the SG entries and their lengths.
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index b2c7e9f7b8d3..d264bf853034 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -50,7 +50,7 @@ static void fail(struct test *test, struct sg_table *st, const char *cond)
 
 int main(void)
 {
-	const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
+	const unsigned int sgmax = UINT_MAX;
 	struct test *test, tests[] = {
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
-- 
cgit v1.2.3-70-g09d2


From afabdf3338728c3aaa9f55d127e903dcd5f4acc7 Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Sun, 1 Nov 2020 17:08:07 -0800
Subject: epoll: add a selftest for epoll timeout race

Add a test case to ensure an event is observed by at least one poller
when an epoll timeout is used.

Signed-off-by: Guantao Liu <guantaol@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Khazhismel Kumykov <khazhy@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: https://lkml.kernel.org/r/20201028180202.952079-2-soheil.kdev@gmail.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../filesystems/epoll/epoll_wakeup_test.c          | 95 ++++++++++++++++++++++
 1 file changed, 95 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index d979ff14775a..8f82f99f7748 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -3282,4 +3282,99 @@ TEST(epoll60)
 	close(ctx.epfd);
 }
 
+struct epoll61_ctx {
+	int epfd;
+	int evfd;
+};
+
+static void *epoll61_write_eventfd(void *ctx_)
+{
+	struct epoll61_ctx *ctx = ctx_;
+	int64_t l = 1;
+
+	usleep(10950);
+	write(ctx->evfd, &l, sizeof(l));
+	return NULL;
+}
+
+static void *epoll61_epoll_with_timeout(void *ctx_)
+{
+	struct epoll61_ctx *ctx = ctx_;
+	struct epoll_event events[1];
+	int n;
+
+	n = epoll_wait(ctx->epfd, events, 1, 11);
+	/*
+	 * If epoll returned the eventfd, write on the eventfd to wake up the
+	 * blocking poller.
+	 */
+	if (n == 1) {
+		int64_t l = 1;
+
+		write(ctx->evfd, &l, sizeof(l));
+	}
+	return NULL;
+}
+
+static void *epoll61_blocking_epoll(void *ctx_)
+{
+	struct epoll61_ctx *ctx = ctx_;
+	struct epoll_event events[1];
+
+	epoll_wait(ctx->epfd, events, 1, -1);
+	return NULL;
+}
+
+TEST(epoll61)
+{
+	struct epoll61_ctx ctx;
+	struct epoll_event ev;
+	int i, r;
+
+	ctx.epfd = epoll_create1(0);
+	ASSERT_GE(ctx.epfd, 0);
+	ctx.evfd = eventfd(0, EFD_NONBLOCK);
+	ASSERT_GE(ctx.evfd, 0);
+
+	ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP;
+	ev.data.ptr = NULL;
+	r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev);
+	ASSERT_EQ(r, 0);
+
+	/*
+	 * We are testing a race.  Repeat the test case 1000 times to make it
+	 * more likely to fail in case of a bug.
+	 */
+	for (i = 0; i < 1000; i++) {
+		pthread_t threads[3];
+		int n;
+
+		/*
+		 * Start 3 threads:
+		 * Thread 1 sleeps for 10.9ms and writes to the evenfd.
+		 * Thread 2 calls epoll with a timeout of 11ms.
+		 * Thread 3 calls epoll with a timeout of -1.
+		 *
+		 * The eventfd write by Thread 1 should either wakeup Thread 2
+		 * or Thread 3.  If it wakes up Thread 2, Thread 2 writes on the
+		 * eventfd to wake up Thread 3.
+		 *
+		 * If no events are missed, all three threads should eventually
+		 * be joinable.
+		 */
+		ASSERT_EQ(pthread_create(&threads[0], NULL,
+					 epoll61_write_eventfd, &ctx), 0);
+		ASSERT_EQ(pthread_create(&threads[1], NULL,
+					 epoll61_epoll_with_timeout, &ctx), 0);
+		ASSERT_EQ(pthread_create(&threads[2], NULL,
+					 epoll61_blocking_epoll, &ctx), 0);
+
+		for (n = 0; n < ARRAY_SIZE(threads); ++n)
+			ASSERT_EQ(pthread_join(threads[n], NULL), 0);
+	}
+
+	close(ctx.epfd);
+	close(ctx.evfd);
+}
+
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3-70-g09d2


From 338b5da31de0d816b5718dad0e09482a27d51504 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 29 Oct 2020 21:09:31 +0200
Subject: selftests/net: timestamping: add ptp v2 support

The timestamping tool is supporting now only PTPv1 (IEEE-1588 2002) while
modern HW often supports also/only PTPv2.

Hence timestamping tool is still useful for sanity testing of PTP drivers
HW timestamping capabilities it's reasonable to upstate it to support
PTPv2. This patch adds corresponding support which can be enabled by using
new parameter "PTPV2".

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20201029190931.30883-1-grygorii.strashko@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/timestamping.c | 47 +++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index f4bb4fef0f39..21091be70688 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -59,7 +59,8 @@ static void usage(const char *error)
 	       "  SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
 	       "  SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
 	       "  SIOCGSTAMP - check last socket time stamp\n"
-	       "  SIOCGSTAMPNS - more accurate socket time stamp\n");
+	       "  SIOCGSTAMPNS - more accurate socket time stamp\n"
+	       "  PTPV2 - use PTPv2 messages\n");
 	exit(1);
 }
 
@@ -115,13 +116,28 @@ static const unsigned char sync[] = {
 	0x00, 0x00, 0x00, 0x00
 };
 
-static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+static const unsigned char sync_v2[] = {
+	0x00, 0x02, 0x00, 0x2C,
+	0x00, 0x00, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0xFF,
+	0xFE, 0x00, 0x00, 0x00,
+	0x00, 0x01, 0x00, 0x01,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2)
 {
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct timeval now;
 	int res;
 
-	res = sendto(sock, sync, sizeof(sync), 0,
-		addr, addr_len);
+	res = sendto(sock, sync_p, sync_len, 0, addr, addr_len);
 	gettimeofday(&now, 0);
 	if (res < 0)
 		printf("%s: %s\n", "send", strerror(errno));
@@ -134,9 +150,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
 static void printpacket(struct msghdr *msg, int res,
 			char *data,
 			int sock, int recvmsg_flags,
-			int siocgstamp, int siocgstampns)
+			int siocgstamp, int siocgstampns, int ptpv2)
 {
 	struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+	size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+	const void *sync_p = ptpv2 ? sync_v2 : sync;
 	struct cmsghdr *cmsg;
 	struct timeval tv;
 	struct timespec ts;
@@ -210,10 +228,9 @@ static void printpacket(struct msghdr *msg, int res,
 					"probably SO_EE_ORIGIN_TIMESTAMPING"
 #endif
 					);
-				if (res < sizeof(sync))
+				if (res < sync_len)
 					printf(" => truncated data?!");
-				else if (!memcmp(sync, data + res - sizeof(sync),
-							sizeof(sync)))
+				else if (!memcmp(sync_p, data + res - sync_len, sync_len))
 					printf(" => GOT OUR DATA BACK (HURRAY!)");
 				break;
 			}
@@ -257,7 +274,7 @@ static void printpacket(struct msghdr *msg, int res,
 }
 
 static void recvpacket(int sock, int recvmsg_flags,
-		       int siocgstamp, int siocgstampns)
+		       int siocgstamp, int siocgstampns, int ptpv2)
 {
 	char data[256];
 	struct msghdr msg;
@@ -288,7 +305,7 @@ static void recvpacket(int sock, int recvmsg_flags,
 	} else {
 		printpacket(&msg, res, data,
 			    sock, recvmsg_flags,
-			    siocgstamp, siocgstampns);
+			    siocgstamp, siocgstampns, ptpv2);
 	}
 }
 
@@ -300,6 +317,7 @@ int main(int argc, char **argv)
 	int siocgstamp = 0;
 	int siocgstampns = 0;
 	int ip_multicast_loop = 0;
+	int ptpv2 = 0;
 	char *interface;
 	int i;
 	int enabled = 1;
@@ -335,6 +353,8 @@ int main(int argc, char **argv)
 			siocgstampns = 1;
 		else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
 			ip_multicast_loop = 1;
+		else if (!strcasecmp(argv[i], "PTPV2"))
+			ptpv2 = 1;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
 			so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
 		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
@@ -369,6 +389,7 @@ int main(int argc, char **argv)
 		HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
 	hwconfig.rx_filter =
 		(so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+		ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC :
 		HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
 	hwconfig_requested = hwconfig;
 	if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
@@ -496,16 +517,16 @@ int main(int argc, char **argv)
 					printf("has error\n");
 				recvpacket(sock, 0,
 					   siocgstamp,
-					   siocgstampns);
+					   siocgstampns, ptpv2);
 				recvpacket(sock, MSG_ERRQUEUE,
 					   siocgstamp,
-					   siocgstampns);
+					   siocgstampns, ptpv2);
 			}
 		} else {
 			/* write one packet */
 			sendpacket(sock,
 				   (struct sockaddr *)&addr,
-				   sizeof(addr));
+				   sizeof(addr), ptpv2);
 			next.tv_sec += 5;
 			continue;
 		}
-- 
cgit v1.2.3-70-g09d2


From 7a078d2d18801bba7bde7337a823d7342299acf7 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 29 Oct 2020 15:37:07 -0700
Subject: libbpf, hashmap: Fix undefined behavior in hash_bits

If bits is 0, the case when the map is empty, then the >> is the size of
the register which is undefined behavior - on x86 it is the same as a
shift by 0.

Fix by handling the 0 case explicitly and guarding calls to hash_bits for
empty maps in hashmap__for_each_key_entry and hashmap__for_each_entry_safe.

Fixes: e3b924224028 ("libbpf: add resizable non-thread safe internal hashmap")
Suggested-by: Andrii Nakryiko <andriin@fb.com>,
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201029223707.494059-1-irogers@google.com
---
 tools/lib/bpf/hashmap.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index d9b385fe808c..10a4c4cd13cf 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -15,6 +15,9 @@
 static inline size_t hash_bits(size_t h, int bits)
 {
 	/* shuffle bits and return requested number of upper bits */
+	if (bits == 0)
+		return 0;
+
 #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
 	/* LP64 case */
 	return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
@@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
  * @key: key to iterate entries for
  */
 #define hashmap__for_each_key_entry(map, cur, _key)			    \
-	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-					     map->cap_bits);		    \
-		     map->buckets ? map->buckets[bkt] : NULL; });	    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
 	     cur;							    \
 	     cur = cur->next)						    \
 		if (map->equal_fn(cur->key, (_key), map->ctx))
 
 #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
-	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-					     map->cap_bits);		    \
-		     cur = map->buckets ? map->buckets[bkt] : NULL; });	    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
 	     cur && ({ tmp = cur->next; true; });			    \
 	     cur = tmp)							    \
 		if (map->equal_fn(cur->key, (_key), map->ctx))
-- 
cgit v1.2.3-70-g09d2


From bbbc7aa45eefd4ef7ffbd5ee3bb49bd8b68a2213 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 30 Oct 2020 21:10:54 +0100
Subject: selftests: add test script for bareudp tunnels

Test different encapsulation modes of the bareudp module:
  * Unicast MPLS,
  * IPv4 only,
  * IPv4 in multiproto mode (that is, IPv4 and IPv6),
  * IPv6.

Each mode is tested with both an IPv4 and an IPv6 underlay.

v2:
  * Add build dependencies in config file (Willem de Bruijn).
  * The MPLS test now uses its own IP addresses. This minimises
    the amount of cleanup between tests and simplifies the script.
  * Verify that iproute2 supports bareudp tunnels before running the
    script (and other minor usability improvements).

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/8abc0e58f8a7eeb404f82466505a73110bc43ab8.1604088587.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile   |   1 +
 tools/testing/selftests/net/bareudp.sh | 538 +++++++++++++++++++++++++++++++++
 tools/testing/selftests/net/config     |   7 +
 3 files changed, 546 insertions(+)
 create mode 100755 tools/testing/selftests/net/bareudp.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ef352477cac6..fa5fa425d148 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh
 TEST_PROGS += devlink_port_split.py
 TEST_PROGS += drop_monitor_tests.sh
 TEST_PROGS += vrf_route_leaking.sh
+TEST_PROGS += bareudp.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
new file mode 100755
index 000000000000..c6fe22de7d0e
--- /dev/null
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -0,0 +1,538 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Test various bareudp tunnel configurations.
+#
+# The bareudp module allows to tunnel network protocols like IP or MPLS over
+# UDP, without adding any intermediate header. This scripts tests several
+# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting
+# IPv4, IPv6 or MPLS packets on the overlay).
+#
+# Network topology:
+#
+#   * A chain of 4 network namespaces, connected with veth pairs. Each veth
+#     is assigned an IPv4 and an IPv6 address. A host-route allows a veth to
+#     join its peer.
+#
+#   * NS0 and NS3 are at the extremities of the chain. They have additional
+#     IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0
+#     and NS3, so that they can communicate using these overlay IP addresses.
+#     For IPv4 and IPv6 reachability tests, the route simply sets the peer's
+#     veth address as gateway. For MPLS reachability tests, an MPLS header is
+#     also pushed before the IP header.
+#
+#   * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to
+#     encapsulate the traffic into UDP.
+#
+# +-----------------------------------------------------------------------+
+# |                                  NS0                                  |
+# |                                                                       |
+# |   lo:                                                                 |
+# |      * IPv4 address: 192.0.2.100/32                                   |
+# |      * IPv6 address: 2001:db8::100/128                                |
+# |      * IPv6 address: 2001:db8::200/128                                |
+# |      * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11            |
+# |      * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11       |
+# |      * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11       |
+# |                    (encapsulated with MPLS label 203)                 |
+# |                                                                       |
+# |   veth01:                                                             |
+# |   ^  * IPv4 address: 192.0.2.10, peer 192.0.2.11/32                   |
+# |   |  * IPv6 address: 2001:db8::10, peer 2001:db8::11/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test)
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS1                                  |
+# |   |                                                                   |
+# |   v                                                                   |
+# |   veth10:                                                             |
+# |      * IPv4 address: 192.0.2.11, peer 192.0.2.10/32                   |
+# |      * IPv6 address: 2001:db8::11, peer 2001:db8::10/128              |
+# |                                                                       |
+# |   bareudp_ns1:                                                        |
+# |      * Encapsulate IP or MPLS packets received on veth10 into UDP     |
+# |        and send the resulting packets through veth12.                 |
+# |      * Decapsulate bareudp packets (either IP or MPLS, over UDP)      |
+# |        received on veth12 and send the inner packets through veth10.  |
+# |                                                                       |
+# |   veth12:                                                             |
+# |   ^  * IPv4 address: 192.0.2.21, peer 192.0.2.22/32                   |
+# |   |  * IPv6 address: 2001:db8::21, peer 2001:db8::22/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test), over UDP
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS2                                  |
+# |   |                                                                   |
+# |   v                                                                   |
+# |   veth21:                                                             |
+# |      * IPv4 address: 192.0.2.22, peer 192.0.2.21/32                   |
+# |      * IPv6 address: 2001:db8::22, peer 2001:db8::21/128              |
+# |                                                                       |
+# |   bareudp_ns2:                                                        |
+# |      * Decapsulate bareudp packets (either IP or MPLS, over UDP)      |
+# |        received on veth21 and send the inner packets through veth23.  |
+# |      * Encapsulate IP or MPLS packets received on veth23 into UDP     |
+# |        and send the resulting packets through veth21.                 |
+# |                                                                       |
+# |   veth23:                                                             |
+# |   ^  * IPv4 address: 192.0.2.32, peer 192.0.2.33/32                   |
+# |   |  * IPv6 address: 2001:db8::32, peer 2001:db8::33/128              |
+# |   |                                                                   |
+# +---+-------------------------------------------------------------------+
+#     |
+#     | Traffic type: IP or MPLS (depending on test)
+#     |
+# +---+-------------------------------------------------------------------+
+# |   |                              NS3                                  |
+# |   v                                                                   |
+# |   veth32:                                                             |
+# |      * IPv4 address: 192.0.2.33, peer 192.0.2.32/32                   |
+# |      * IPv6 address: 2001:db8::33, peer 2001:db8::32/128              |
+# |                                                                       |
+# |   lo:                                                                 |
+# |      * IPv4 address: 192.0.2.103/32                                   |
+# |      * IPv6 address: 2001:db8::103/128                                |
+# |      * IPv6 address: 2001:db8::203/128                                |
+# |      * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32            |
+# |      * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32       |
+# |      * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32       |
+# |                    (encapsulated with MPLS label 200)                 |
+# |                                                                       |
+# +-----------------------------------------------------------------------+
+
+ERR=4 # Return 4 by default, which is the SKIP code for kselftest
+PING6="ping"
+PAUSE_ON_FAIL="no"
+
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+readonly NS2=$(mktemp -u ns2-XXXXXXXX)
+readonly NS3=$(mktemp -u ns3-XXXXXXXX)
+
+# Exit the script after having removed the network namespaces it created
+#
+# Parameters:
+#
+#   * The list of network namespaces to delete before exiting.
+#
+exit_cleanup()
+{
+	for ns in "$@"; do
+		ip netns delete "${ns}" 2>/dev/null || true
+	done
+
+	if [ "${ERR}" -eq 4 ]; then
+		echo "Error: Setting up the testing environment failed." >&2
+	fi
+
+	exit "${ERR}"
+}
+
+# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3)
+#
+# New namespaces are cleaned up manually in case of error, to ensure that only
+# namespaces created by this script are deleted.
+create_namespaces()
+{
+	ip netns add "${NS0}" || exit_cleanup
+	ip netns add "${NS1}" || exit_cleanup "${NS0}"
+	ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
+	ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+	exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+}
+
+# Configure a network interface using a host route
+#
+# Parameters
+#
+#   * $1: the netns the network interface resides in,
+#   * $2: the network interface name,
+#   * $3: the local IPv4 address to assign to this interface,
+#   * $4: the IPv4 address of the remote network interface,
+#   * $5: the local IPv6 address to assign to this interface,
+#   * $6: the IPv6 address of the remote network interface.
+#
+iface_config()
+{
+	local NS="${1}"; readonly NS
+	local DEV="${2}"; readonly DEV
+	local LOCAL_IP4="${3}"; readonly LOCAL_IP4
+	local PEER_IP4="${4}"; readonly PEER_IP4
+	local LOCAL_IP6="${5}"; readonly LOCAL_IP6
+	local PEER_IP6="${6}"; readonly PEER_IP6
+
+	ip -netns "${NS}" link set dev "${DEV}" up
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}"
+	ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad
+}
+
+# Create base networking topology:
+#
+#   * set up the loopback device in all network namespaces (NS0..NS3),
+#   * set up a veth pair to connect each netns in sequence (NS0 with NS1,
+#     NS1 with NS2, etc.),
+#   * add and IPv4 and an IPv6 address on each veth interface,
+#   * prepare the ingress qdiscs in the intermediate namespaces.
+#
+setup_underlay()
+{
+	for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
+		ip -netns "${ns}" link set dev lo up
+	done;
+
+	ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
+	ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
+	ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
+	iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128
+	iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128
+	iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128
+	iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128
+	iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128
+	iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128
+
+	tc -netns "${NS1}" qdisc add dev veth10 ingress
+	tc -netns "${NS2}" qdisc add dev veth23 ingress
+}
+
+# Set up the IPv4, IPv6 and MPLS overlays.
+#
+# Configuration is similar for all protocols:
+#
+#   * add an overlay IP address on the loopback interface of each edge
+#     namespace,
+#   * route these IP addresses via the intermediate namespaces (for the MPLS
+#     tests, this is also where MPLS encapsulation is done),
+#   * add routes for these IP addresses (or MPLS labels) in the intermediate
+#     namespaces.
+#
+# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be
+# done just before running the reachability tests.
+
+setup_overlay_ipv4()
+{
+	# Add the overlay IP addresses and route them through the veth devices
+	ip -netns "${NS0}" address add 192.0.2.100/32 dev lo
+	ip -netns "${NS3}" address add 192.0.2.103/32 dev lo
+	ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11
+	ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32
+
+	# Route the overlay addresses in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
+	ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
+	ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
+}
+
+setup_overlay_ipv6()
+{
+	# Add the overlay IP addresses and route them through the veth devices
+	ip -netns "${NS0}" address add 2001:db8::100/128 dev lo
+	ip -netns "${NS3}" address add 2001:db8::103/128 dev lo
+	ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11
+	ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32
+
+	# Route the overlay addresses in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10
+	ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33
+}
+
+setup_overlay_mpls()
+{
+	# Add specific overlay IP addresses, routed over MPLS
+	ip -netns "${NS0}" address add 2001:db8::200/128 dev lo
+	ip -netns "${NS3}" address add 2001:db8::203/128 dev lo
+	ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11
+	ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32
+
+	# Route the MPLS packets in the intermediate namespaces
+	# (used after bareudp decapsulation)
+	ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256
+	ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256
+	ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10
+	ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33
+}
+
+# Run "ping" from NS0 and print the result
+#
+# Parameters:
+#
+#   * $1: the variant of ping to use (normally either "ping" or "ping6"),
+#   * $2: the IP address to ping,
+#   * $3: a human readable description of the purpose of the test.
+#
+# If the test fails and PAUSE_ON_FAIL is active, the user is given the
+# possibility to continue with the next test or to quit immediately.
+#
+ping_test_one()
+{
+	local PING="$1"; readonly PING
+	local IP="$2"; readonly IP
+	local MSG="$3"; readonly MSG
+	local RET
+
+	printf "TEST: %-60s  " "${MSG}"
+
+	set +e
+	ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1
+	RET=$?
+	set -e
+
+	if [ "${RET}" -eq 0 ]; then
+		printf "[ OK ]\n"
+	else
+		ERR=1
+		printf "[FAIL]\n"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			printf "\nHit enter to continue, 'q' to quit\n"
+			read a
+			if [ "$a" = "q" ]; then
+				exit 1
+			fi
+		fi
+	fi
+}
+
+# Run reachability tests
+#
+# Parameters:
+#
+#   * $1: human readable string describing the underlay protocol.
+#
+# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling
+# function.
+#
+ping_test()
+{
+	local UNDERLAY="$1"; readonly UNDERLAY
+	local MODE
+	local MSG
+
+	if [ "${MULTIPROTO}" = "multiproto" ]; then
+		MODE=" (multiproto mode)"
+	else
+		MODE=""
+	fi
+
+	if [ $IPV4 ]; then
+		ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}"
+	fi
+	if [ $IPV6 ]; then
+		ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}"
+	fi
+	if [ $MPLS_UC ]; then
+		ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}"
+	fi
+}
+
+# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6
+#
+# Parameters:
+#
+#   * $1: the packet type (protocol) to be handled by bareudp,
+#   * $2: a flag to activate or deactivate bareudp's "multiproto" mode.
+#
+test_overlay()
+{
+	local ETHERTYPE="$1"; readonly ETHERTYPE
+	local MULTIPROTO="$2"; readonly MULTIPROTO
+	local IPV4
+	local IPV6
+	local MPLS_UC
+
+	case "${ETHERTYPE}" in
+		"ipv4")
+			IPV4="ipv4"
+			if [ "${MULTIPROTO}" = "multiproto" ]; then
+				IPV6="ipv6"
+			else
+				IPV6=""
+			fi
+			MPLS_UC=""
+			;;
+		"ipv6")
+			IPV6="ipv6"
+			IPV4=""
+			MPLS_UC=""
+			;;
+		"mpls_uc")
+			MPLS_UC="mpls_uc"
+			IPV4=""
+			IPV6=""
+			;;
+		*)
+			exit 1
+			;;
+	esac
+	readonly IPV4
+	readonly IPV6
+	readonly MPLS_UC
+
+	# Create the bareudp devices in the intermediate namespaces
+	ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+	ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+
+	# IPv4 over UDPv4
+	if [ $IPV4 ]; then
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4         \
+			flower dst_ip 192.0.2.103/32                                   \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4         \
+			flower dst_ip 192.0.2.100/32                                   \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# IPv6 over UDPv4
+	if [ $IPV6 ]; then
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6         \
+			flower dst_ip 2001:db8::103/128                                \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6         \
+			flower dst_ip 2001:db8::100/128                                \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# MPLS (unicast) over UDPv4
+	if [ $MPLS_UC ]; then
+		ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1
+		ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1
+
+		# Encapsulation instructions for bareudp over IPv4
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc      \
+			flower mpls_label 203                                          \
+			action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc      \
+			flower mpls_label 200                                          \
+			action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# Test IPv4 underlay
+	ping_test "UDPv4"
+
+	# Cleanup bareudp encapsulation instructions, as they were specific to
+	# the IPv4 underlay, before setting up and testing the IPv6 underlay
+	tc -netns "${NS1}" filter delete dev veth10 ingress
+	tc -netns "${NS2}" filter delete dev veth23 ingress
+
+	# IPv4 over UDPv6
+	if [ $IPV4 ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4             \
+			flower dst_ip 192.0.2.103/32                                       \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4             \
+			flower dst_ip 192.0.2.100/32                                       \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# IPv6 over UDPv6
+	if [ $IPV6 ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6             \
+			flower dst_ip 2001:db8::103/128                                    \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6             \
+			flower dst_ip 2001:db8::100/128                                    \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# MPLS (unicast) over UDPv6
+	if [ $MPLS_UC ]; then
+		# New encapsulation instructions for bareudp over IPv6
+		tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc          \
+			flower mpls_label 203                                              \
+			action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+			action mirred egress redirect dev bareudp_ns1
+		tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc          \
+			flower mpls_label 200                                              \
+			action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+			action mirred egress redirect dev bareudp_ns2
+	fi
+
+	# Test IPv6 underlay
+	ping_test "UDPv6"
+
+	tc -netns "${NS1}" filter delete dev veth10 ingress
+	tc -netns "${NS2}" filter delete dev veth23 ingress
+	ip -netns "${NS1}" link delete bareudp_ns1
+	ip -netns "${NS2}" link delete bareudp_ns2
+}
+
+check_features()
+{
+	ip link help 2>&1 | grep -q bareudp
+	if [ $? -ne 0 ]; then
+		echo "Missing bareudp support in iproute2" >&2
+		exit_cleanup
+	fi
+
+	# Use ping6 on systems where ping doesn't handle IPv6
+	ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6"
+}
+
+usage()
+{
+	echo "Usage: $0 [-p]"
+	exit 1
+}
+
+while getopts :p o
+do
+	case $o in
+		p) PAUSE_ON_FAIL="yes";;
+		*) usage;;
+	esac
+done
+
+check_features
+
+# Create namespaces before setting up the exit trap.
+# Otherwise, exit_cleanup_all() could delete namespaces that were not created
+# by this script.
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_underlay
+setup_overlay_ipv4
+setup_overlay_ipv6
+setup_overlay_mpls
+
+test_overlay ipv4 nomultiproto
+test_overlay ipv6 nomultiproto
+test_overlay ipv4 multiproto
+test_overlay mpls_uc nomultiproto
+
+if [ "${ERR}" -eq 1 ]; then
+	echo "Some tests failed." >&2
+else
+	ERR=0
+fi
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 4d5df8e1eee7..614d5477365a 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -34,3 +34,10 @@ CONFIG_TRACEPOINTS=y
 CONFIG_NET_DROP_MONITOR=m
 CONFIG_NETDEVSIM=m
 CONFIG_NET_FOU=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_BAREUDP=m
-- 
cgit v1.2.3-70-g09d2


From e1eb075ccf3766860b7aa3f104ca29dcb8a46ed0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 15 Sep 2020 14:33:38 -0700
Subject: rcutorture: Make preemptible TRACE02 enable lockdep

Currently, the CONFIG_PREEMPT_NONE=y rcutorture TRACE01 rcutorture
scenario enables lockdep.  This limits its ability to find bugs due to
non-preemptible sections of code being RCU readers, and pretty much all
code thus appearing to lockdep to be an RCU reader.  This commit therefore
moves lockdep testing to the CONFIG_PREEMPT=y rcutorture TRACE02 scenario.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TRACE01 | 6 +++---
 tools/testing/selftests/rcutorture/configs/rcu/TRACE02 | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 12e7661b86f5..34c8ff5a12f2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -4,8 +4,8 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
-#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
 CONFIG_TASKS_TRACE_RCU_READ_MB=y
 CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index b69ed6673c41..77541eeb4e9f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -4,8 +4,8 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=y
-CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_PROVE_LOCKING=n
-#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=n
 CONFIG_RCU_EXPERT=y
-- 
cgit v1.2.3-70-g09d2


From 08c7974293851da6a64989b5ce7a0750e58178b1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 28 Aug 2020 06:46:03 -0700
Subject: torture: Don't kill gdb sessions

The rcutorture scripting will do a "kill -9" on any guest OS that exceeds
its --duration by more than a few minutes, which is very valuable when
bugs result in hangs.  However, this is a problem when the "hang" was due
to a --gdb debugging session.

This commit therefore refrains from killing the guest OS when a debugging
session is in progress.  This means that the user must manually kill the
kvm.sh process group if a hang really does occur.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 6dc2b49b85ea..d04966ab88cc 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -206,7 +206,10 @@ do
 	kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
 	if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
 	then
-		if test $kruntime -ge $seconds -o -f "$TORTURE_STOPFILE"
+		if test -n "$TORTURE_KCONFIG_GDB_ARG"
+		then
+			:
+		elif test $kruntime -ge $seconds || test -f "$TORTURE_STOPFILE"
 		then
 			break;
 		fi
-- 
cgit v1.2.3-70-g09d2


From 716e343f014e2b25320f332677363e884684b742 Mon Sep 17 00:00:00 2001
From: Michael Weiß <michael.weiss@aisec.fraunhofer.de>
Date: Tue, 27 Oct 2020 21:42:58 +0100
Subject: selftests/timens: added selftest for /proc/stat btime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test that btime value of /proc/stat is as expected in the time namespace
using a simple parser to get btime from /proc/stat.

Signed-off-by: Michael Weiß <michael.weiss@aisec.fraunhofer.de>
Reviewed-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20201027204258.7869-4-michael.weiss@aisec.fraunhofer.de
---
 tools/testing/selftests/timens/procfs.c | 58 ++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index 7f14f0fdac84..f2519154208a 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -93,6 +93,33 @@ static int read_proc_uptime(struct timespec *uptime)
 	return 0;
 }
 
+static int read_proc_stat_btime(unsigned long long *boottime_sec)
+{
+	FILE *proc;
+	char line_buf[2048];
+
+	proc = fopen("/proc/stat", "r");
+	if (proc == NULL) {
+		pr_perror("Unable to open /proc/stat");
+		return -1;
+	}
+
+	while (fgets(line_buf, 2048, proc)) {
+		if (sscanf(line_buf, "btime %llu", boottime_sec) != 1)
+			continue;
+		fclose(proc);
+		return 0;
+	}
+	if (errno) {
+		pr_perror("fscanf");
+		fclose(proc);
+		return -errno;
+	}
+	pr_err("failed to parse /proc/stat");
+	fclose(proc);
+	return -1;
+}
+
 static int check_uptime(void)
 {
 	struct timespec uptime_new, uptime_old;
@@ -123,18 +150,47 @@ static int check_uptime(void)
 	return 0;
 }
 
+static int check_stat_btime(void)
+{
+	unsigned long long btime_new, btime_old;
+	unsigned long long btime_expected;
+
+	if (switch_ns(parent_ns))
+		return pr_err("switch_ns(%d)", parent_ns);
+
+	if (read_proc_stat_btime(&btime_old))
+		return 1;
+
+	if (switch_ns(child_ns))
+		return pr_err("switch_ns(%d)", child_ns);
+
+	if (read_proc_stat_btime(&btime_new))
+		return 1;
+
+	btime_expected = btime_old - TEN_DAYS_IN_SEC;
+	if (btime_new != btime_expected) {
+		pr_fail("btime in /proc/stat: old %llu, new %llu [%llu]",
+			btime_old, btime_new, btime_expected);
+		return 1;
+	}
+
+	ksft_test_result_pass("Passed for /proc/stat btime\n");
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	int ret = 0;
 
 	nscheck();
 
-	ksft_set_plan(1);
+	ksft_set_plan(2);
 
 	if (init_namespaces())
 		return 1;
 
 	ret |= check_uptime();
+	ret |= check_stat_btime();
 
 	if (ret)
 		ksft_exit_fail();
-- 
cgit v1.2.3-70-g09d2


From b773ea650576f14442f7a546f2b15e64b10ed0eb Mon Sep 17 00:00:00 2001
From: "Justin M. Forbes" <jforbes@fedoraproject.org>
Date: Wed, 28 Oct 2020 13:59:00 -0300
Subject: perf tools: Remove LTO compiler options when building perl support

To avoid breaking the build by mixing files compiled with things coming
from distro specific compiler options for perl with the rest of perf,
i.e. to avoid this:

  `.gnu.debuglto_.debug_macro' referenced in section `.gnu.debuglto_.debug_macro' of /tmp/build/perf/util/scripting-engines/perf-in.o: defined in discarded section `.gnu.debuglto_.debug_macro[wm4.stdcpredef.h.19.8dc41bed5d9037ff9622e015fb5f0ce3]' of /tmp/build/perf/util/scripting-engines/perf-in.o

Noticed on Fedora 33.

Signed-off-by: Justin M. Forbes <jforbes@fedoraproject.org>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1593431
Cc: Jiri Olsa <jolsa@redhat.com>
Link: https://src.fedoraproject.org/rpms/kernel-tools/c/589a32b62f0c12516ab7b34e3dd30d450145bfa4?branch=master
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 6890fc4b063a..ce8516e4de34 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -749,6 +749,7 @@ else
   PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
   PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
   PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+  PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
   PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
   FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
-- 
cgit v1.2.3-70-g09d2


From e555b4b8d7b2844a9e48e06a7c3e4f9e44af847f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 12:26:45 -0300
Subject: perf tools: Update copy of libbpf's hashmap.c

To pick the changes in:

  85367030a6c7ef33 ("libbpf: Centralize poisoning and poison reallocarray()")
  7d9c71e10baa3496 ("libbpf: Extract generic string hashing function for reuse")

That don't entail any changes in tools/perf.

This addresses this perf build warning:

  Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h'
  diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h

Not a kernel ABI, its just that this uses the mechanism in place for
checking kernel ABI files drift.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hashmap.c |  3 +++
 tools/perf/util/hashmap.h | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
index a405dad068f5..3c20b126d60d 100644
--- a/tools/perf/util/hashmap.c
+++ b/tools/perf/util/hashmap.c
@@ -15,6 +15,9 @@
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
 
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
 /* start with 4 buckets */
 #define HASHMAP_MIN_CAP_BITS 2
 
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
index e0af36b0e5d8..d9b385fe808c 100644
--- a/tools/perf/util/hashmap.h
+++ b/tools/perf/util/hashmap.h
@@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits)
 #endif
 }
 
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+	size_t h = 0;
+
+	while (*s) {
+		h = h * 31 + *s;
+		s++;
+	}
+	return h;
+}
+
 typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
 typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
 
-- 
cgit v1.2.3-70-g09d2


From 263e452eff397b370e39d464c8cbd30f6bd59fb9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Nov 2020 08:29:30 -0300
Subject: tools headers UAPI: Update process_madvise affected files

To pick the changes from:

  ecb8ac8b1f146915 ("mm/madvise: introduce process_madvise() syscall: an external memory hinting API")

That addresses these perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
  Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'
  diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/unistd.h           |  4 +++-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 11 +++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index f2b5d72a46c2..2056318988f7 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 #define __NR_faccessat2 439
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
 
 #undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 347809649ba2..379819244b91 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -361,12 +361,13 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
+440	common	process_madvise		sys_process_madvise
 
 #
-# x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation. The __x32_compat_sys stubs are created
-# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
-# is defined.
+# Due to a historical design error, certain syscalls are numbered differently
+# in x32 as compared to native x86_64.  These syscalls have numbers 512-547.
+# Do not add new syscalls to this range.  Numbers 548 and above are available
+# for non-x32 use.
 #
 512	x32	rt_sigaction		compat_sys_rt_sigaction
 513	x32	rt_sigreturn		compat_sys_x32_rt_sigreturn
@@ -404,3 +405,5 @@
 545	x32	execveat		compat_sys_execveat
 546	x32	preadv2			compat_sys_preadv64v2
 547	x32	pwritev2		compat_sys_pwritev64v2
+# This is the end of the legacy x32 range.  Numbers 548 and above are
+# not special and are not to be used for x32-specific syscalls.
-- 
cgit v1.2.3-70-g09d2


From ab8bf5f2e0321f254590ad81c6e230185d88b4e5 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Fri, 16 Oct 2020 14:47:18 +0300
Subject: perf tools: Fix crash with non-jited bpf progs

The addr in PERF_RECORD_KSYMBOL events for non-jited bpf progs points to
the bpf interpreter, ie. within kernel text section. When processing the
unregister event, this causes unexpected removal of vmlinux_map,
crashing perf later in cleanup:

  # perf record -- timeout --signal=INT 2s /usr/share/bcc/tools/execsnoop
  PCOMM            PID    PPID   RET ARGS
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.208 MB perf.data (5155 samples) ]
  perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed.
  Aborted (core dumped)

  # perf script -D|grep KSYM
  0 0xa40 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6
  0 0xab0 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_8c42dee26e8cd4c2
  0 0xb20 [0x48]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b530 len 0 type 1 flags 0x0 name bpf_prog_f958f6eb72ef5af6
  108563691893 0x33d98 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x0 name bpf_prog_bc5697a410556fc2_syscall__execve
  108568518458 0x34098 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x0 name bpf_prog_45e2203c2928704d_do_ret_sys_execve
  109301967895 0x34830 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3b0 len 0 type 1 flags 0x1 name bpf_prog_bc5697a410556fc2_syscall__execve
  109302007356 0x348b0 [0x58]: PERF_RECORD_KSYMBOL addr ffffffffa9b6b3f0 len 0 type 1 flags 0x1 name bpf_prog_45e2203c2928704d_do_ret_sys_execve
  perf: tools/include/linux/refcount.h:131: refcount_sub_and_test: Assertion `!(new > val)' failed.

Here the addresses match the bpf interpreter:

  # grep -e ffffffffa9b6b530 -e ffffffffa9b6b3b0 -e ffffffffa9b6b3f0 /proc/kallsyms
  ffffffffa9b6b3b0 t __bpf_prog_run224
  ffffffffa9b6b3f0 t __bpf_prog_run192
  ffffffffa9b6b530 t __bpf_prog_run32

Fix by not allowing vmlinux_map to be removed by PERF_RECORD_KSYMBOL
unregister event.

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201016114718.54332-1-tommi.t.rantala@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 11 ++++++++++-
 tools/perf/util/symbol.c  |  7 +++++++
 tools/perf/util/symbol.h  |  2 ++
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 7d4194ffc5b0..15385ea00190 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct machine *machine,
 					       union perf_event *event,
 					       struct perf_sample *sample __maybe_unused)
 {
+	struct symbol *sym;
 	struct map *map;
 
 	map = maps__find(&machine->kmaps, event->ksymbol.addr);
-	if (map)
+	if (!map)
+		return 0;
+
+	if (map != machine->vmlinux_map)
 		maps__remove(&machine->kmaps, map);
+	else {
+		sym = dso__find_symbol(map->dso, map->map_ip(map, map->start));
+		if (sym)
+			dso__delete_symbol(map->dso, sym);
+	}
 
 	return 0;
 }
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 6138866665df..0d14abdf3d72 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym)
 	}
 }
 
+void dso__delete_symbol(struct dso *dso, struct symbol *sym)
+{
+	rb_erase_cached(&sym->rb_node, &dso->symbols);
+	symbol__delete(sym);
+	dso__reset_find_symbol_cache(dso);
+}
+
 struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
 {
 	if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f4801c488def..954d6a049ee2 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -131,6 +131,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
 
 void dso__insert_symbol(struct dso *dso,
 			struct symbol *sym);
+void dso__delete_symbol(struct dso *dso,
+			struct symbol *sym);
 
 struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
-- 
cgit v1.2.3-70-g09d2


From a6293f36ac92ab513771a98efe486477be2f981f Mon Sep 17 00:00:00 2001
From: Stanislav Ivanichkin <sivanichkin@yandex-team.ru>
Date: Tue, 27 Oct 2020 12:43:57 +0300
Subject: perf trace: Fix segfault when trying to trace events by cgroup

  # ./perf trace -e sched:sched_switch -G test -a sleep 1
  perf: Segmentation fault
  Obtained 11 stack frames.
  ./perf(sighandler_dump_stack+0x43) [0x55cfdc636db3]
  /lib/x86_64-linux-gnu/libc.so.6(+0x3efcf) [0x7fd23eecafcf]
  ./perf(parse_cgroups+0x36) [0x55cfdc673f36]
  ./perf(+0x3186ed) [0x55cfdc70d6ed]
  ./perf(parse_options_subcommand+0x629) [0x55cfdc70e999]
  ./perf(cmd_trace+0x9c2) [0x55cfdc5ad6d2]
  ./perf(+0x1e8ae0) [0x55cfdc5ddae0]
  ./perf(+0x1e8ded) [0x55cfdc5ddded]
  ./perf(main+0x370) [0x55cfdc556f00]
  /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe6) [0x7fd23eeadb96]
  ./perf(_start+0x29) [0x55cfdc557389]
  Segmentation fault
  #

 It happens because "struct trace" in option->value is passed to the
 parse_cgroups function instead of "struct evlist".

Fixes: 9ea42ba4411ac ("perf trace: Support setting cgroups as targets")
Signed-off-by: Stanislav Ivanichkin <sivanichkin@yandex-team.ru>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Dmitry Monakhov <dmtrmonakhov@yandex-team.ru>
Link: http://lore.kernel.org/lkml/20201027094357.94881-1-sivanichkin@yandex-team.ru
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 44a75f234db1..de80534473af 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4639,9 +4639,9 @@ do_concat:
 	err = 0;
 
 	if (lists[0]) {
-		struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
-					       "event selector. use 'perf list' to list available events",
-					       parse_events_option);
+		struct option o = {
+			.value = &trace->evlist,
+		};
 		err = parse_events_option(&o, lists[0], 0);
 	}
 out:
@@ -4655,9 +4655,12 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
 {
 	struct trace *trace = opt->value;
 
-	if (!list_empty(&trace->evlist->core.entries))
-		return parse_cgroups(opt, str, unset);
-
+	if (!list_empty(&trace->evlist->core.entries)) {
+		struct option o = {
+			.value = &trace->evlist,
+		};
+		return parse_cgroups(&o, str, unset);
+	}
 	trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 0dfbe4c646bf06a85c3d70572a8b8aa6ebffe3d5 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 23 Oct 2020 08:53:34 +0800
Subject: perf vendor events: Fix DRAM_BW_Use 0 issue for CLX/SKX

Ian reports an issue that the metric DRAM_BW_Use often remains 0.

The metric expression for DRAM_BW_Use on CLX/SKX:

"( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time"

The counts of uncore_imc/cas_count_read/ and uncore_imc/cas_count_write/
are scaled up by 64, that is to turn a count of cache lines into bytes,
the count is then divided by 1000000000 to give GB.

However, the counts of uncore_imc/cas_count_read/ and
uncore_imc/cas_count_write/ have been scaled yet.

The scale values are from sysfs, such as
/sys/devices/uncore_imc_0/events/cas_count_read.scale.
It's 6.103515625e-5 (64 / 1024.0 / 1024.0).

So if we use original metric expression, the result is not correct.

But the difficulty is, for SKL client, the counts are not scaled.

The metric expression for DRAM_BW_Use on SKL:

"64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000"

root@kbl-ppc:~# perf stat -M DRAM_BW_Use -a -- sleep 1

 Performance counter stats for 'system wide':

               190      arb/event=0x84,umask=0x1/ #     1.86 DRAM_BW_Use
        29,093,178      arb/event=0x81,umask=0x1/
     1,000,703,287 ns   duration_time

       1.000703287 seconds time elapsed

The result is expected.

So the easy way is just change the metric expression for CLX/SKX.
This patch changes the metric expression to:

"( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time"

1048576 = 1024 * 1024.

Before (tested on CLX):

root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1

 Performance counter stats for 'system wide':

            765.35 MiB  uncore_imc/cas_count_read/ #     0.00 DRAM_BW_Use
              5.42 MiB  uncore_imc/cas_count_write/
        1001515088 ns   duration_time

       1.001515088 seconds time elapsed

After:

root@lkp-csl-2sp5 ~# perf stat -M DRAM_BW_Use -a -- sleep 1

 Performance counter stats for 'system wide':

            767.95 MiB  uncore_imc/cas_count_read/ #     0.80 DRAM_BW_Use
              5.02 MiB  uncore_imc/cas_count_write/
        1001900010 ns   duration_time

       1.001900010 seconds time elapsed

Fixes: 038d3b53c284 ("perf vendor events intel: Update CascadelakeX events to v1.08")
Fixes: b5ff7f2799a4 ("perf vendor events: Update SkylakeX events to v1.21")
Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20201023005334.7869-1-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 2 +-
 tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index de3193552277..00f4fcffa815 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -329,7 +329,7 @@
     },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
-        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+        "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time",
         "MetricGroup": "Memory_BW;SoC",
         "MetricName": "DRAM_BW_Use"
     },
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index f31794d3b926..0dd8b13b5cfb 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -323,7 +323,7 @@
     },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
-        "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+        "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time",
         "MetricGroup": "Memory_BW;SoC",
         "MetricName": "DRAM_BW_Use"
     },
-- 
cgit v1.2.3-70-g09d2


From 9ae1e990f1ab522b98baefbfebf3cbac1a2cfac2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 28 Oct 2020 09:11:23 +0100
Subject: perf tools: Remove broken __no_tail_call attribute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GCC specific __attribute__((optimize)) attribute does not what is
commonly expected and is explicitly recommended against using in
production code by the GCC people.

Unlike what is often expected, it doesn't add to the optimization flags,
but it fully replaces them, loosing any and all optimization flags
provided by the compiler commandline.

The only guaranteed upon means of inhibiting tail-calls is by placing a
volatile asm with side-effects after the call such that the tail-call simply
cannot be done.

Given the original commit wasn't specific on which calls were the problem, this
removal might re-introduce the problem, which can then be re-analyzed and cured
properly.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Arvind Sankar <nivedita@alum.mit.edu>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Kook <keescook@chromium.org>
Cc: Martin Liška <mliska@suse.cz>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20201028081123.GT2628@hirez.programming.kicks-ass.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/compiler-gcc.h | 12 ------------
 tools/include/linux/compiler.h     |  3 ---
 tools/perf/tests/dwarf-unwind.c    | 10 +++++-----
 3 files changed, 5 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index b9d4322e1e65..95c072b70d0e 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -27,18 +27,6 @@
 #define  __pure		__attribute__((pure))
 #endif
 #define  noinline	__attribute__((noinline))
-#ifdef __has_attribute
-#if __has_attribute(disable_tail_calls)
-#define __no_tail_call	__attribute__((disable_tail_calls))
-#endif
-#endif
-#ifndef __no_tail_call
-#if GCC_VERSION > 40201
-#define __no_tail_call	__attribute__((optimize("no-optimize-sibling-calls")))
-#else
-#define __no_tail_call
-#endif
-#endif
 #ifndef __packed
 #define __packed	__attribute__((packed))
 #endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 2b3f7353e891..d22a974372c0 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -47,9 +47,6 @@
 #ifndef noinline
 #define noinline
 #endif
-#ifndef __no_tail_call
-#define __no_tail_call
-#endif
 
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #ifndef __same_type
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 2491d167bf76..83638097c3bc 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -95,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	return strcmp((const char *) symbol, funcs[idx]);
 }
 
-__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread)
+noinline int test_dwarf_unwind__thread(struct thread *thread)
 {
 	struct perf_sample sample;
 	unsigned long cnt = 0;
@@ -126,7 +126,7 @@ __no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread)
 
 static int global_unwind_retval = -INT_MAX;
 
-__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+noinline int test_dwarf_unwind__compare(void *p1, void *p2)
 {
 	/* Any possible value should be 'thread' */
 	struct thread *thread = *(struct thread **)p1;
@@ -145,7 +145,7 @@ __no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2)
 	return p1 - p2;
 }
 
-__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+noinline int test_dwarf_unwind__krava_3(struct thread *thread)
 {
 	struct thread *array[2] = {thread, thread};
 	void *fp = &bsearch;
@@ -164,12 +164,12 @@ __no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread)
 	return global_unwind_retval;
 }
 
-__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+noinline int test_dwarf_unwind__krava_2(struct thread *thread)
 {
 	return test_dwarf_unwind__krava_3(thread);
 }
 
-__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+noinline int test_dwarf_unwind__krava_1(struct thread *thread)
 {
 	return test_dwarf_unwind__krava_2(thread);
 }
-- 
cgit v1.2.3-70-g09d2


From d0e7b0c71fbb653de90a7163ef46912a96f0bdaf Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 30 Oct 2020 08:24:38 -0300
Subject: perf scripting python: Avoid declaring function pointers with a
 visibility attribute

To avoid this:

  util/scripting-engines/trace-event-python.c: In function 'python_start_script':
  util/scripting-engines/trace-event-python.c:1595:2: error: 'visibility' attribute ignored [-Werror=attributes]
   1595 |  PyMODINIT_FUNC (*initfunc)(void);
        |  ^~~~~~~~~~~~~~

That started breaking when building with PYTHON=python3 and these gcc
versions (I haven't checked with the clang ones, maybe it breaks there
as well):

  # export PERF_TARBALL=http://192.168.86.5/perf/perf-5.9.0.tar.xz
  # dm  fedora:33 fedora:rawhide
     1   107.80 fedora:33         : Ok   gcc (GCC) 10.2.1 20201005 (Red Hat 10.2.1-5), clang version 11.0.0 (Fedora 11.0.0-1.fc33)
     2    92.47 fedora:rawhide    : Ok   gcc (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6), clang version 11.0.0 (Fedora 11.0.0-1.fc34)
  #

Avoid that by ditching that 'initfunc' function pointer with its:

    #define Py_EXPORTED_SYMBOL _attribute_ ((visibility ("default")))
    #define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject*

And just call PyImport_AppendInittab() at the end of the ifdef python3
block with the functions that were being attributed to that initfunc.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/scripting-engines/trace-event-python.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 7cbd024e3e63..c83c2c6564e0 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1592,7 +1592,6 @@ static void _free_command_line(wchar_t **command_line, int num)
 static int python_start_script(const char *script, int argc, const char **argv)
 {
 	struct tables *tables = &tables_global;
-	PyMODINIT_FUNC (*initfunc)(void);
 #if PY_MAJOR_VERSION < 3
 	const char **command_line;
 #else
@@ -1607,20 +1606,18 @@ static int python_start_script(const char *script, int argc, const char **argv)
 	FILE *fp;
 
 #if PY_MAJOR_VERSION < 3
-	initfunc = initperf_trace_context;
 	command_line = malloc((argc + 1) * sizeof(const char *));
 	command_line[0] = script;
 	for (i = 1; i < argc + 1; i++)
 		command_line[i] = argv[i - 1];
+	PyImport_AppendInittab(name, initperf_trace_context);
 #else
-	initfunc = PyInit_perf_trace_context;
 	command_line = malloc((argc + 1) * sizeof(wchar_t *));
 	command_line[0] = Py_DecodeLocale(script, NULL);
 	for (i = 1; i < argc + 1; i++)
 		command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
+	PyImport_AppendInittab(name, PyInit_perf_trace_context);
 #endif
-
-	PyImport_AppendInittab(name, initfunc);
 	Py_Initialize();
 
 #if PY_MAJOR_VERSION < 3
-- 
cgit v1.2.3-70-g09d2


From ad6330ac2c5a38e5573cb6ae8ff75288bfd96325 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 12:38:16 -0300
Subject: tools headers UAPI: Sync prctl.h with the kernel sources

To get the changes in:

  1c101da8b971a366 ("arm64: mte: Allow user control of the tag check mode via prctl()")
  af5ce95282dc99d0 ("arm64: mte: Allow user control of the generated random tags via prctl()")

Which don't cause any change in tooling, only addresses this perf build
warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h'
  diff -u tools/include/uapi/linux/prctl.h include/uapi/linux/prctl.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/prctl.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 07b4f8131e36..7f0827705c9a 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -233,6 +233,15 @@ struct prctl_mm_map {
 #define PR_SET_TAGGED_ADDR_CTRL		55
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
+/* MTE tag check fault modes */
+# define PR_MTE_TCF_SHIFT		1
+# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
+/* MTE tag inclusion mask */
+# define PR_MTE_TAG_SHIFT		3
+# define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57
-- 
cgit v1.2.3-70-g09d2


From 9e228f48980635c187720c0956b39c04db5e8f56 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 12:41:58 -0300
Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources

To pick the changes in:

  13149e8bafc46572 ("drm/i915: add syncobj timeline support")
  cda9edd02425d790 ("drm/i915: introduce a mechanism to extend execbuf2")

That don't result in any changes in tooling, just silences this perf
build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h'
  diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/drm/i915_drm.h | 59 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 00546062e023..fa1f3d62f9a6 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	54
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1046,6 +1052,38 @@ struct drm_i915_gem_exec_fence {
 	__u32 flags;
 };
 
+/**
+ * See drm_i915_gem_execbuffer_ext_timeline_fences.
+ */
+#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
+
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+	struct i915_user_extension base;
+
+	/**
+	 * Number of element in the handles_ptr & value_ptr arrays.
+	 */
+	__u64 fence_count;
+
+	/**
+	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
+	 * fence_count.
+	 */
+	__u64 handles_ptr;
+
+	/**
+	 * Pointer to an array of u64 values of length fence_count. Values
+	 * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
+	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 */
+	__u64 values_ptr;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -1062,8 +1100,14 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 num_cliprects;
 	/**
 	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
-	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
-	 * struct drm_i915_gem_exec_fence *fences.
+	 * & I915_EXEC_USE_EXTENSIONS are not set.
+	 *
+	 * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+	 * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+	 * of the array.
+	 *
+	 * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+	 * single struct i915_user_extension and num_cliprects is 0.
 	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (0x3f)
@@ -1181,7 +1225,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_SUBMIT		(1 << 20)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The list of supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS	(1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
cgit v1.2.3-70-g09d2


From d0448d6a249b6fc4518181b214d3403dfe2c8075 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 13:12:52 -0300
Subject: tools headers UAPI: Update fscrypt.h copy

To get the changes from:

  c7f0207b613033c5 ("fscrypt: make "#define fscrypt_policy" user-only")

That don't cause any changes in tools/perf, only addresses this perf
tools build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h'
  diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/fscrypt.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index 7875709ccfeb..e5de60336938 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -45,7 +45,6 @@ struct fscrypt_policy_v1 {
 	__u8 flags;
 	__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 };
-#define fscrypt_policy	fscrypt_policy_v1
 
 /*
  * Process-subscribed "logon" key description prefix and payload format.
@@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg {
 	__u32 __out_reserved[13];
 };
 
-#define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy_v1)
 #define FS_IOC_GET_ENCRYPTION_PWSALT		_IOW('f', 20, __u8[16])
-#define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy_v1)
 #define FS_IOC_GET_ENCRYPTION_POLICY_EX		_IOWR('f', 22, __u8[9]) /* size + version */
 #define FS_IOC_ADD_ENCRYPTION_KEY		_IOWR('f', 23, struct fscrypt_add_key_arg)
 #define FS_IOC_REMOVE_ENCRYPTION_KEY		_IOWR('f', 24, struct fscrypt_remove_key_arg)
@@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg {
 
 /* old names; don't add anything new here! */
 #ifndef __KERNEL__
+#define fscrypt_policy			fscrypt_policy_v1
 #define FS_KEY_DESCRIPTOR_SIZE		FSCRYPT_KEY_DESCRIPTOR_SIZE
 #define FS_POLICY_FLAGS_PAD_4		FSCRYPT_POLICY_FLAGS_PAD_4
 #define FS_POLICY_FLAGS_PAD_8		FSCRYPT_POLICY_FLAGS_PAD_8
-- 
cgit v1.2.3-70-g09d2


From 40a6bbf5149c7302bd7515fb5e2c3d12bac462f5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 13:21:04 -0300
Subject: tools x86 headers: Update cpufeatures.h headers copies

To pick the changes from:

  5866e9205b47a983 ("x86/cpu: Add hardware-enforced cache coherency as a CPUID feature")
  ff4f82816dff28ff ("x86/cpufeatures: Enumerate ENQCMD and ENQCMDS instructions")
  360e7c5c4ca4fd8e ("x86/cpufeatures: Add SEV-ES CPU feature")
  18ec63faefb3fd31 ("x86/cpufeatures: Enumerate TSX suspend load address tracking instructions")
  e48cb1a3fb916500 ("x86/resctrl: Enumerate per-thread MBA controls")

Which don't cause any changes in tooling, just addresses these build
warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h'
  diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h'
  diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Cc: Kyung Min Park <kyung.min.park@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h       | 6 +++++-
 tools/arch/x86/include/asm/disabled-features.h | 9 ++++++++-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..dad350d42ecf 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-/* free					( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT	( 3*32+17) /* "" AMD hardware-enforced cache coherency */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -236,6 +236,7 @@
 #define X86_FEATURE_EPT_AD		( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL		( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_SEV_ES		( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -288,6 +289,7 @@
 #define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -353,6 +355,7 @@
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD		(16*32+29) /* ENQCMD and ENQCMDS instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
@@ -368,6 +371,7 @@
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 4ea8584682f9..5861d34f9771 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,6 +56,12 @@
 # define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
 #endif
 
+#ifdef CONFIG_IOMMU_SUPPORT
+# define DISABLE_ENQCMD	0
+#else
+# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -75,7 +81,8 @@
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
-#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
+#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
+			 DISABLE_ENQCMD)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK18	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
-- 
cgit v1.2.3-70-g09d2


From 8b2fc25a945b125c7ee4c36b048ad65f7c04105e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 13:24:52 -0300
Subject: tools x86 headers: Update required-features.h header from the kernel

To pick the changes from:

  ecac71816a1829c0 ("x86/paravirt: Use CONFIG_PARAVIRT_XXL instead of CONFIG_PARAVIRT")

That don entail any changes in tooling, just addressing these perf tools
build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/required-features.h' differs from latest version at 'arch/x86/include/asm/required-features.h'
  diff -u tools/arch/x86/include/asm/required-features.h arch/x86/include/asm/required-features.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Juergen Gross <jgross@suse.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/required-features.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 6847d85400a8..3ff0d48469f2 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -54,7 +54,7 @@
 #endif
 
 #ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
 /* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
 #define NEED_PGE	0
-- 
cgit v1.2.3-70-g09d2


From 32b734e09ec38a0bb81d05d37056a95584d14c99 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 13:28:04 -0300
Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources

To pick up the changes in:

  29dcc60f6a19fb0a ("x86/boot/compressed/64: Add stage1 #VC handler")
  36e1be8ada994d50 ("perf/x86/amd/ibs: Fix raw sample data accumulation")
  59a854e2f3b90ad2 ("perf/x86/intel: Support TopDown metrics on Ice Lake")
  7b2c05a15d29d057 ("perf/x86/intel: Generic support for hardware TopDown metrics")
  99e40204e014e066 ("x86/msr: Move the F15h MSRs where they belong")
  b57de6cd16395be1 ("x86/sev-es: Add SEV-ES Feature Detection")
  ed7bde7a6dab521e ("cpufreq: intel_pstate: Allow enable/disable energy efficiency")
  f0f2f9feb4ee6f28 ("x86/msr-index: Define an IA32_PASID MSR")

That cause these changes in tooling:

  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before
  $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h
  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after
  $ diff -u before after
  --- before	2020-10-19 13:27:33.195274425 -0300
  +++ after	2020-10-19 13:27:44.144507610 -0300
  @@ -113,6 +113,8 @@
   	[0x00000309] = "CORE_PERF_FIXED_CTR0",
   	[0x0000030a] = "CORE_PERF_FIXED_CTR1",
   	[0x0000030b] = "CORE_PERF_FIXED_CTR2",
  +	[0x0000030c] = "CORE_PERF_FIXED_CTR3",
  +	[0x00000329] = "PERF_METRICS",
   	[0x00000345] = "IA32_PERF_CAPABILITIES",
   	[0x0000038d] = "CORE_PERF_FIXED_CTR_CTRL",
   	[0x0000038e] = "CORE_PERF_GLOBAL_STATUS",
  @@ -222,6 +224,7 @@
   	[0x00000774] = "HWP_REQUEST",
   	[0x00000777] = "HWP_STATUS",
   	[0x00000d90] = "IA32_BNDCFGS",
  +	[0x00000d93] = "IA32_PASID",
   	[0x00000da0] = "IA32_XSS",
   	[0x00000dc0] = "LBR_INFO_0",
   	[0x00000ffc] = "IA32_BNDCFGS_RSVD",
  @@ -279,6 +282,7 @@
   	[0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE",
   	[0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA",
   	[0xc001011f - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VIRT_SPEC_CTRL",
  +	[0xc0010130 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV_ES_GHCB",
   	[0xc0010131 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV",
   	[0xc0010140 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_ID_LENGTH",
   	[0xc0010141 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_STATUS",
  $

Which causes these parts of tools/perf/ to be rebuilt:

  CC       /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o
  DESCEND  plugins
  GEN      /tmp/build/perf/python/perf.so
  INSTALL  trace_plugins
  LD       /tmp/build/perf/trace/beauty/tracepoints/perf-in.o
  LD       /tmp/build/perf/trace/beauty/perf-in.o
  LD       /tmp/build/perf/perf-in.o
  LINK     /tmp/build/perf/per

At some point these should just be tables read by perf on demand.

This addresses this perf tools build warning:

  diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/msr-index.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 2859ee4f39a8..972a34d93505 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -257,6 +257,9 @@
 #define MSR_IA32_LASTINTFROMIP		0x000001dd
 #define MSR_IA32_LASTINTTOIP		0x000001de
 
+#define MSR_IA32_PASID			0x00000d93
+#define MSR_IA32_PASID_VALID		BIT_ULL(31)
+
 /* DEBUGCTLMSR bits (others vary by model): */
 #define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
 #define DEBUGCTLMSR_BTF_SHIFT		1
@@ -464,11 +467,15 @@
 #define MSR_AMD64_IBSOP_REG_MASK	((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
 #define MSR_AMD64_IBSCTL		0xc001103a
 #define MSR_AMD64_IBSBRTARGET		0xc001103b
+#define MSR_AMD64_ICIBSEXTDCTL		0xc001103c
 #define MSR_AMD64_IBSOPDATA4		0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV_ES_GHCB		0xc0010130
 #define MSR_AMD64_SEV			0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT	0
+#define MSR_AMD64_SEV_ES_ENABLED_BIT	1
 #define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED	BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
 
 #define MSR_AMD64_VIRT_SPEC_CTRL	0xc001011f
 
@@ -857,11 +864,14 @@
 #define MSR_CORE_PERF_FIXED_CTR0	0x00000309
 #define MSR_CORE_PERF_FIXED_CTR1	0x0000030a
 #define MSR_CORE_PERF_FIXED_CTR2	0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR3	0x0000030c
 #define MSR_CORE_PERF_FIXED_CTR_CTRL	0x0000038d
 #define MSR_CORE_PERF_GLOBAL_STATUS	0x0000038e
 #define MSR_CORE_PERF_GLOBAL_CTRL	0x0000038f
 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL	0x00000390
 
+#define MSR_PERF_METRICS		0x00000329
+
 /* PERF_GLOBAL_OVF_CTL bits */
 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT	55
 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI		(1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
-- 
cgit v1.2.3-70-g09d2


From 97a3863b170e38a8eefc07a72d418a81fd225216 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 13:36:41 -0300
Subject: tools UAPI: Update copy of linux/mman.h from the kernel sources

  e47168f3d1b14af5 ("powerpc/8xx: Support 16k hugepages with 4k pages")

That don't cause any changes in tooling, just addresses this perf build
warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h'
  diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/mman.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index 923cc162609c..f55bc680b5b0 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -27,6 +27,7 @@
 #define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
 #define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
 
+#define MAP_HUGE_16KB	HUGETLB_FLAG_ENCODE_16KB
 #define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
 #define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
 #define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
-- 
cgit v1.2.3-70-g09d2


From aa04899a13078e4181146212555a1bbaa387d2c9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Oct 2020 13:42:55 -0300
Subject: tools kvm headers: Update KVM headers from the kernel sources

Some should cause changes in tooling, like the one adding LAST_EXCP, but
the way it is structured end up not making that happen.

The new SVM_EXIT_INVPCID should get used by arch/x86/util/kvm-stat.c,
in the svm_exit_reasons table.

The tools/perf/trace/beauty part has scripts to catch changes and
automagically create tables, like tools/perf/trace/beauty/kvm_ioctl.sh,
but changes are needed to make tools/perf/arch/x86/util/kvm-stat.c catch
those automatically.

These were handled by the existing scripts:

  $ tools/perf/trace/beauty/kvm_ioctl.sh > before
  $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h
  $ tools/perf/trace/beauty/kvm_ioctl.sh > after
  $ diff -u before after
  --- before	2020-11-03 08:43:52.910728608 -0300
  +++ after	2020-11-03 08:44:04.273959984 -0300
  @@ -89,6 +89,7 @@
   	[0xbf] = "SET_NESTED_STATE",
   	[0xc0] = "CLEAR_DIRTY_LOG",
   	[0xc1] = "GET_SUPPORTED_HV_CPUID",
  +	[0xc6] = "X86_SET_MSR_FILTER",
   	[0xe0] = "CREATE_DEVICE",
   	[0xe1] = "SET_DEVICE_ATTR",
   	[0xe2] = "GET_DEVICE_ATTR",
  $
  $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > before
  $ cp include/uapi/linux/vhost.h tools/include/uapi/linux/vhost.h
  $
  $ tools/perf/trace/beauty/vhost_virtio_ioctl.sh > after
  $ diff -u before after
  --- before	2020-11-03 08:45:55.522225198 -0300
  +++ after	2020-11-03 08:46:12.881578666 -0300
  @@ -37,4 +37,5 @@
   	[0x71] = "VDPA_GET_STATUS",
   	[0x73] = "VDPA_GET_CONFIG",
   	[0x76] = "VDPA_GET_VRING_NUM",
  +	[0x78] = "VDPA_GET_IOVA_RANGE",
   };
  $

This addresses these perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h'
  diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h
  Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/sie.h' differs from latest version at 'arch/s390/include/uapi/asm/sie.h'
  diff -u tools/arch/s390/include/uapi/asm/sie.h arch/s390/include/uapi/asm/sie.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h'
  diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h'
  diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h
  Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h'
  diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
  Warning: Kernel ABI header at 'tools/include/uapi/linux/vhost.h' differs from latest version at 'include/uapi/linux/vhost.h'
  diff -u tools/include/uapi/linux/vhost.h include/uapi/linux/vhost.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Yarygin <yarygin@linux.vnet.ibm.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/arm64/include/uapi/asm/kvm.h | 25 +++++++++++++++++++++++++
 tools/arch/s390/include/uapi/asm/sie.h  |  2 +-
 tools/arch/x86/include/uapi/asm/kvm.h   | 20 ++++++++++++++++++++
 tools/arch/x86/include/uapi/asm/svm.h   | 13 +++++++++++++
 tools/include/uapi/linux/kvm.h          | 19 +++++++++++++++++++
 tools/include/uapi/linux/vhost.h        |  4 ++++
 6 files changed, 82 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..1c17c3a24411 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -159,6 +159,21 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/*
+ * PMU filter structure. Describe a range of events with a particular
+ * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
+ */
+struct kvm_pmu_event_filter {
+	__u16	base_event;
+	__u16	nevents;
+
+#define KVM_PMU_EVENT_ALLOW	0
+#define KVM_PMU_EVENT_DENY	1
+
+	__u8	action;
+	__u8	pad[3];
+};
+
 /* for KVM_GET/SET_VCPU_EVENTS */
 struct kvm_vcpu_events {
 	struct {
@@ -242,6 +257,15 @@ struct kvm_vcpu_events {
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL		0
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL		1
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED	2
+
+/*
+ * Only two states can be presented by the host kernel:
+ * - NOT_REQUIRED: the guest doesn't need to do anything
+ * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available)
+ *
+ * All the other values are deprecated. The host still accepts all
+ * values (they are ABI), but will narrow them to the above two.
+ */
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL		0
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN		1
@@ -329,6 +353,7 @@ struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
 #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
 #define   KVM_ARM_VCPU_PMU_V3_INIT	1
+#define   KVM_ARM_VCPU_PMU_V3_FILTER	2
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h
index 6ca1e68d7103..ede318653c87 100644
--- a/tools/arch/s390/include/uapi/asm/sie.h
+++ b/tools/arch/s390/include/uapi/asm/sie.h
@@ -29,7 +29,7 @@
 	{ 0x13, "SIGP conditional emergency signal" },		\
 	{ 0x15, "SIGP sense running" },				\
 	{ 0x16, "SIGP set multithreading"},			\
-	{ 0x17, "SIGP store additional status ait address"}
+	{ 0x17, "SIGP store additional status at address"}
 
 #define icpt_prog_codes						\
 	{ 0x0001, "Prog Operation" },				\
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 0780f97c1850..89e5f3d1bba8 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -192,6 +192,26 @@ struct kvm_msr_list {
 	__u32 indices[0];
 };
 
+/* Maximum size of any access bitmap in bytes */
+#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600
+
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range {
+#define KVM_MSR_FILTER_READ  (1 << 0)
+#define KVM_MSR_FILTER_WRITE (1 << 1)
+	__u32 flags;
+	__u32 nmsrs; /* number of msrs in bitmap */
+	__u32 base;  /* MSR index the bitmap starts at */
+	__u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+};
+
+#define KVM_MSR_FILTER_MAX_RANGES 16
+struct kvm_msr_filter {
+#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
+	__u32 flags;
+	struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
 
 struct kvm_cpuid_entry {
 	__u32 function;
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 2e8a30f06c74..f1d8307454e0 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -29,6 +29,7 @@
 #define SVM_EXIT_WRITE_DR6     0x036
 #define SVM_EXIT_WRITE_DR7     0x037
 #define SVM_EXIT_EXCP_BASE     0x040
+#define SVM_EXIT_LAST_EXCP     0x05f
 #define SVM_EXIT_INTR          0x060
 #define SVM_EXIT_NMI           0x061
 #define SVM_EXIT_SMI           0x062
@@ -76,10 +77,21 @@
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_RDPRU         0x08e
+#define SVM_EXIT_INVPCID       0x0a2
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI		0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS	0x402
 
+/* SEV-ES software-defined VMGEXIT events */
+#define SVM_VMGEXIT_MMIO_READ			0x80000001
+#define SVM_VMGEXIT_MMIO_WRITE			0x80000002
+#define SVM_VMGEXIT_NMI_COMPLETE		0x80000003
+#define SVM_VMGEXIT_AP_HLT_LOOP			0x80000004
+#define SVM_VMGEXIT_AP_JUMP_TABLE		0x80000005
+#define SVM_VMGEXIT_SET_AP_JUMP_TABLE		0
+#define SVM_VMGEXIT_GET_AP_JUMP_TABLE		1
+#define SVM_VMGEXIT_UNSUPPORTED_EVENT		0x8000ffff
+
 #define SVM_EXIT_ERR           -1
 
 #define SVM_EXIT_REASONS \
@@ -171,6 +183,7 @@
 	{ SVM_EXIT_MONITOR,     "monitor" }, \
 	{ SVM_EXIT_MWAIT,       "mwait" }, \
 	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
+	{ SVM_EXIT_INVPCID,     "invpcid" }, \
 	{ SVM_EXIT_NPF,         "npf" }, \
 	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
 	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7d8eced6f459..ca41220b40b8 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -248,6 +248,8 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_IOAPIC_EOI       26
 #define KVM_EXIT_HYPERV           27
 #define KVM_EXIT_ARM_NISV         28
+#define KVM_EXIT_X86_RDMSR        29
+#define KVM_EXIT_X86_WRMSR        30
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -413,6 +415,17 @@ struct kvm_run {
 			__u64 esr_iss;
 			__u64 fault_ipa;
 		} arm_nisv;
+		/* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
+		struct {
+			__u8 error; /* user -> kernel */
+			__u8 pad[7];
+#define KVM_MSR_EXIT_REASON_INVAL	(1 << 0)
+#define KVM_MSR_EXIT_REASON_UNKNOWN	(1 << 1)
+#define KVM_MSR_EXIT_REASON_FILTER	(1 << 2)
+			__u32 reason; /* kernel -> user */
+			__u32 index; /* kernel -> user */
+			__u64 data; /* kernel <-> user */
+		} msr;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SMALLER_MAXPHYADDR 185
 #define KVM_CAP_S390_DIAG318 186
 #define KVM_CAP_STEAL_TIME 187
+#define KVM_CAP_X86_USER_SPACE_MSR 188
+#define KVM_CAP_X86_MSR_FILTER 189
+#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1538,6 +1554,9 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_S390_PROTECTED */
 #define KVM_S390_PV_COMMAND		_IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
 
+/* Available with KVM_CAP_X86_MSR_FILTER */
+#define KVM_X86_SET_MSR_FILTER	_IOW(KVMIO,  0xc6, struct kvm_msr_filter)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 75232185324a..c998860d7bbc 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -146,4 +146,8 @@
 
 /* Set event fd for config interrupt*/
 #define VHOST_VDPA_SET_CONFIG_CALL	_IOW(VHOST_VIRTIO, 0x77, int)
+
+/* Get the valid iova range */
+#define VHOST_VDPA_GET_IOVA_RANGE	_IOR(VHOST_VIRTIO, 0x78, \
+					     struct vhost_vdpa_iova_range)
 #endif
-- 
cgit v1.2.3-70-g09d2


From a9e27f5f9827eab25b76155fddcc22ddeeed58d2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Nov 2020 08:49:59 -0300
Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h

The diff is just tabs versus spaces, trivial.

This silences this perf tools build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h'
  diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 3e5dcdd48a49..b95d3c485d27 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1196,7 +1196,7 @@ union perf_mem_data_src {
 
 #define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
 /* 1 free */
-#define PERF_MEM_SNOOPX_SHIFT	38
+#define PERF_MEM_SNOOPX_SHIFT  38
 
 /* locked instruction */
 #define PERF_MEM_LOCK_NA	0x01 /* not available */
-- 
cgit v1.2.3-70-g09d2


From 42cc0e70a21faa8e7d7ea8713a3f9cd64bd3f60a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Nov 2020 08:52:11 -0300
Subject: tools include UAPI: Update linux/mount.h copy

To pick the changes from:

  dab741e0e02bd3c4 ("Add a "nosymfollow" mount option.")

That ends up adding support for the new MS_NOSYMFOLLOW mount flag:

  $ tools/perf/trace/beauty/mount_flags.sh > before
  $ cp include/uapi/linux/mount.h tools/include/uapi/linux/mount.h
  $ tools/perf/trace/beauty/mount_flags.sh > after
  $ diff -u before after
  --- before	2020-11-03 08:51:28.117997454 -0300
  +++ after	2020-11-03 08:51:38.992218869 -0300
  @@ -7,6 +7,7 @@
   	[32 ? (ilog2(32) + 1) : 0] = "REMOUNT",
   	[64 ? (ilog2(64) + 1) : 0] = "MANDLOCK",
   	[128 ? (ilog2(128) + 1) : 0] = "DIRSYNC",
  +	[256 ? (ilog2(256) + 1) : 0] = "NOSYMFOLLOW",
   	[1024 ? (ilog2(1024) + 1) : 0] = "NOATIME",
   	[2048 ? (ilog2(2048) + 1) : 0] = "NODIRATIME",
   	[4096 ? (ilog2(4096) + 1) : 0] = "BIND",
  $

So now one can use it in --filter expressions for tracepoints.

This silences this perf build warnings:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/mount.h' differs from latest version at 'include/uapi/linux/mount.h'
  diff -u tools/include/uapi/linux/mount.h include/uapi/linux/mount.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mattias Nissler <mnissler@chromium.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/mount.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index 96a0240f23fe..dd8306ea336c 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -16,6 +16,7 @@
 #define MS_REMOUNT	32	/* Alter flags of a mounted FS */
 #define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
 #define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_NOSYMFOLLOW	256	/* Do not follow symlinks */
 #define MS_NOATIME	1024	/* Do not update access times. */
 #define MS_NODIRATIME	2048	/* Do not update directory access times */
 #define MS_BIND		4096
-- 
cgit v1.2.3-70-g09d2


From 86449b12f626a65d2a2ecfada1e024488471f9e2 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 30 Oct 2020 16:54:31 -0700
Subject: perf hists browser: Increase size of 'buf' in
 perf_evsel__hists_browse()

Making perf with gcc-9.1.1 generates the following warning:

    CC       ui/browsers/hists.o
  ui/browsers/hists.c: In function 'perf_evsel__hists_browse':
  ui/browsers/hists.c:3078:61: error: '%d' directive output may be \
  truncated writing between 1 and 11 bytes into a region of size \
  between 2 and 12 [-Werror=format-truncation=]

   3078 |       "Max event group index to sort is %d (index from 0 to %d)",
        |                                                             ^~
  ui/browsers/hists.c:3078:7: note: directive argument in the range [-2147483648, 8]
   3078 |       "Max event group index to sort is %d (index from 0 to %d)",
        |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  In file included from /usr/include/stdio.h:937,
                   from ui/browsers/hists.c:5:

IOW, the string in line 3078 might be too long for buf[] of 64 bytes.

Fix this by increasing the size of buf[] to 128.

Fixes: dbddf1747441  ("perf report/top TUI: Support hotkeys to let user select any event for sorting")
Signed-off-by: Song Liu <songliubraving@fb.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: stable@vger.kernel.org # v5.7+
Link: http://lore.kernel.org/lkml/20201030235431.534417-1-songliubraving@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index a07626f07208..b0e1880cf992 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2963,7 +2963,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
 	struct popup_action actions[MAX_OPTIONS];
 	int nr_options = 0;
 	int key = -1;
-	char buf[64];
+	char buf[128];
 	int delay_secs = hbt ? hbt->refresh : 0;
 
 #define HIST_BROWSER_HELP_COMMON					\
-- 
cgit v1.2.3-70-g09d2


From 6311951d4f8f28c43b554ff0719027884bedd7e3 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 2 Nov 2020 00:31:02 +0100
Subject: perf tools: Initialize output buffer in build_id__sprintf

We display garbage for undefined build_id objects, because we don't
initialize the output buffer.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20201101233103.3537427-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/build-id.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 8763772f1095..6b410c3d52dc 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -102,6 +102,8 @@ int build_id__sprintf(const struct build_id *build_id, char *bf)
 	const u8 *raw = build_id->data;
 	size_t i;
 
+	bf[0] = 0x0;
+
 	for (i = 0; i < build_id->size; ++i) {
 		sprintf(bid, "%02x", *raw);
 		++raw;
-- 
cgit v1.2.3-70-g09d2


From fe01adb72356a4e2f8735e4128af85921ca98fa1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 2 Nov 2020 00:31:03 +0100
Subject: perf tools: Add missing swap for ino_generation

We are missing swap for ino_generation field.

Fixes: 5c5e854bc760 ("perf tools: Add attr->mmap2 support")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20201101233103.3537427-2-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7a5f03764702..d20b16ee7377 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -595,6 +595,7 @@ static void perf_event__mmap2_swap(union perf_event *event,
 	event->mmap2.maj   = bswap_32(event->mmap2.maj);
 	event->mmap2.min   = bswap_32(event->mmap2.min);
 	event->mmap2.ino   = bswap_64(event->mmap2.ino);
+	event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
 
 	if (sample_id_all) {
 		void *data = &event->mmap2.filename;
-- 
cgit v1.2.3-70-g09d2


From 2c589d933e54d183ee2a052971b730e423c62031 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 2 Nov 2020 23:02:28 +0900
Subject: perf tools: Add missing swap for cgroup events

It was missed to add a swap function for PERF_RECORD_CGROUP.

Fixes: ba78c1c5461c ("perf tools: Basic support for CGROUP event")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201102140228.303657-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d20b16ee7377..098080287c68 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -711,6 +711,18 @@ static void perf_event__namespaces_swap(union perf_event *event,
 		swap_sample_id_all(event, &event->namespaces.link_info[i]);
 }
 
+static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all)
+{
+	event->cgroup.id = bswap_64(event->cgroup.id);
+
+	if (sample_id_all) {
+		void *data = &event->cgroup.path;
+
+		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+		swap_sample_id_all(event, data);
+	}
+}
+
 static u8 revbyte(u8 b)
 {
 	int rev = (b >> 4) | ((b & 0xf) << 4);
@@ -953,6 +965,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_SWITCH]		  = perf_event__switch_swap,
 	[PERF_RECORD_SWITCH_CPU_WIDE]	  = perf_event__switch_swap,
 	[PERF_RECORD_NAMESPACES]	  = perf_event__namespaces_swap,
+	[PERF_RECORD_CGROUP]		  = perf_event__cgroup_swap,
 	[PERF_RECORD_TEXT_POKE]		  = perf_event__text_poke_swap,
 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
-- 
cgit v1.2.3-70-g09d2


From 5d020cbd86204e51da05628623a6f9729d4b04c8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 3 Nov 2020 09:24:20 -0300
Subject: tools feature: Fixup fast path feature detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection")
correctly simplified the this feature detection, but forgot to remove
the call to the removed function in the main() function for the
test-all.c fast path feature detection, making it fail and thus do all
the feature detection individually, fix it.

  $ cat /tmp/build/perf/feature/test-all.make.output
  test-all.c: In function ‘main’:
  test-all.c:188:2: error: implicit declaration of function ‘main_test_libelf_mmap’; did you mean ‘main_test_libelf’? [-Werror=implicit-function-declaration]
    188 |  main_test_libelf_mmap();
        |  ^~~~~~~~~~~~~~~~~~~~~
        |  main_test_libelf
  cc1: all warnings being treated as errors
  $ vim tools/build/feature/test-all.c
  $ rm -rf /tmp/build/perf ; mkdir -p /tmp/build/perf ;make V=1 -k O=/tmp/build/perf  -C tools/perf install-bin ; perf test python
  <SNIP>
  $ cat /tmp/build/perf/feature/test-all.make.output
  $

Fixes: 22dd1ac91a776752 ("tools: Remove feature-libelf-mmap feature detection")
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/feature/test-all.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index a04e81321c66..464873883396 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -185,7 +185,6 @@ int main(int argc, char *argv[])
 	main_test_libperl();
 	main_test_hello();
 	main_test_libelf();
-	main_test_libelf_mmap();
 	main_test_get_current_dir_name();
 	main_test_gettid();
 	main_test_glibc();
-- 
cgit v1.2.3-70-g09d2


From aaf376bddf68d0afe5f4b5f25fc555da358e2287 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:34:48 -0800
Subject: selftests/bpf: Move test_tcppbf_user into test_progs

Recently a bug was missed due to the fact that test_tcpbpf_user is not a
part of test_progs. In order to prevent similar issues in the future move
the test functionality into test_progs. By doing this we can make certain
that it is a part of standard testing and will not be overlooked.

As a part of moving the functionality into test_progs it is necessary to
integrate with the test_progs framework and to drop any redundant code.
This patch:
1. Cleans up the include headers
2. Dropped a duplicate definition of bpf_find_map
3. Switched over to using test_progs specific cgroup functions
4. Renamed main to test_tcpbpf_user
5. Dropped return value in favor of CHECK_FAIL to check for errors

The general idea is that I wanted to keep the changes as small as possible
while moving the file into the test_progs framework. The follow-on patches
are meant to clean up the remaining issues such as the use of CHECK_FAIL.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/160443928881.1086697.17661359319919165370.stgit@localhost.localdomain
---
 tools/testing/selftests/bpf/.gitignore             |   1 -
 tools/testing/selftests/bpf/Makefile               |   3 +-
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 141 ++++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf_user.c     | 165 ---------------------
 4 files changed, 142 insertions(+), 168 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
 delete mode 100644 tools/testing/selftests/bpf/test_tcpbpf_user.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3ab1200e172f..395ae040ce1f 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -8,7 +8,6 @@ FEATURE-DUMP.libbpf
 fixdep
 test_dev_cgroup
 /test_progs*
-test_tcpbpf_user
 test_verifier_log
 feature
 test_sock
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 542768f5195b..50e5b18fc455 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread
 
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
-	test_verifier_log test_dev_cgroup test_tcpbpf_user \
+	test_verifier_log test_dev_cgroup \
 	test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
 	test_netcnt test_tcpnotify_user test_sysctl \
@@ -163,7 +163,6 @@ $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
 $(OUTPUT)/test_socket_cookie: cgroup_helpers.c
 $(OUTPUT)/test_sockmap: cgroup_helpers.c
-$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
 $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
new file mode 100644
index 000000000000..caa8d3adec8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <test_progs.h>
+
+#include "test_tcpbpf.h"
+
+#define CG_NAME "/tcpbpf-user-test"
+
+/* 3 comes from one listening socket + both ends of the connection */
+#define EXPECTED_CLOSE_EVENTS		3
+
+#define EXPECT_EQ(expected, actual, fmt)			\
+	do {							\
+		if ((expected) != (actual)) {			\
+			printf("  Value of: " #actual "\n"	\
+			       "    Actual: %" fmt "\n"		\
+			       "  Expected: %" fmt "\n",	\
+			       (actual), (expected));		\
+			ret--;					\
+		}						\
+	} while (0)
+
+int verify_result(const struct tcpbpf_globals *result)
+{
+	__u32 expected_events;
+	int ret = 0;
+
+	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+			   (1 << BPF_SOCK_OPS_RWND_INIT) |
+			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+			   (1 << BPF_SOCK_OPS_STATE_CB) |
+			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
+	EXPECT_EQ(501ULL, result->bytes_received, "llu");
+	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
+	EXPECT_EQ(1, result->data_segs_in, PRIu32);
+	EXPECT_EQ(1, result->data_segs_out, PRIu32);
+	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
+	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
+	EXPECT_EQ(1, result->num_listen, PRIu32);
+	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
+
+	return ret;
+}
+
+int verify_sockopt_result(int sock_map_fd)
+{
+	__u32 key = 0;
+	int ret = 0;
+	int res;
+	int rv;
+
+	/* check setsockopt for SAVE_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(0, res, "d");
+	key = 1;
+	/* check getsockopt for SAVED_SYN */
+	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
+	EXPECT_EQ(0, rv, "d");
+	EXPECT_EQ(1, res, "d");
+	return ret;
+}
+
+void test_tcpbpf_user(void)
+{
+	const char *file = "test_tcpbpf_kern.o";
+	int prog_fd, map_fd, sock_map_fd;
+	struct tcpbpf_globals g = {0};
+	int error = EXIT_FAILURE;
+	struct bpf_object *obj;
+	int cg_fd = -1;
+	int retry = 10;
+	__u32 key = 0;
+	int rv;
+
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (cg_fd < 0)
+		goto err;
+
+	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+		printf("FAILED: load_bpf_file failed for: %s\n", file);
+		goto err;
+	}
+
+	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (rv) {
+		printf("FAILED: bpf_prog_attach: %d (%s)\n",
+		       error, strerror(errno));
+		goto err;
+	}
+
+	if (system("./tcp_server.py")) {
+		printf("FAILED: TCP server\n");
+		goto err;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "global_map");
+	if (map_fd < 0)
+		goto err;
+
+	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
+	if (sock_map_fd < 0)
+		goto err;
+
+retry_lookup:
+	rv = bpf_map_lookup_elem(map_fd, &key, &g);
+	if (rv != 0) {
+		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+		goto err;
+	}
+
+	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
+		printf("Unexpected number of close events (%d), retrying!\n",
+		       g.num_close_events);
+		usleep(100);
+		goto retry_lookup;
+	}
+
+	if (verify_result(&g)) {
+		printf("FAILED: Wrong stats\n");
+		goto err;
+	}
+
+	if (verify_sockopt_result(sock_map_fd)) {
+		printf("FAILED: Wrong sockopt stats\n");
+		goto err;
+	}
+
+	error = 0;
+err:
+	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+	if (cg_fd != -1)
+		close(cg_fd);
+
+	CHECK_FAIL(error);
+}
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
deleted file mode 100644
index 74a9e49988b6..000000000000
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ /dev/null
@@ -1,165 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <sys/types.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
-#include "cgroup_helpers.h"
-
-#include "test_tcpbpf.h"
-
-/* 3 comes from one listening socket + both ends of the connection */
-#define EXPECTED_CLOSE_EVENTS		3
-
-#define EXPECT_EQ(expected, actual, fmt)			\
-	do {							\
-		if ((expected) != (actual)) {			\
-			printf("  Value of: " #actual "\n"	\
-			       "    Actual: %" fmt "\n"		\
-			       "  Expected: %" fmt "\n",	\
-			       (actual), (expected));		\
-			ret--;					\
-		}						\
-	} while (0)
-
-int verify_result(const struct tcpbpf_globals *result)
-{
-	__u32 expected_events;
-	int ret = 0;
-
-	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
-			   (1 << BPF_SOCK_OPS_RWND_INIT) |
-			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
-			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
-			   (1 << BPF_SOCK_OPS_STATE_CB) |
-			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-
-	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
-	EXPECT_EQ(501ULL, result->bytes_received, "llu");
-	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
-	EXPECT_EQ(1, result->data_segs_in, PRIu32);
-	EXPECT_EQ(1, result->data_segs_out, PRIu32);
-	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
-	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
-	EXPECT_EQ(1, result->num_listen, PRIu32);
-	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
-
-	return ret;
-}
-
-int verify_sockopt_result(int sock_map_fd)
-{
-	__u32 key = 0;
-	int ret = 0;
-	int res;
-	int rv;
-
-	/* check setsockopt for SAVE_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(0, res, "d");
-	key = 1;
-	/* check getsockopt for SAVED_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(1, res, "d");
-	return ret;
-}
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
-			const char *name)
-{
-	struct bpf_map *map;
-
-	map = bpf_object__find_map_by_name(obj, name);
-	if (!map) {
-		printf("%s:FAIL:map '%s' not found\n", test, name);
-		return -1;
-	}
-	return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
-	const char *file = "test_tcpbpf_kern.o";
-	int prog_fd, map_fd, sock_map_fd;
-	struct tcpbpf_globals g = {0};
-	const char *cg_path = "/foo";
-	int error = EXIT_FAILURE;
-	struct bpf_object *obj;
-	int cg_fd = -1;
-	int retry = 10;
-	__u32 key = 0;
-	int rv;
-
-	cg_fd = cgroup_setup_and_join(cg_path);
-	if (cg_fd < 0)
-		goto err;
-
-	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
-		printf("FAILED: load_bpf_file failed for: %s\n", file);
-		goto err;
-	}
-
-	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
-	if (rv) {
-		printf("FAILED: bpf_prog_attach: %d (%s)\n",
-		       error, strerror(errno));
-		goto err;
-	}
-
-	if (system("./tcp_server.py")) {
-		printf("FAILED: TCP server\n");
-		goto err;
-	}
-
-	map_fd = bpf_find_map(__func__, obj, "global_map");
-	if (map_fd < 0)
-		goto err;
-
-	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
-	if (sock_map_fd < 0)
-		goto err;
-
-retry_lookup:
-	rv = bpf_map_lookup_elem(map_fd, &key, &g);
-	if (rv != 0) {
-		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
-		goto err;
-	}
-
-	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
-		printf("Unexpected number of close events (%d), retrying!\n",
-		       g.num_close_events);
-		usleep(100);
-		goto retry_lookup;
-	}
-
-	if (verify_result(&g)) {
-		printf("FAILED: Wrong stats\n");
-		goto err;
-	}
-
-	if (verify_sockopt_result(sock_map_fd)) {
-		printf("FAILED: Wrong sockopt stats\n");
-		goto err;
-	}
-
-	printf("PASSED!\n");
-	error = 0;
-err:
-	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
-	close(cg_fd);
-	cleanup_cgroup_environment();
-	return error;
-}
-- 
cgit v1.2.3-70-g09d2


From 247f0ec361b7e0c5c67db8222873182fb8be5146 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:34:56 -0800
Subject: selftests/bpf: Drop python client/server in favor of threads

Drop the tcp_client/server.py files in favor of using a client and server
thread within the test case. Specifically we spawn a new thread to play the
role of the server, and the main testing thread plays the role of client.

Add logic to the end of the run_test function to guarantee that the sockets
are closed when we begin verifying results.

Doing this we are able to reduce overhead since we don't have two python
workers possibly floating around. In addition we don't have to worry about
synchronization issues and as such the retry loop waiting for the threads
to close the sockets can be dropped as we will have already closed the
sockets in the local executable and synchronized the server thread.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/160443929638.1086697.2430242340980315521.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 95 ++++++++++++++++++----
 tools/testing/selftests/bpf/tcp_client.py          | 50 ------------
 tools/testing/selftests/bpf/tcp_server.py          | 80 ------------------
 3 files changed, 78 insertions(+), 147 deletions(-)
 delete mode 100755 tools/testing/selftests/bpf/tcp_client.py
 delete mode 100755 tools/testing/selftests/bpf/tcp_server.py

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index caa8d3adec8a..616269abdb41 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -1,13 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <inttypes.h>
 #include <test_progs.h>
+#include <network_helpers.h>
 
 #include "test_tcpbpf.h"
 
+#define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-user-test"
 
-/* 3 comes from one listening socket + both ends of the connection */
-#define EXPECTED_CLOSE_EVENTS		3
+static __u32 duration;
 
 #define EXPECT_EQ(expected, actual, fmt)			\
 	do {							\
@@ -42,7 +43,9 @@ int verify_result(const struct tcpbpf_globals *result)
 	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
 	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
 	EXPECT_EQ(1, result->num_listen, PRIu32);
-	EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
+
+	/* 3 comes from one listening socket + both ends of the connection */
+	EXPECT_EQ(3, result->num_close_events, PRIu32);
 
 	return ret;
 }
@@ -66,6 +69,75 @@ int verify_sockopt_result(int sock_map_fd)
 	return ret;
 }
 
+static int run_test(void)
+{
+	int listen_fd = -1, cli_fd = -1, accept_fd = -1;
+	char buf[1000];
+	int err = -1;
+	int i, rv;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+	if (CHECK(listen_fd == -1, "start_server", "listen_fd:%d errno:%d\n",
+		  listen_fd, errno))
+		goto done;
+
+	cli_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)",
+		  "cli_fd:%d errno:%d\n", cli_fd, errno))
+		goto done;
+
+	accept_fd = accept(listen_fd, NULL, NULL);
+	if (CHECK(accept_fd == -1, "accept(listen_fd)",
+		  "accept_fd:%d errno:%d\n", accept_fd, errno))
+		goto done;
+
+	/* Send 1000B of '+'s from cli_fd -> accept_fd */
+	for (i = 0; i < 1000; i++)
+		buf[i] = '+';
+
+	rv = send(cli_fd, buf, 1000, 0);
+	if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	rv = recv(accept_fd, buf, 1000, 0);
+	if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	/* Send 500B of '.'s from accept_fd ->cli_fd */
+	for (i = 0; i < 500; i++)
+		buf[i] = '.';
+
+	rv = send(accept_fd, buf, 500, 0);
+	if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	rv = recv(cli_fd, buf, 500, 0);
+	if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+		goto done;
+
+	/*
+	 * shutdown accept first to guarantee correct ordering for
+	 * bytes_received and bytes_acked when we go to verify the results.
+	 */
+	shutdown(accept_fd, SHUT_WR);
+	err = recv(cli_fd, buf, 1, 0);
+	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno))
+		goto done;
+
+	shutdown(cli_fd, SHUT_WR);
+	err = recv(accept_fd, buf, 1, 0);
+	CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno);
+done:
+	if (accept_fd != -1)
+		close(accept_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+	if (listen_fd != -1)
+		close(listen_fd);
+
+	return err;
+}
+
 void test_tcpbpf_user(void)
 {
 	const char *file = "test_tcpbpf_kern.o";
@@ -74,7 +146,6 @@ void test_tcpbpf_user(void)
 	int error = EXIT_FAILURE;
 	struct bpf_object *obj;
 	int cg_fd = -1;
-	int retry = 10;
 	__u32 key = 0;
 	int rv;
 
@@ -94,11 +165,6 @@ void test_tcpbpf_user(void)
 		goto err;
 	}
 
-	if (system("./tcp_server.py")) {
-		printf("FAILED: TCP server\n");
-		goto err;
-	}
-
 	map_fd = bpf_find_map(__func__, obj, "global_map");
 	if (map_fd < 0)
 		goto err;
@@ -107,20 +173,15 @@ void test_tcpbpf_user(void)
 	if (sock_map_fd < 0)
 		goto err;
 
-retry_lookup:
+	if (run_test())
+		goto err;
+
 	rv = bpf_map_lookup_elem(map_fd, &key, &g);
 	if (rv != 0) {
 		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
 		goto err;
 	}
 
-	if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
-		printf("Unexpected number of close events (%d), retrying!\n",
-		       g.num_close_events);
-		usleep(100);
-		goto retry_lookup;
-	}
-
 	if (verify_result(&g)) {
 		printf("FAILED: Wrong stats\n");
 		goto err;
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
deleted file mode 100755
index bfff82be3fc1..000000000000
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-serverPort = int(sys.argv[1])
-
-# create active socket
-sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try:
-    sock.connect(('::1', serverPort))
-except socket.error as e:
-    sys.exit(1)
-
-buf = b''
-n = 0
-while n < 1000:
-    buf += b'+'
-    n += 1
-
-sock.settimeout(1);
-n = send(sock, buf)
-n = read(sock, 500)
-sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
deleted file mode 100755
index 42ab8882f00f..000000000000
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
-    buf = b''
-    while len(buf) < n:
-        rem = n - len(buf)
-        try: s = sock.recv(rem)
-        except (socket.error) as e: return b''
-        buf += s
-    return buf
-
-def send(sock, s):
-    total = len(s)
-    count = 0
-    while count < total:
-        try: n = sock.send(s)
-        except (socket.error) as e: n = 0
-        if n == 0:
-            return count;
-        count += n
-    return count
-
-
-SERVER_PORT = 12877
-MAX_PORTS = 2
-
-serverPort = SERVER_PORT
-serverSocket = None
-
-# create passive socket
-serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-
-try: serverSocket.bind(('::1', 0))
-except socket.error as msg:
-    print('bind fails: ' + str(msg))
-
-sn = serverSocket.getsockname()
-serverPort = sn[1]
-
-cmdStr = ("./tcp_client.py %d &") % (serverPort)
-os.system(cmdStr)
-
-buf = b''
-n = 0
-while n < 500:
-    buf += b'.'
-    n += 1
-
-serverSocket.listen(MAX_PORTS)
-readList = [serverSocket]
-
-while True:
-    readyRead, readyWrite, inError = \
-        select.select(readList, [], [], 2)
-
-    if len(readyRead) > 0:
-        waitCount = 0
-        for sock in readyRead:
-            if sock == serverSocket:
-                (clientSocket, address) = serverSocket.accept()
-                address = str(address[0])
-                readList.append(clientSocket)
-            else:
-                sock.settimeout(1);
-                s = read(sock, 1000)
-                n = send(sock, buf)
-                sock.close()
-                serverSocket.close()
-                sys.exit(0)
-    else:
-        print('Select timeout!')
-        sys.exit(1)
-- 
cgit v1.2.3-70-g09d2


From d3813ea14b696053c076123239093822b527f0f7 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:35:04 -0800
Subject: selftests/bpf: Replace EXPECT_EQ with ASSERT_EQ and refactor
 verify_results

There is already logic in test_progs.h for asserting that a value is
expected to be another value. So instead of reinventing it we should just
make use of ASSERT_EQ in tcpbpf_user.c. This will allow for better
debugging and integrates much more closely with the test_progs framework.

In addition we can refactor the code a bit to merge together the two
verify functions and tie them together into a single function. Doing this
helps to clean the code up a bit and makes it more readable as all the
verification is now done in one function.

Lastly we can relocate the verification to the end of the run_test since it
is logically part of the test itself. With this we can drop the need for a
return value from run_test since verification becomes the last step of the
call and then immediately following is the tear down of the test setup.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/160443930408.1086697.16101205859962113000.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 115 ++++++++-------------
 1 file changed, 43 insertions(+), 72 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 616269abdb41..22c359871af6 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
 #include <test_progs.h>
 #include <network_helpers.h>
 
@@ -10,66 +9,56 @@
 
 static __u32 duration;
 
-#define EXPECT_EQ(expected, actual, fmt)			\
-	do {							\
-		if ((expected) != (actual)) {			\
-			printf("  Value of: " #actual "\n"	\
-			       "    Actual: %" fmt "\n"		\
-			       "  Expected: %" fmt "\n",	\
-			       (actual), (expected));		\
-			ret--;					\
-		}						\
-	} while (0)
-
-int verify_result(const struct tcpbpf_globals *result)
+static void verify_result(int map_fd, int sock_map_fd)
 {
-	__u32 expected_events;
-	int ret = 0;
-
-	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
-			   (1 << BPF_SOCK_OPS_RWND_INIT) |
-			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
-			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
-			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
-			   (1 << BPF_SOCK_OPS_STATE_CB) |
-			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-
-	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
-	EXPECT_EQ(501ULL, result->bytes_received, "llu");
-	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
-	EXPECT_EQ(1, result->data_segs_in, PRIu32);
-	EXPECT_EQ(1, result->data_segs_out, PRIu32);
-	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
-	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
-	EXPECT_EQ(1, result->num_listen, PRIu32);
+	__u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+				 (1 << BPF_SOCK_OPS_RWND_INIT) |
+				 (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+				 (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+				 (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+				 (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+				 (1 << BPF_SOCK_OPS_STATE_CB) |
+				 (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+	struct tcpbpf_globals result;
+	__u32 key = 0;
+	int res, rv;
+
+	rv = bpf_map_lookup_elem(map_fd, &key, &result);
+	if (CHECK(rv, "bpf_map_lookup_elem(map_fd)", "err:%d errno:%d",
+		  rv, errno))
+		return;
+
+	/* check global map */
+	CHECK(expected_events != result.event_map, "event_map",
+	      "unexpected event_map: actual 0x%08x != expected 0x%08x\n",
+	      result.event_map, expected_events);
+
+	ASSERT_EQ(result.bytes_received, 501, "bytes_received");
+	ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
+	ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
+	ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
+	ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
+	ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
+	ASSERT_EQ(result.num_listen, 1, "num_listen");
 
 	/* 3 comes from one listening socket + both ends of the connection */
-	EXPECT_EQ(3, result->num_close_events, PRIu32);
-
-	return ret;
-}
-
-int verify_sockopt_result(int sock_map_fd)
-{
-	__u32 key = 0;
-	int ret = 0;
-	int res;
-	int rv;
+	ASSERT_EQ(result.num_close_events, 3, "num_close_events");
 
 	/* check setsockopt for SAVE_SYN */
 	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(0, res, "d");
-	key = 1;
+	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
+	      rv, errno);
+	ASSERT_EQ(res, 0, "bpf_setsockopt(TCP_SAVE_SYN)");
+
 	/* check getsockopt for SAVED_SYN */
+	key = 1;
 	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	EXPECT_EQ(0, rv, "d");
-	EXPECT_EQ(1, res, "d");
-	return ret;
+	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
+	      rv, errno);
+	ASSERT_EQ(res, 1, "bpf_getsockopt(TCP_SAVED_SYN)");
 }
 
-static int run_test(void)
+static void run_test(int map_fd, int sock_map_fd)
 {
 	int listen_fd = -1, cli_fd = -1, accept_fd = -1;
 	char buf[1000];
@@ -135,18 +124,17 @@ done:
 	if (listen_fd != -1)
 		close(listen_fd);
 
-	return err;
+	if (!err)
+		verify_result(map_fd, sock_map_fd);
 }
 
 void test_tcpbpf_user(void)
 {
 	const char *file = "test_tcpbpf_kern.o";
 	int prog_fd, map_fd, sock_map_fd;
-	struct tcpbpf_globals g = {0};
 	int error = EXIT_FAILURE;
 	struct bpf_object *obj;
 	int cg_fd = -1;
-	__u32 key = 0;
 	int rv;
 
 	cg_fd = test__join_cgroup(CG_NAME);
@@ -173,24 +161,7 @@ void test_tcpbpf_user(void)
 	if (sock_map_fd < 0)
 		goto err;
 
-	if (run_test())
-		goto err;
-
-	rv = bpf_map_lookup_elem(map_fd, &key, &g);
-	if (rv != 0) {
-		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
-		goto err;
-	}
-
-	if (verify_result(&g)) {
-		printf("FAILED: Wrong stats\n");
-		goto err;
-	}
-
-	if (verify_sockopt_result(sock_map_fd)) {
-		printf("FAILED: Wrong sockopt stats\n");
-		goto err;
-	}
+	run_test(map_fd, sock_map_fd);
 
 	error = 0;
 err:
-- 
cgit v1.2.3-70-g09d2


From 0a099d1429c709020277d24a460d11ff8356a080 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:35:11 -0800
Subject: selftests/bpf: Migrate tcpbpf_user.c to use BPF skeleton

Update tcpbpf_user.c to make use of the BPF skeleton. Doing this we can
simplify test_tcpbpf_user and reduce the overhead involved in setting up
the test.

In addition we can clean up the remaining bits such as the one remaining
CHECK_FAIL at the end of test_tcpbpf_user so that the function only makes
use of CHECK as needed.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/160443931155.1086697.17869006617113525162.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 41 ++++++++--------------
 1 file changed, 14 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 22c359871af6..bef81648797a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -3,6 +3,7 @@
 #include <network_helpers.h>
 
 #include "test_tcpbpf.h"
+#include "test_tcpbpf_kern.skel.h"
 
 #define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-user-test"
@@ -130,44 +131,30 @@ done:
 
 void test_tcpbpf_user(void)
 {
-	const char *file = "test_tcpbpf_kern.o";
-	int prog_fd, map_fd, sock_map_fd;
-	int error = EXIT_FAILURE;
-	struct bpf_object *obj;
+	struct test_tcpbpf_kern *skel;
+	int map_fd, sock_map_fd;
 	int cg_fd = -1;
-	int rv;
 
-	cg_fd = test__join_cgroup(CG_NAME);
-	if (cg_fd < 0)
-		goto err;
-
-	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
-		printf("FAILED: load_bpf_file failed for: %s\n", file);
-		goto err;
-	}
+	skel = test_tcpbpf_kern__open_and_load();
+	if (CHECK(!skel, "open and load skel", "failed"))
+		return;
 
-	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
-	if (rv) {
-		printf("FAILED: bpf_prog_attach: %d (%s)\n",
-		       error, strerror(errno));
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME ")",
+		  "cg_fd:%d errno:%d", cg_fd, errno))
 		goto err;
-	}
 
-	map_fd = bpf_find_map(__func__, obj, "global_map");
-	if (map_fd < 0)
-		goto err;
+	map_fd = bpf_map__fd(skel->maps.global_map);
+	sock_map_fd = bpf_map__fd(skel->maps.sockopt_results);
 
-	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
-	if (sock_map_fd < 0)
+	skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
+	if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)"))
 		goto err;
 
 	run_test(map_fd, sock_map_fd);
 
-	error = 0;
 err:
-	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
 	if (cg_fd != -1)
 		close(cg_fd);
-
-	CHECK_FAIL(error);
+	test_tcpbpf_kern__destroy(skel);
 }
-- 
cgit v1.2.3-70-g09d2


From 21b5177e997c98643eaabd4b917f2e287395af86 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexanderduyck@fb.com>
Date: Tue, 3 Nov 2020 13:35:19 -0800
Subject: selftest/bpf: Use global variables instead of maps for
 test_tcpbpf_kern

Use global variables instead of global_map and sockopt_results_map to track
test data. Doing this greatly simplifies the code as there is not need to
take the extra steps of updating the maps or looking up elements.

Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/160443931900.1086697.6588858453575682351.stgit@localhost.localdomain
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 51 ++++---------
 .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 86 ++++------------------
 tools/testing/selftests/bpf/test_tcpbpf.h          |  2 +
 3 files changed, 31 insertions(+), 108 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index bef81648797a..ab5281475f44 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -10,7 +10,7 @@
 
 static __u32 duration;
 
-static void verify_result(int map_fd, int sock_map_fd)
+static void verify_result(struct tcpbpf_globals *result)
 {
 	__u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
 				 (1 << BPF_SOCK_OPS_RWND_INIT) |
@@ -20,46 +20,31 @@ static void verify_result(int map_fd, int sock_map_fd)
 				 (1 << BPF_SOCK_OPS_NEEDS_ECN) |
 				 (1 << BPF_SOCK_OPS_STATE_CB) |
 				 (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-	struct tcpbpf_globals result;
-	__u32 key = 0;
-	int res, rv;
-
-	rv = bpf_map_lookup_elem(map_fd, &key, &result);
-	if (CHECK(rv, "bpf_map_lookup_elem(map_fd)", "err:%d errno:%d",
-		  rv, errno))
-		return;
 
 	/* check global map */
-	CHECK(expected_events != result.event_map, "event_map",
+	CHECK(expected_events != result->event_map, "event_map",
 	      "unexpected event_map: actual 0x%08x != expected 0x%08x\n",
-	      result.event_map, expected_events);
+	      result->event_map, expected_events);
 
-	ASSERT_EQ(result.bytes_received, 501, "bytes_received");
-	ASSERT_EQ(result.bytes_acked, 1002, "bytes_acked");
-	ASSERT_EQ(result.data_segs_in, 1, "data_segs_in");
-	ASSERT_EQ(result.data_segs_out, 1, "data_segs_out");
-	ASSERT_EQ(result.bad_cb_test_rv, 0x80, "bad_cb_test_rv");
-	ASSERT_EQ(result.good_cb_test_rv, 0, "good_cb_test_rv");
-	ASSERT_EQ(result.num_listen, 1, "num_listen");
+	ASSERT_EQ(result->bytes_received, 501, "bytes_received");
+	ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked");
+	ASSERT_EQ(result->data_segs_in, 1, "data_segs_in");
+	ASSERT_EQ(result->data_segs_out, 1, "data_segs_out");
+	ASSERT_EQ(result->bad_cb_test_rv, 0x80, "bad_cb_test_rv");
+	ASSERT_EQ(result->good_cb_test_rv, 0, "good_cb_test_rv");
+	ASSERT_EQ(result->num_listen, 1, "num_listen");
 
 	/* 3 comes from one listening socket + both ends of the connection */
-	ASSERT_EQ(result.num_close_events, 3, "num_close_events");
+	ASSERT_EQ(result->num_close_events, 3, "num_close_events");
 
 	/* check setsockopt for SAVE_SYN */
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
-	      rv, errno);
-	ASSERT_EQ(res, 0, "bpf_setsockopt(TCP_SAVE_SYN)");
+	ASSERT_EQ(result->tcp_save_syn, 0, "tcp_save_syn");
 
 	/* check getsockopt for SAVED_SYN */
-	key = 1;
-	rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
-	CHECK(rv, "bpf_map_lookup_elem(sock_map_fd)", "err:%d errno:%d",
-	      rv, errno);
-	ASSERT_EQ(res, 1, "bpf_getsockopt(TCP_SAVED_SYN)");
+	ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn");
 }
 
-static void run_test(int map_fd, int sock_map_fd)
+static void run_test(struct tcpbpf_globals *result)
 {
 	int listen_fd = -1, cli_fd = -1, accept_fd = -1;
 	char buf[1000];
@@ -126,13 +111,12 @@ done:
 		close(listen_fd);
 
 	if (!err)
-		verify_result(map_fd, sock_map_fd);
+		verify_result(result);
 }
 
 void test_tcpbpf_user(void)
 {
 	struct test_tcpbpf_kern *skel;
-	int map_fd, sock_map_fd;
 	int cg_fd = -1;
 
 	skel = test_tcpbpf_kern__open_and_load();
@@ -144,14 +128,11 @@ void test_tcpbpf_user(void)
 		  "cg_fd:%d errno:%d", cg_fd, errno))
 		goto err;
 
-	map_fd = bpf_map__fd(skel->maps.global_map);
-	sock_map_fd = bpf_map__fd(skel->maps.sockopt_results);
-
 	skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
 	if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)"))
 		goto err;
 
-	run_test(map_fd, sock_map_fd);
+	run_test(&skel->bss->global);
 
 err:
 	if (cg_fd != -1)
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 3e6912e4df3d..e85e49deba70 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -14,40 +14,7 @@
 #include <bpf/bpf_endian.h>
 #include "test_tcpbpf.h"
 
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 4);
-	__type(key, __u32);
-	__type(value, struct tcpbpf_globals);
-} global_map SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 2);
-	__type(key, __u32);
-	__type(value, int);
-} sockopt_results SEC(".maps");
-
-static inline void update_event_map(int event)
-{
-	__u32 key = 0;
-	struct tcpbpf_globals g, *gp;
-
-	gp = bpf_map_lookup_elem(&global_map, &key);
-	if (gp == NULL) {
-		struct tcpbpf_globals g = {0};
-
-		g.event_map |= (1 << event);
-		bpf_map_update_elem(&global_map, &key, &g,
-			    BPF_ANY);
-	} else {
-		g = *gp;
-		g.event_map |= (1 << event);
-		bpf_map_update_elem(&global_map, &key, &g,
-			    BPF_ANY);
-	}
-}
-
+struct tcpbpf_globals global = {};
 int _version SEC("version") = 1;
 
 SEC("sockops")
@@ -105,29 +72,15 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 
 	op = (int) skops->op;
 
-	update_event_map(op);
+	global.event_map |= (1 << op);
 
 	switch (op) {
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 		/* Test failure to set largest cb flag (assumes not defined) */
-		bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+		global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
 		/* Set callback */
-		good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+		global.good_cb_test_rv = bpf_sock_ops_cb_flags_set(skops,
 						 BPF_SOCK_OPS_STATE_CB_FLAG);
-		/* Update results */
-		{
-			__u32 key = 0;
-			struct tcpbpf_globals g, *gp;
-
-			gp = bpf_map_lookup_elem(&global_map, &key);
-			if (!gp)
-				break;
-			g = *gp;
-			g.bad_cb_test_rv = bad_call_rv;
-			g.good_cb_test_rv = good_call_rv;
-			bpf_map_update_elem(&global_map, &key, &g,
-					    BPF_ANY);
-		}
 		break;
 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
 		skops->sk_txhash = 0x12345f;
@@ -143,10 +96,8 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 
 				thdr = (struct tcphdr *)(header + offset);
 				v = thdr->syn;
-				__u32 key = 1;
 
-				bpf_map_update_elem(&sockopt_results, &key, &v,
-						    BPF_ANY);
+				global.tcp_saved_syn = v;
 			}
 		}
 		break;
@@ -156,25 +107,16 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		break;
 	case BPF_SOCK_OPS_STATE_CB:
 		if (skops->args[1] == BPF_TCP_CLOSE) {
-			__u32 key = 0;
-			struct tcpbpf_globals g, *gp;
-
-			gp = bpf_map_lookup_elem(&global_map, &key);
-			if (!gp)
-				break;
-			g = *gp;
 			if (skops->args[0] == BPF_TCP_LISTEN) {
-				g.num_listen++;
+				global.num_listen++;
 			} else {
-				g.total_retrans = skops->total_retrans;
-				g.data_segs_in = skops->data_segs_in;
-				g.data_segs_out = skops->data_segs_out;
-				g.bytes_received = skops->bytes_received;
-				g.bytes_acked = skops->bytes_acked;
+				global.total_retrans = skops->total_retrans;
+				global.data_segs_in = skops->data_segs_in;
+				global.data_segs_out = skops->data_segs_out;
+				global.bytes_received = skops->bytes_received;
+				global.bytes_acked = skops->bytes_acked;
 			}
-			g.num_close_events++;
-			bpf_map_update_elem(&global_map, &key, &g,
-					    BPF_ANY);
+			global.num_close_events++;
 		}
 		break;
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
@@ -182,9 +124,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 		v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
 				   &save_syn, sizeof(save_syn));
 		/* Update global map w/ result of setsock opt */
-		__u32 key = 0;
-
-		bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
+		global.tcp_save_syn = v;
 		break;
 	default:
 		rv = -1;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 6220b95cbd02..0ed33521cbbb 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -14,5 +14,7 @@ struct tcpbpf_globals {
 	__u64 bytes_acked;
 	__u32 num_listen;
 	__u32 num_close_events;
+	__u32 tcp_save_syn;
+	__u32 tcp_saved_syn;
 };
 #endif
-- 
cgit v1.2.3-70-g09d2


From a701d28e2d997705ae4376753af6e35b20029cef Mon Sep 17 00:00:00 2001
From: Dengcheng Zhu <dzhu@wavecomp.com>
Date: Mon, 19 Oct 2020 15:21:24 +0800
Subject: perf annotate mips: Add perf arch instructions annotate handlers

Support the MIPS architecture using the ins_ops association method. With
this patch, perf-annotate can work well on MIPS.

Testing it with a perf.data file collected on a mips machine:

$./perf annotate -i perf.data

         :           Disassembly of section .text:
         :
         :           00000000000be6a0 <get_next_seq>:
         :           get_next_seq():
    0.00 :   be6a0:       lw      v0,0(a0)
    0.00 :   be6a4:       daddiu  sp,sp,-128
    0.00 :   be6a8:       ld      a7,72(a0)
    0.00 :   be6ac:       gssq    s5,s4,80(sp)
    0.00 :   be6b0:       gssq    s1,s0,48(sp)
    0.00 :   be6b4:       gssq    s8,gp,112(sp)
    0.00 :   be6b8:       gssq    s7,s6,96(sp)
    0.00 :   be6bc:       gssq    s3,s2,64(sp)
    0.00 :   be6c0:       sd      a3,0(sp)
    0.00 :   be6c4:       move    s0,a0
    0.00 :   be6c8:       sd      v0,32(sp)
    0.00 :   be6cc:       sd      a5,8(sp)
    0.00 :   be6d0:       sd      zero,8(a0)
    0.00 :   be6d4:       sd      a6,16(sp)
    0.00 :   be6d8:       ld      s2,48(a0)
    8.53 :   be6dc:       ld      s1,40(a0)
    9.42 :   be6e0:       ld      v1,32(a0)
    0.00 :   be6e4:       nop
    0.00 :   be6e8:       ld      s4,24(a0)
    0.00 :   be6ec:       ld      s5,16(a0)
    0.00 :   be6f0:       sd      a7,40(sp)
   10.11 :   be6f4:       ld      s6,64(a0)

...

The original patch link:
https://lore.kernel.org/patchwork/patch/1180480/

Signed-off-by: Dengcheng Zhu <dzhu@wavecomp.com>
Cc: Dengcheng Zhu <dzhu@wavecomp.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Cc: linux-mips@vger.kernel.org
[ fanpeng@loongson.cn: Add missing "bgtzl", "bltzl", "bgezl", "blezl", "beql" and "bnel" for pre-R6processors ]
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/mips/Build                   |  2 +-
 tools/perf/arch/mips/annotate/instructions.c | 46 ++++++++++++++++++++++++++++
 tools/perf/util/annotate.c                   |  8 +++++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/arch/mips/annotate/instructions.c

(limited to 'tools')

diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
index 1bb8bf6d7fd4..e4e5f33c84d8 100644
--- a/tools/perf/arch/mips/Build
+++ b/tools/perf/arch/mips/Build
@@ -1 +1 @@
-# empty
+perf-y += util/
diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c
new file mode 100644
index 000000000000..340993f2a897
--- /dev/null
+++ b/tools/perf/arch/mips/annotate/instructions.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static
+struct ins_ops *mips__associate_ins_ops(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = NULL;
+
+	if (!strncmp(name, "bal", 3) ||
+	    !strncmp(name, "bgezal", 6) ||
+	    !strncmp(name, "bltzal", 6) ||
+	    !strncmp(name, "bgtzal", 6) ||
+	    !strncmp(name, "blezal", 6) ||
+	    !strncmp(name, "beqzal", 6) ||
+	    !strncmp(name, "bnezal", 6) ||
+	    !strncmp(name, "bgtzl", 5) ||
+	    !strncmp(name, "bltzl", 5) ||
+	    !strncmp(name, "bgezl", 5) ||
+	    !strncmp(name, "blezl", 5) ||
+	    !strncmp(name, "jialc", 5) ||
+	    !strncmp(name, "beql", 4) ||
+	    !strncmp(name, "bnel", 4) ||
+	    !strncmp(name, "jal", 3))
+		ops = &call_ops;
+	else if (!strncmp(name, "jr", 2))
+		ops = &ret_ops;
+	else if (name[0] == 'j' || name[0] == 'b')
+		ops = &jump_ops;
+	else
+		return NULL;
+
+	arch__associate_ins_ops(arch, name, ops);
+
+	return ops;
+}
+
+static
+int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	if (!arch->initialized) {
+		arch->associate_instruction_ops = mips__associate_ins_ops;
+		arch->initialized = true;
+		arch->objdump.comment_char = '#';
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 6c8575e182ed..e52053a6ad42 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -152,6 +152,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 #include "arch/arm/annotate/instructions.c"
 #include "arch/arm64/annotate/instructions.c"
 #include "arch/csky/annotate/instructions.c"
+#include "arch/mips/annotate/instructions.c"
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 #include "arch/s390/annotate/instructions.c"
@@ -174,6 +175,13 @@ static struct arch architectures[] = {
 		.name = "csky",
 		.init = csky__annotate_init,
 	},
+	{
+		.name = "mips",
+		.init = mips__annotate_init,
+		.objdump = {
+			.comment_char = '#',
+		},
+	},
 	{
 		.name = "x86",
 		.init = x86__annotate_init,
-- 
cgit v1.2.3-70-g09d2


From a7c77c4f52c80fffc53b4c616a95f96d57170933 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 19 Oct 2020 16:25:45 -0700
Subject: perf version: Add a feature for libpfm4

If perf is built with libpfm4 (LIBPFM4=1) then advertise it in perf -vv.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201019232545.4047264-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-version.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index d09ec2f03071..9cd074a3d825 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -80,6 +80,7 @@ static void library_status(void)
 	STATUS(HAVE_LIBBPF_SUPPORT, bpf);
 	STATUS(HAVE_AIO_SUPPORT, aio);
 	STATUS(HAVE_ZSTD_SUPPORT, zstd);
+	STATUS(HAVE_LIBPFM, libpfm4);
 }
 
 int cmd_version(int argc, const char **argv)
-- 
cgit v1.2.3-70-g09d2


From 0ee281e1e4e12f8c09b99f80a2482a55cd7d6bca Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 19 Oct 2020 08:36:13 +0800
Subject: perf mem2node: Improve warning if detected no memory nodes

Some archs (e.g. x86 and Arm64) don't enable the configuration
CONFIG_MEMORY_HOTPLUG by default, if this configuration is not enabled
when build the kernel image, the SysFS for memory nodes will be missed.
This results in perf tool has no chance to catpure the memory nodes
information, when perf tool reports the result and detects no memory
nodes, it outputs "assertion failed at util/mem2node.c:99".

The output log doesn't give out reason for the failure and users have no
clue for how to fix it.  This patch changes to use explicit way for
warning: it tells user that detected no memory nodes and suggests to
enable CONFIG_MEMORY_HOTPLUG for kernel building.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201019003613.8399-1-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mem2node.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c
index c84f5841c7ab..03a7d7b27737 100644
--- a/tools/perf/util/mem2node.c
+++ b/tools/perf/util/mem2node.c
@@ -96,7 +96,8 @@ int mem2node__init(struct mem2node *map, struct perf_env *env)
 
 	/* Cut unused entries, due to merging. */
 	tmp_entries = realloc(entries, sizeof(*entries) * j);
-	if (tmp_entries || WARN_ON_ONCE(j == 0))
+	if (tmp_entries ||
+	    WARN_ONCE(j == 0, "No memory nodes, is CONFIG_MEMORY_HOTPLUG enabled?\n"))
 		entries = tmp_entries;
 
 	for (i = 0; i < j; i++) {
-- 
cgit v1.2.3-70-g09d2


From 3989bbf9607d6716900d9df91c46a2ce8a504b93 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 19 Oct 2020 18:02:35 +0800
Subject: perf tests tsc: Make tsc testing as a common testing

x86 arch provides the testing for conversion between tsc and perf time,
the testing is located in x86 arch folder.  Move this testing out from
x86 arch folder and place it into the common testing folder, so allows
to execute tsc testing on other architectures (e.g. Arm64).

This patch removes the inclusion of "arch-tests.h" from the testing
code, this can avoid building failure if any arch has no this header
file.

Committer testing:

  $ perf test -v tsc
  Couldn't bump rlimit(MEMLOCK), failures may take place when creating BPF maps, etc
  70: Convert perf time to TSC                                        :
  --- start ---
  test child forked, pid 4032834
  mmap size 528384B
  1st event perf time 165409788843605 tsc 336578703793868
  rdtsc          time 165409788854986 tsc 336578703837038
  2nd event perf time 165409788855487 tsc 336578703838935
  test child finished with 0
  ---- end ----
  Convert perf time to TSC: Ok
  $

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/20201019100236.23675-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/include/arch-tests.h     |   1 -
 tools/perf/arch/x86/tests/Build              |   1 -
 tools/perf/arch/x86/tests/arch-tests.c       |   4 -
 tools/perf/arch/x86/tests/perf-time-to-tsc.c | 173 ---------------------------
 tools/perf/tests/Build                       |   1 +
 tools/perf/tests/builtin-test.c              |   4 +
 tools/perf/tests/perf-time-to-tsc.c          | 171 ++++++++++++++++++++++++++
 tools/perf/tests/tests.h                     |   1 +
 8 files changed, 177 insertions(+), 179 deletions(-)
 delete mode 100644 tools/perf/arch/x86/tests/perf-time-to-tsc.c
 create mode 100644 tools/perf/tests/perf-time-to-tsc.c

(limited to 'tools')

diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index c41c5affe4be..6a54b94f1c25 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -7,7 +7,6 @@ struct test;
 
 /* Tests */
 int test__rdpmc(struct test *test __maybe_unused, int subtest);
-int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
 int test__insn_x86(struct test *test __maybe_unused, int subtest);
 int test__intel_pt_pkt_decoder(struct test *test, int subtest);
 int test__bp_modify(struct test *test, int subtest);
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 2997c506550c..36d4f248b51d 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,6 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 
 perf-y += arch-tests.o
 perf-y += rdpmc.o
-perf-y += perf-time-to-tsc.o
 perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
 perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 6763135aec17..bc25d727b4e9 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -8,10 +8,6 @@ struct test arch_tests[] = {
 		.desc = "x86 rdpmc",
 		.func = test__rdpmc,
 	},
-	{
-		.desc = "Convert perf time to TSC",
-		.func = test__perf_time_to_tsc,
-	},
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 	{
 		.desc = "DWARF unwind",
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
deleted file mode 100644
index 026d32ed078e..000000000000
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ /dev/null
@@ -1,173 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <errno.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <linux/types.h>
-#include <sys/prctl.h>
-#include <perf/cpumap.h>
-#include <perf/evlist.h>
-#include <perf/mmap.h>
-
-#include "debug.h"
-#include "parse-events.h"
-#include "evlist.h"
-#include "evsel.h"
-#include "thread_map.h"
-#include "record.h"
-#include "tsc.h"
-#include "util/mmap.h"
-#include "tests/tests.h"
-
-#include "arch-tests.h"
-
-#define CHECK__(x) {				\
-	while ((x) < 0) {			\
-		pr_debug(#x " failed!\n");	\
-		goto out_err;			\
-	}					\
-}
-
-#define CHECK_NOT_NULL__(x) {			\
-	while ((x) == NULL) {			\
-		pr_debug(#x " failed!\n");	\
-		goto out_err;			\
-	}					\
-}
-
-/**
- * test__perf_time_to_tsc - test converting perf time to TSC.
- *
- * This function implements a test that checks that the conversion of perf time
- * to and from TSC is consistent with the order of events.  If the test passes
- * %0 is returned, otherwise %-1 is returned.  If TSC conversion is not
- * supported then then the test passes but " (not supported)" is printed.
- */
-int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused)
-{
-	struct record_opts opts = {
-		.mmap_pages	     = UINT_MAX,
-		.user_freq	     = UINT_MAX,
-		.user_interval	     = ULLONG_MAX,
-		.target		     = {
-			.uses_mmap   = true,
-		},
-		.sample_time	     = true,
-	};
-	struct perf_thread_map *threads = NULL;
-	struct perf_cpu_map *cpus = NULL;
-	struct evlist *evlist = NULL;
-	struct evsel *evsel = NULL;
-	int err = -1, ret, i;
-	const char *comm1, *comm2;
-	struct perf_tsc_conversion tc;
-	struct perf_event_mmap_page *pc;
-	union perf_event *event;
-	u64 test_tsc, comm1_tsc, comm2_tsc;
-	u64 test_time, comm1_time = 0, comm2_time = 0;
-	struct mmap *md;
-
-	threads = thread_map__new(-1, getpid(), UINT_MAX);
-	CHECK_NOT_NULL__(threads);
-
-	cpus = perf_cpu_map__new(NULL);
-	CHECK_NOT_NULL__(cpus);
-
-	evlist = evlist__new();
-	CHECK_NOT_NULL__(evlist);
-
-	perf_evlist__set_maps(&evlist->core, cpus, threads);
-
-	CHECK__(parse_events(evlist, "cycles:u", NULL));
-
-	perf_evlist__config(evlist, &opts, NULL);
-
-	evsel = evlist__first(evlist);
-
-	evsel->core.attr.comm = 1;
-	evsel->core.attr.disabled = 1;
-	evsel->core.attr.enable_on_exec = 0;
-
-	CHECK__(evlist__open(evlist));
-
-	CHECK__(evlist__mmap(evlist, UINT_MAX));
-
-	pc = evlist->mmap[0].core.base;
-	ret = perf_read_tsc_conversion(pc, &tc);
-	if (ret) {
-		if (ret == -EOPNOTSUPP) {
-			fprintf(stderr, " (not supported)");
-			return 0;
-		}
-		goto out_err;
-	}
-
-	evlist__enable(evlist);
-
-	comm1 = "Test COMM 1";
-	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0));
-
-	test_tsc = rdtsc();
-
-	comm2 = "Test COMM 2";
-	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0));
-
-	evlist__disable(evlist);
-
-	for (i = 0; i < evlist->core.nr_mmaps; i++) {
-		md = &evlist->mmap[i];
-		if (perf_mmap__read_init(&md->core) < 0)
-			continue;
-
-		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
-			struct perf_sample sample;
-
-			if (event->header.type != PERF_RECORD_COMM ||
-			    (pid_t)event->comm.pid != getpid() ||
-			    (pid_t)event->comm.tid != getpid())
-				goto next_event;
-
-			if (strcmp(event->comm.comm, comm1) == 0) {
-				CHECK__(evsel__parse_sample(evsel, event, &sample));
-				comm1_time = sample.time;
-			}
-			if (strcmp(event->comm.comm, comm2) == 0) {
-				CHECK__(evsel__parse_sample(evsel, event, &sample));
-				comm2_time = sample.time;
-			}
-next_event:
-			perf_mmap__consume(&md->core);
-		}
-		perf_mmap__read_done(&md->core);
-	}
-
-	if (!comm1_time || !comm2_time)
-		goto out_err;
-
-	test_time = tsc_to_perf_time(test_tsc, &tc);
-	comm1_tsc = perf_time_to_tsc(comm1_time, &tc);
-	comm2_tsc = perf_time_to_tsc(comm2_time, &tc);
-
-	pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n",
-		 comm1_time, comm1_tsc);
-	pr_debug("rdtsc          time %"PRIu64" tsc %"PRIu64"\n",
-		 test_time, test_tsc);
-	pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n",
-		 comm2_time, comm2_tsc);
-
-	if (test_time <= comm1_time ||
-	    test_time >= comm2_time)
-		goto out_err;
-
-	if (test_tsc <= comm1_tsc ||
-	    test_tsc >= comm2_tsc)
-		goto out_err;
-
-	err = 0;
-
-out_err:
-	evlist__delete(evlist);
-	return err;
-}
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 4d15bf6041fb..aa4dc4f5abde 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -62,6 +62,7 @@ perf-y += pfm.o
 perf-y += parse-metric.o
 perf-y += pe-file-parsing.o
 perf-y += expand-cgroup.o
+perf-y += perf-time-to-tsc.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 132bdb3e6c31..02e7bbf70419 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -349,6 +349,10 @@ static struct test generic_tests[] = {
 		.desc = "Event expansion for cgroups",
 		.func = test__expand_cgroup_events,
 	},
+	{
+		.desc = "Convert perf time to TSC",
+		.func = test__perf_time_to_tsc,
+	},
 	{
 		.func = NULL,
 	},
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
new file mode 100644
index 000000000000..aee97c16c0d9
--- /dev/null
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/prctl.h>
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+#include <perf/mmap.h>
+
+#include "debug.h"
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "record.h"
+#include "tsc.h"
+#include "mmap.h"
+#include "tests.h"
+
+#define CHECK__(x) {				\
+	while ((x) < 0) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+}
+
+#define CHECK_NOT_NULL__(x) {			\
+	while ((x) == NULL) {			\
+		pr_debug(#x " failed!\n");	\
+		goto out_err;			\
+	}					\
+}
+
+/**
+ * test__perf_time_to_tsc - test converting perf time to TSC.
+ *
+ * This function implements a test that checks that the conversion of perf time
+ * to and from TSC is consistent with the order of events.  If the test passes
+ * %0 is returned, otherwise %-1 is returned.  If TSC conversion is not
+ * supported then then the test passes but " (not supported)" is printed.
+ */
+int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	struct record_opts opts = {
+		.mmap_pages	     = UINT_MAX,
+		.user_freq	     = UINT_MAX,
+		.user_interval	     = ULLONG_MAX,
+		.target		     = {
+			.uses_mmap   = true,
+		},
+		.sample_time	     = true,
+	};
+	struct perf_thread_map *threads = NULL;
+	struct perf_cpu_map *cpus = NULL;
+	struct evlist *evlist = NULL;
+	struct evsel *evsel = NULL;
+	int err = -1, ret, i;
+	const char *comm1, *comm2;
+	struct perf_tsc_conversion tc;
+	struct perf_event_mmap_page *pc;
+	union perf_event *event;
+	u64 test_tsc, comm1_tsc, comm2_tsc;
+	u64 test_time, comm1_time = 0, comm2_time = 0;
+	struct mmap *md;
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	CHECK_NOT_NULL__(threads);
+
+	cpus = perf_cpu_map__new(NULL);
+	CHECK_NOT_NULL__(cpus);
+
+	evlist = evlist__new();
+	CHECK_NOT_NULL__(evlist);
+
+	perf_evlist__set_maps(&evlist->core, cpus, threads);
+
+	CHECK__(parse_events(evlist, "cycles:u", NULL));
+
+	perf_evlist__config(evlist, &opts, NULL);
+
+	evsel = evlist__first(evlist);
+
+	evsel->core.attr.comm = 1;
+	evsel->core.attr.disabled = 1;
+	evsel->core.attr.enable_on_exec = 0;
+
+	CHECK__(evlist__open(evlist));
+
+	CHECK__(evlist__mmap(evlist, UINT_MAX));
+
+	pc = evlist->mmap[0].core.base;
+	ret = perf_read_tsc_conversion(pc, &tc);
+	if (ret) {
+		if (ret == -EOPNOTSUPP) {
+			fprintf(stderr, " (not supported)");
+			return 0;
+		}
+		goto out_err;
+	}
+
+	evlist__enable(evlist);
+
+	comm1 = "Test COMM 1";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0));
+
+	test_tsc = rdtsc();
+
+	comm2 = "Test COMM 2";
+	CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0));
+
+	evlist__disable(evlist);
+
+	for (i = 0; i < evlist->core.nr_mmaps; i++) {
+		md = &evlist->mmap[i];
+		if (perf_mmap__read_init(&md->core) < 0)
+			continue;
+
+		while ((event = perf_mmap__read_event(&md->core)) != NULL) {
+			struct perf_sample sample;
+
+			if (event->header.type != PERF_RECORD_COMM ||
+			    (pid_t)event->comm.pid != getpid() ||
+			    (pid_t)event->comm.tid != getpid())
+				goto next_event;
+
+			if (strcmp(event->comm.comm, comm1) == 0) {
+				CHECK__(evsel__parse_sample(evsel, event, &sample));
+				comm1_time = sample.time;
+			}
+			if (strcmp(event->comm.comm, comm2) == 0) {
+				CHECK__(evsel__parse_sample(evsel, event, &sample));
+				comm2_time = sample.time;
+			}
+next_event:
+			perf_mmap__consume(&md->core);
+		}
+		perf_mmap__read_done(&md->core);
+	}
+
+	if (!comm1_time || !comm2_time)
+		goto out_err;
+
+	test_time = tsc_to_perf_time(test_tsc, &tc);
+	comm1_tsc = perf_time_to_tsc(comm1_time, &tc);
+	comm2_tsc = perf_time_to_tsc(comm2_time, &tc);
+
+	pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n",
+		 comm1_time, comm1_tsc);
+	pr_debug("rdtsc          time %"PRIu64" tsc %"PRIu64"\n",
+		 test_time, test_tsc);
+	pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n",
+		 comm2_time, comm2_tsc);
+
+	if (test_time <= comm1_time ||
+	    test_time >= comm2_time)
+		goto out_err;
+
+	if (test_tsc <= comm1_tsc ||
+	    test_tsc >= comm2_tsc)
+		goto out_err;
+
+	err = 0;
+
+out_err:
+	evlist__delete(evlist);
+	return err;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index c85a2c08e407..c9b180e640e5 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -124,6 +124,7 @@ int test__pfm_subtest_get_nr(void);
 int test__parse_metric(struct test *test, int subtest);
 int test__pe_file_parsing(struct test *test, int subtest);
 int test__expand_cgroup_events(struct test *test, int subtest);
+int test__perf_time_to_tsc(struct test *test, int subtest);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
-- 
cgit v1.2.3-70-g09d2


From 248dd9b591db5bc5fb46a0e015753cfcfe60a345 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 19 Oct 2020 18:02:36 +0800
Subject: perf tests tsc: Add checking helper is_supported()

So far tsc is enabled on x86_64, i386 and Arm64 architectures, add
checking helper to skip this testing for other architectures.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/20201019100236.23675-3-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c     |  1 +
 tools/perf/tests/perf-time-to-tsc.c | 13 +++++++++++++
 tools/perf/tests/tests.h            |  1 +
 3 files changed, 15 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 02e7bbf70419..a185904c47f3 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -352,6 +352,7 @@ static struct test generic_tests[] = {
 	{
 		.desc = "Convert perf time to TSC",
 		.func = test__perf_time_to_tsc,
+		.is_supported = test__tsc_is_supported,
 	},
 	{
 		.func = NULL,
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index aee97c16c0d9..a9560e0f6360 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -169,3 +169,16 @@ out_err:
 	evlist__delete(evlist);
 	return err;
 }
+
+bool test__tsc_is_supported(void)
+{
+	/*
+	 * Except x86_64/i386 and Arm64, other archs don't support TSC in perf.
+	 * Just enable the test for x86_64/i386 and Arm64 archs.
+	 */
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
+	return true;
+#else
+	return false;
+#endif
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index c9b180e640e5..b1f2aac93b33 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -129,6 +129,7 @@ int test__perf_time_to_tsc(struct test *test, int subtest);
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
 bool test__wp_is_supported(void);
+bool test__tsc_is_supported(void);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-- 
cgit v1.2.3-70-g09d2


From cc3b964d5eb49d0c9da08760f8760bb6945f1df5 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Fri, 16 Oct 2020 16:16:50 +0300
Subject: perf test: Implement skip_reason callback for watchpoint tests

Currently reason for skipping the read only watchpoint test is only seen
when running in verbose mode:

  $ perf test watchpoint
  23: Watchpoint                                            :
  23.1: Read Only Watchpoint                                : Skip
  23.2: Write Only Watchpoint                               : Ok
  23.3: Read / Write Watchpoint                             : Ok
  23.4: Modify Watchpoint                                   : Ok

  $ perf test -v watchpoint
  23: Watchpoint                                            :
  23.1: Read Only Watchpoint                                :
  --- start ---
  test child forked, pid 60204
  Hardware does not support read only watchpoints.
  test child finished with -2

Implement skip_reason callback for the watchpoint tests, so that it's
easy to see reason why the test is skipped:

  $ perf test watchpoint
  23: Watchpoint                                            :
  23.1: Read Only Watchpoint                                : Skip (missing hardware support)
  23.2: Write Only Watchpoint                               : Ok
  23.3: Read / Write Watchpoint                             : Ok
  23.4: Modify Watchpoint                                   : Ok

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20201016131650.72476-1-tommi.t.rantala@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c |  1 +
 tools/perf/tests/tests.h        |  1 +
 tools/perf/tests/wp.c           | 21 +++++++++++++++------
 3 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index a185904c47f3..7273823d0d02 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -142,6 +142,7 @@ static struct test generic_tests[] = {
 			.skip_if_fail	= false,
 			.get_nr		= test__wp_subtest_get_nr,
 			.get_desc	= test__wp_subtest_get_desc,
+			.skip_reason    = test__wp_subtest_skip_reason,
 		},
 	},
 	{
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index b1f2aac93b33..8e24a61fe4c2 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -66,6 +66,7 @@ int test__bp_signal_overflow(struct test *test, int subtest);
 int test__bp_accounting(struct test *test, int subtest);
 int test__wp(struct test *test, int subtest);
 const char *test__wp_subtest_get_desc(int subtest);
+const char *test__wp_subtest_skip_reason(int subtest);
 int test__wp_subtest_get_nr(void);
 int test__task_exit(struct test *test, int subtest);
 int test__mem(struct test *test, int subtest);
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
index d262d6639829..9387fa76faa5 100644
--- a/tools/perf/tests/wp.c
+++ b/tools/perf/tests/wp.c
@@ -174,10 +174,12 @@ static bool wp_ro_supported(void)
 #endif
 }
 
-static void wp_ro_skip_msg(void)
+static const char *wp_ro_skip_msg(void)
 {
 #if defined (__x86_64__) || defined (__i386__)
-	pr_debug("Hardware does not support read only watchpoints.\n");
+	return "missing hardware support";
+#else
+	return NULL;
 #endif
 }
 
@@ -185,7 +187,7 @@ static struct {
 	const char *desc;
 	int (*target_func)(void);
 	bool (*is_supported)(void);
-	void (*skip_msg)(void);
+	const char *(*skip_msg)(void);
 } wp_testcase_table[] = {
 	{
 		.desc = "Read Only Watchpoint",
@@ -219,16 +221,23 @@ const char *test__wp_subtest_get_desc(int i)
 	return wp_testcase_table[i].desc;
 }
 
+const char *test__wp_subtest_skip_reason(int i)
+{
+	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
+		return NULL;
+	if (!wp_testcase_table[i].skip_msg)
+		return NULL;
+	return wp_testcase_table[i].skip_msg();
+}
+
 int test__wp(struct test *test __maybe_unused, int i)
 {
 	if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
 		return TEST_FAIL;
 
 	if (wp_testcase_table[i].is_supported &&
-	    !wp_testcase_table[i].is_supported()) {
-		wp_testcase_table[i].skip_msg();
+	    !wp_testcase_table[i].is_supported())
 		return TEST_SKIP;
-	}
 
 	return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL;
 }
-- 
cgit v1.2.3-70-g09d2


From c18cf78d7969db89934587fa476220eefe7bd4bd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Oct 2020 14:12:37 -0300
Subject: perf bpf: Enclose libbpf.h include within HAVE_LIBBPF_SUPPORT

As it uses the 'deprecated' attribute in a way that breaks the build
with old gcc compilers, so to continue being able to build in such
systems where NO_LIBBPF=1 is being used, enclose it under
HAVE_LIBBPF_SUPPORT.

   1 centos:6          : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)
   2 oraclelinux:6     : FAIL gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23.0.1)

    CC       /tmp/build/perf/builtin-record.o
  In file included from util/bpf-loader.h:11,
                   from builtin-record.c:39:
  /git/linux/tools/lib/bpf/libbpf.h:203: error: wrong number of arguments specified for 'deprecated' attribute

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf-loader.h   |  3 +++
 tools/perf/util/parse-events.c | 25 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 25251d63164c..5d1c725cea29 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -8,6 +8,8 @@
 
 #include <linux/compiler.h>
 #include <linux/err.h>
+
+#ifdef HAVE_LIBBPF_SUPPORT
 #include <bpf/libbpf.h>
 
 enum bpf_loader_errno {
@@ -38,6 +40,7 @@ enum bpf_loader_errno {
 	BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG,	/* Index too large */
 	__BPF_LOADER_ERRNO__END,
 };
+#endif // HAVE_LIBBPF_SUPPORT
 
 struct evsel;
 struct evlist;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3b273580fb84..3b581d7b3213 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -668,6 +668,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 	return ret;
 }
 
+#ifdef HAVE_LIBBPF_SUPPORT
 struct __add_bpf_event_param {
 	struct parse_events_state *parse_state;
 	struct list_head *list;
@@ -900,6 +901,30 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
 		list_splice_tail(&obj_head_config, head_config);
 	return err;
 }
+#else // HAVE_LIBBPF_SUPPORT
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+			      struct list_head *list __maybe_unused,
+			      struct bpf_object *obj __maybe_unused,
+			      struct list_head *head_config __maybe_unused)
+{
+	parse_events__handle_error(parse_state->error, 0,
+				   strdup("BPF support is not compiled"),
+				   strdup("Make sure libbpf-devel is available at build time."));
+	return -ENOTSUP;
+}
+
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+			  struct list_head *list __maybe_unused,
+			  char *bpf_file_name __maybe_unused,
+			  bool source __maybe_unused,
+			  struct list_head *head_config __maybe_unused)
+{
+	parse_events__handle_error(parse_state->error, 0,
+				   strdup("BPF support is not compiled"),
+				   strdup("Make sure libbpf-devel is available at build time."));
+	return -ENOTSUP;
+}
+#endif // HAVE_LIBBPF_SUPPORT
 
 static int
 parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
-- 
cgit v1.2.3-70-g09d2


From 38219f24116ace9b0e604f2ced9c7dbef3041058 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Oct 2020 15:00:51 -0300
Subject: perf tests: Skip the llvm and bpf tests if HAVE_LIBBPF_SUPPORT isn't
 defined

If either NO_LIBBPF=1 is passed, explicitely disabling it or if libbpf
is not available due to some missing dependency, skip its tests, telling
the user the feature isn't available.

  # perf test
  <SNIP>
  40: LLVM search and compile                                         : Skip (not compiled in)
  41: Session topology                                                : Ok
  42: BPF filter                                                      : Skip (not compiled in)
  <SNIP>

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/bpf.c  |  4 ++--
 tools/perf/tests/llvm.c | 30 +++++++++++++++++++-----------
 2 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index cd77e334e577..d880c588a951 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -9,12 +9,10 @@
 #include <util/util.h>
 #include <util/bpf-loader.h>
 #include <util/evlist.h>
-#include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <api/fs/fs.h>
-#include <bpf/bpf.h>
 #include <perf/mmap.h>
 #include "tests.h"
 #include "llvm.h"
@@ -25,6 +23,8 @@
 #define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
 
 #ifdef HAVE_LIBBPF_SUPPORT
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
 
 static int epoll_pwait_loop(void)
 {
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
index ae6cda81c209..98da8a8757ab 100644
--- a/tools/perf/tests/llvm.c
+++ b/tools/perf/tests/llvm.c
@@ -2,13 +2,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <bpf/libbpf.h>
-#include <util/llvm-utils.h>
-#include "llvm.h"
 #include "tests.h"
 #include "debug.h"
 
 #ifdef HAVE_LIBBPF_SUPPORT
+#include <bpf/libbpf.h>
+#include <util/llvm-utils.h>
+#include "llvm.h"
 static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 {
 	struct bpf_object *obj;
@@ -19,14 +19,6 @@ static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
 	bpf_object__close(obj);
 	return TEST_OK;
 }
-#else
-static int test__bpf_parsing(void *obj_buf __maybe_unused,
-			     size_t obj_buf_sz __maybe_unused)
-{
-	pr_debug("Skip bpf parsing\n");
-	return TEST_OK;
-}
-#endif
 
 static struct {
 	const char *source;
@@ -170,3 +162,19 @@ const char *test__llvm_subtest_get_desc(int subtest)
 
 	return bpf_source_table[subtest].desc;
 }
+#else //HAVE_LIBBPF_SUPPORT
+int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	return TEST_SKIP;
+}
+
+int test__llvm_subtest_get_nr(void)
+{
+	return 0;
+}
+
+const char *test__llvm_subtest_get_desc(int subtest __maybe_unused)
+{
+	return NULL;
+}
+#endif // HAVE_LIBBPF_SUPPORT
-- 
cgit v1.2.3-70-g09d2


From 20e88c6076fc50ebf0560e730349000ff2da94fd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Oct 2020 15:48:23 -0300
Subject: perf annotate: Move bpf header inclusion to inside
 HAVE_LIBBPF_SUPPORT

No need to include it otherwise.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/annotate.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index e52053a6ad42..ce8c07bc8c56 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -10,10 +10,6 @@
 #include <inttypes.h>
 #include <libgen.h>
 #include <stdlib.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#include <linux/btf.h>
 #include "util.h" // hex_width()
 #include "ui/ui.h"
 #include "sort.h"
@@ -1684,6 +1680,10 @@ fallback:
 #define PACKAGE "perf"
 #include <bfd.h>
 #include <dis-asm.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
 
 static int symbol__disassemble_bpf(struct symbol *sym,
 				   struct annotate_args *args)
-- 
cgit v1.2.3-70-g09d2


From ef0580ecd8b0306acf09b7a7508d72cafc67896d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Oct 2020 15:57:21 -0300
Subject: perf env: Conditionally compile BPF support code on having
 HAVE_LIBBPF_SUPPORT

If libbpf isn't selected, no need for a bunch of related code, that were
not even being used, as code using these perf_env methods was also
enclosed in HAVE_LIBBPF_SUPPORT.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c    | 14 ++++++++++----
 tools/perf/util/env.c    | 15 ++++++++++++---
 tools/perf/util/env.h    |  4 ++--
 tools/perf/util/header.c | 21 ++++++++-------------
 4 files changed, 32 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 55c11e854fe4..89b5fd2b5de3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -11,8 +11,10 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
+#ifdef HAVE_LIBBPF_SUPPORT
 #include <bpf/libbpf.h>
 #include "bpf-event.h"
+#endif
 #include "compress.h"
 #include "env.h"
 #include "namespaces.h"
@@ -728,6 +730,7 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
 	return false;
 }
 
+#ifdef HAVE_LIBBPF_SUPPORT
 static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
 {
 	struct bpf_prog_info_node *node;
@@ -765,6 +768,7 @@ static int bpf_size(struct dso *dso)
 	dso->data.file_size = node->info_linear->info.jited_prog_len;
 	return 0;
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static void
 dso_cache__free(struct dso *dso)
@@ -894,10 +898,12 @@ static struct dso_cache *dso_cache__populate(struct dso *dso,
 		*ret = -ENOMEM;
 		return NULL;
 	}
-
+#ifdef HAVE_LIBBPF_SUPPORT
 	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
 		*ret = bpf_read(dso, cache_offset, cache->data);
-	else if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+	else
+#endif
+	if (dso->binary_type == DSO_BINARY_TYPE__OOL)
 		*ret = DSO__DATA_CACHE_SIZE;
 	else
 		*ret = file_read(dso, machine, cache_offset, cache->data);
@@ -1018,10 +1024,10 @@ int dso__data_file_size(struct dso *dso, struct machine *machine)
 
 	if (dso->data.status == DSO_DATA_STATUS_ERROR)
 		return -1;
-
+#ifdef HAVE_LIBBPF_SUPPORT
 	if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
 		return bpf_size(dso);
-
+#endif
 	return file_size(dso, machine);
 }
 
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index fadc59708ece..9130f6fad8d5 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -5,16 +5,18 @@
 #include "util/header.h"
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
-#include "bpf-event.h"
 #include "cgroup.h"
 #include <errno.h>
 #include <sys/utsname.h>
-#include <bpf/libbpf.h>
 #include <stdlib.h>
 #include <string.h>
 
 struct perf_env perf_env;
 
+#ifdef HAVE_LIBBPF_SUPPORT
+#include "bpf-event.h"
+#include <bpf/libbpf.h>
+
 void perf_env__insert_bpf_prog_info(struct perf_env *env,
 				    struct bpf_prog_info_node *info_node)
 {
@@ -163,6 +165,11 @@ static void perf_env__purge_bpf(struct perf_env *env)
 
 	up_write(&env->bpf_progs.lock);
 }
+#else // HAVE_LIBBPF_SUPPORT
+static void perf_env__purge_bpf(struct perf_env *env __maybe_unused)
+{
+}
+#endif // HAVE_LIBBPF_SUPPORT
 
 void perf_env__exit(struct perf_env *env)
 {
@@ -197,11 +204,13 @@ void perf_env__exit(struct perf_env *env)
 	zfree(&env->memory_nodes);
 }
 
-void perf_env__init(struct perf_env *env)
+void perf_env__init(struct perf_env *env __maybe_unused)
 {
+#ifdef HAVE_LIBBPF_SUPPORT
 	env->bpf_progs.infos = RB_ROOT;
 	env->bpf_progs.btfs = RB_ROOT;
 	init_rwsem(&env->bpf_progs.lock);
+#endif
 }
 
 int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index a12972652006..ca249bf5e984 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -77,7 +77,7 @@ struct perf_env {
 	struct numa_node	*numa_nodes;
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
-
+#ifdef HAVE_LIBBPF_SUPPORT
 	/*
 	 * bpf_info_lock protects bpf rbtrees. This is needed because the
 	 * trees are accessed by different threads in perf-top
@@ -89,7 +89,7 @@ struct perf_env {
 		struct rb_root		btfs;
 		u32			btfs_cnt;
 	} bpf_progs;
-
+#endif // HAVE_LIBBPF_SUPPORT
 	/* same reason as above (for perf-top) */
 	struct {
 		struct rw_semaphore	lock;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index be850e9f8852..598285a21dad 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -19,7 +19,9 @@
 #include <sys/utsname.h>
 #include <linux/time64.h>
 #include <dirent.h>
+#ifdef HAVE_LIBBPF_SUPPORT
 #include <bpf/libbpf.h>
+#endif
 #include <perf/cpumap.h>
 
 #include "dso.h"
@@ -987,13 +989,6 @@ out:
 	up_read(&env->bpf_progs.lock);
 	return ret;
 }
-#else // HAVE_LIBBPF_SUPPORT
-static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused,
-			       struct evlist *evlist __maybe_unused)
-{
-	return 0;
-}
-#endif // HAVE_LIBBPF_SUPPORT
 
 static int write_bpf_btf(struct feat_fd *ff,
 			 struct evlist *evlist __maybe_unused)
@@ -1027,6 +1022,7 @@ out:
 	up_read(&env->bpf_progs.lock);
 	return ret;
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
@@ -1638,6 +1634,7 @@ static void print_dir_format(struct feat_fd *ff, FILE *fp)
 	fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version);
 }
 
+#ifdef HAVE_LIBBPF_SUPPORT
 static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
 {
 	struct perf_env *env = &ff->ph->env;
@@ -1683,6 +1680,7 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp)
 
 	up_read(&env->bpf_progs.lock);
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static void free_event_desc(struct evsel *events)
 {
@@ -2938,12 +2936,6 @@ out:
 	up_write(&env->bpf_progs.lock);
 	return err;
 }
-#else // HAVE_LIBBPF_SUPPORT
-static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data __maybe_unused)
-{
-	return 0;
-}
-#endif // HAVE_LIBBPF_SUPPORT
 
 static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
 {
@@ -2990,6 +2982,7 @@ out:
 	free(node);
 	return err;
 }
+#endif // HAVE_LIBBPF_SUPPORT
 
 static int process_compressed(struct feat_fd *ff,
 			      void *data __maybe_unused)
@@ -3120,8 +3113,10 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
 	FEAT_OPR(CLOCKID,	clockid,	false),
 	FEAT_OPN(DIR_FORMAT,	dir_format,	false),
+#ifdef HAVE_LIBBPF_SUPPORT
 	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false),
 	FEAT_OPR(BPF_BTF,       bpf_btf,        false),
+#endif
 	FEAT_OPR(COMPRESSED,	compressed,	false),
 	FEAT_OPR(CPU_PMU_CAPS,	cpu_pmu_caps,	false),
 	FEAT_OPR(CLOCK_DATA,	clock_data,	false),
-- 
cgit v1.2.3-70-g09d2


From 1218838d68f5e9cc195685f17375be96a54832c7 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Tue, 27 Oct 2020 15:24:21 +0900
Subject: perf kvm: Add kvm-stat for arm64

Add support for 'perf kvm stat' on arm64 platform.

Example:

  # perf kvm stat report

Analyze events for all VMs, all VCPUs:

    VM-EXIT    Samples  Samples%     Time%    Min Time    Max Time         Avg time

   DABT_LOW     661867    98.91%    40.45%      2.19us   3364.65us      6.24us ( +-   0.34% )
        IRQ       4598     0.69%    57.44%      2.89us   3397.59us   1276.27us ( +-   1.61% )
        WFx       1475     0.22%     1.71%      2.22us   3388.63us    118.31us ( +-   8.69% )
   IABT_LOW       1018     0.15%     0.38%      2.22us   2742.07us     38.29us ( +-  12.55% )
      SYS64        180     0.03%     0.01%      2.07us    112.91us      6.57us ( +-  14.95% )
      HVC64         17     0.00%     0.01%      2.19us    322.35us     42.95us ( +-  58.98% )

Total Samples:669155, Total events handled time:10216387.86us.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reviewed-by: Leo Yan <leo.yan@linaro.org>
Tested-by: Leo Yan <leo.yan@linaro.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Suleiman Souhlal <suleiman@google.com>
Link: http://lore.kernel.org/lkml/20201027062421.463355-1-sergey.senozhatsky@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/Makefile                     |  1 +
 tools/perf/arch/arm64/util/Build                   |  1 +
 tools/perf/arch/arm64/util/arm64_exception_types.h | 92 ++++++++++++++++++++++
 tools/perf/arch/arm64/util/kvm-stat.c              | 85 ++++++++++++++++++++
 4 files changed, 179 insertions(+)
 create mode 100644 tools/perf/arch/arm64/util/arm64_exception_types.h
 create mode 100644 tools/perf/arch/arm64/util/kvm-stat.c

(limited to 'tools')

diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index dbef716a1913..fab3095fb5d0 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1
 endif
 PERF_HAVE_JITDUMP := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+HAVE_KVM_STAT_SUPPORT := 1
 
 #
 # Syscall table generation for perf
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index b53294d74b01..8d2b9bcfffca 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -2,6 +2,7 @@ perf-y += header.o
 perf-y += machine.o
 perf-y += perf_regs.o
 perf-y += tsc.o
+perf-y += kvm-stat.o
 perf-$(CONFIG_DWARF)     += dwarf-regs.o
 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h
new file mode 100644
index 000000000000..27c981ebe401
--- /dev/null
+++ b/tools/perf/arch/arm64/util/arm64_exception_types.h
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef ARCH_PERF_ARM64_EXCEPTION_TYPES_H
+#define ARCH_PERF_ARM64_EXCEPTION_TYPES_H
+
+/* Per asm/virt.h */
+#define HVC_STUB_ERR		  0xbadca11
+
+/* Per asm/kvm_asm.h */
+#define ARM_EXCEPTION_IRQ		0
+#define ARM_EXCEPTION_EL1_SERROR	1
+#define ARM_EXCEPTION_TRAP		2
+#define ARM_EXCEPTION_IL		3
+/* The hyp-stub will return this for any kvm_call_hyp() call */
+#define ARM_EXCEPTION_HYP_GONE		HVC_STUB_ERR
+
+#define kvm_arm_exception_type					\
+	{ARM_EXCEPTION_IRQ,		"IRQ"		},	\
+	{ARM_EXCEPTION_EL1_SERROR,	"SERROR"	},	\
+	{ARM_EXCEPTION_TRAP,		"TRAP"		},	\
+	{ARM_EXCEPTION_IL,		"ILLEGAL"	},	\
+	{ARM_EXCEPTION_HYP_GONE,	"HYP_GONE"	}
+
+/* Per asm/esr.h */
+#define ESR_ELx_EC_UNKNOWN	(0x00)
+#define ESR_ELx_EC_WFx		(0x01)
+/* Unallocated EC: 0x02 */
+#define ESR_ELx_EC_CP15_32	(0x03)
+#define ESR_ELx_EC_CP15_64	(0x04)
+#define ESR_ELx_EC_CP14_MR	(0x05)
+#define ESR_ELx_EC_CP14_LS	(0x06)
+#define ESR_ELx_EC_FP_ASIMD	(0x07)
+#define ESR_ELx_EC_CP10_ID	(0x08)	/* EL2 only */
+#define ESR_ELx_EC_PAC		(0x09)	/* EL2 and above */
+/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_CP14_64	(0x0C)
+/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_ILL		(0x0E)
+/* Unallocated EC: 0x0F - 0x10 */
+#define ESR_ELx_EC_SVC32	(0x11)
+#define ESR_ELx_EC_HVC32	(0x12)	/* EL2 only */
+#define ESR_ELx_EC_SMC32	(0x13)	/* EL2 and above */
+/* Unallocated EC: 0x14 */
+#define ESR_ELx_EC_SVC64	(0x15)
+#define ESR_ELx_EC_HVC64	(0x16)	/* EL2 and above */
+#define ESR_ELx_EC_SMC64	(0x17)	/* EL2 and above */
+#define ESR_ELx_EC_SYS64	(0x18)
+#define ESR_ELx_EC_SVE		(0x19)
+#define ESR_ELx_EC_ERET		(0x1a)	/* EL2 only */
+/* Unallocated EC: 0x1b - 0x1E */
+#define ESR_ELx_EC_IMP_DEF	(0x1f)	/* EL3 only */
+#define ESR_ELx_EC_IABT_LOW	(0x20)
+#define ESR_ELx_EC_IABT_CUR	(0x21)
+#define ESR_ELx_EC_PC_ALIGN	(0x22)
+/* Unallocated EC: 0x23 */
+#define ESR_ELx_EC_DABT_LOW	(0x24)
+#define ESR_ELx_EC_DABT_CUR	(0x25)
+#define ESR_ELx_EC_SP_ALIGN	(0x26)
+/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_FP_EXC32	(0x28)
+/* Unallocated EC: 0x29 - 0x2B */
+#define ESR_ELx_EC_FP_EXC64	(0x2C)
+/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_SERROR	(0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW	(0x30)
+#define ESR_ELx_EC_BREAKPT_CUR	(0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW	(0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR	(0x33)
+#define ESR_ELx_EC_WATCHPT_LOW	(0x34)
+#define ESR_ELx_EC_WATCHPT_CUR	(0x35)
+/* Unallocated EC: 0x36 - 0x37 */
+#define ESR_ELx_EC_BKPT32	(0x38)
+/* Unallocated EC: 0x39 */
+#define ESR_ELx_EC_VECTOR32	(0x3A)	/* EL2 only */
+/* Unallocated EC: 0x3B */
+#define ESR_ELx_EC_BRK64	(0x3C)
+/* Unallocated EC: 0x3D - 0x3F */
+#define ESR_ELx_EC_MAX		(0x3F)
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+	ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+	ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(PAC), ECN(CP14_64), \
+	ECN(SVC64), ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(SVE), \
+	ECN(IMP_DEF), ECN(IABT_LOW), ECN(IABT_CUR), \
+	ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+	ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+	ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+	ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+	ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
+#endif /* ARCH_PERF_ARM64_EXCEPTION_TYPES_H */
diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c
new file mode 100644
index 000000000000..50376b9062c1
--- /dev/null
+++ b/tools/perf/arch/arm64/util/kvm-stat.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <memory.h>
+#include "../../util/evsel.h"
+#include "../../util/kvm-stat.h"
+#include "arm64_exception_types.h"
+#include "debug.h"
+
+define_exit_reasons_table(arm64_exit_reasons, kvm_arm_exception_type);
+define_exit_reasons_table(arm64_trap_exit_reasons, kvm_arm_exception_class);
+
+const char *kvm_trap_exit_reason = "esr_ec";
+const char *vcpu_id_str = "id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "ret";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
+const char *kvm_events_tp[] = {
+	"kvm:kvm_entry",
+	"kvm:kvm_exit",
+	NULL,
+};
+
+static void event_get_key(struct evsel *evsel,
+			  struct perf_sample *sample,
+			  struct event_key *key)
+{
+	key->info = 0;
+	key->key = evsel__intval(evsel, sample, kvm_exit_reason);
+	key->exit_reasons = arm64_exit_reasons;
+
+	/*
+	 * TRAP exceptions carry exception class info in esr_ec field
+	 * and, hence, we need to use a different exit_reasons table to
+	 * properly decode event's est_ec.
+	 */
+	if (key->key == ARM_EXCEPTION_TRAP) {
+		key->key = evsel__intval(evsel, sample, kvm_trap_exit_reason);
+		key->exit_reasons = arm64_trap_exit_reasons;
+	}
+}
+
+static bool event_begin(struct evsel *evsel,
+			struct perf_sample *sample __maybe_unused,
+			struct event_key *key __maybe_unused)
+{
+	return !strcmp(evsel->name, kvm_entry_trace);
+}
+
+static bool event_end(struct evsel *evsel,
+		      struct perf_sample *sample,
+		      struct event_key *key)
+{
+	if (!strcmp(evsel->name, kvm_exit_trace)) {
+		event_get_key(evsel, sample, key);
+		return true;
+	}
+	return false;
+}
+
+static struct kvm_events_ops exit_events = {
+	.is_begin_event = event_begin,
+	.is_end_event	= event_end,
+	.decode_key	= exit_event_decode_key,
+	.name		= "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+	{
+		.name	= "vmexit",
+		.ops	= &exit_events,
+	},
+	{ NULL },
+};
+
+const char * const kvm_skip_events[] = {
+	NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+	kvm->exit_reasons_isa = "arm64";
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 9b0a7836359443227c9af101f7aea8412e739458 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 27 Oct 2020 16:28:54 +0900
Subject: perf test: Use generic event for expand_libpfm_events()

I found that the UNHALTED_CORE_CYCLES event is only available in the
Intel machines and it makes other vendors/archs fail on the test.  As
libpfm4 can parse the generic events like cycles, let's use them.

Fixes: 40b74c30ffb9 ("perf test: Add expand cgroup event test")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201027072855.655449-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/expand-cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index d5771e4d094f..4c59f3ae438f 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -145,7 +145,7 @@ static int expand_libpfm_events(void)
 	int ret;
 	struct evlist *evlist;
 	struct rblist metric_events;
-	const char event_str[] = "UNHALTED_CORE_CYCLES";
+	const char event_str[] = "CYCLES";
 	struct option opt = {
 		.value = &evlist,
 	};
-- 
cgit v1.2.3-70-g09d2


From bb1c15b60b981d1065d7766ccf9de6c32beedfa3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 27 Oct 2020 16:28:55 +0900
Subject: perf stat: Support regex pattern in --for-each-cgroup

To make the command line even more compact with cgroups, support regex
pattern matching in cgroup names.

  $ perf stat -a -e cpu-clock,cycles --for-each-cgroup ^foo sleep 1

          3,000.73 msec cpu-clock                 foo #    2.998 CPUs utilized
    12,530,992,699      cycles                    foo #    7.517 GHz                      (100.00%)
          1,000.61 msec cpu-clock                 foo/bar #    1.000 CPUs utilized
     4,178,529,579      cycles                    foo/bar #    2.506 GHz                      (100.00%)
          1,000.03 msec cpu-clock                 foo/baz #    0.999 CPUs utilized
     4,176,104,315      cycles                    foo/baz #    2.505 GHz                      (100.00%)

       1.000892614 seconds time elapsed

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201027072855.655449-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt |   5 +-
 tools/perf/builtin-stat.c              |   5 +-
 tools/perf/util/cgroup.c               | 198 +++++++++++++++++++++++++++++----
 3 files changed, 182 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9f9f29025e49..2b44c08b3b23 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -168,8 +168,9 @@ command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
 
 --for-each-cgroup name::
 Expand event list for each cgroup in "name" (allow multiple cgroups separated
-by comma).  This has same effect that repeating -e option and -G option for
-each event x name.  This option cannot be used with -G/--cgroup option.
+by comma).  It also support regex patterns to match multiple groups.  This has same
+effect that repeating -e option and -G option for each event x name.  This option
+cannot be used with -G/--cgroup option.
 
 -o file::
 --output file::
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b01af171d94f..6709578128c9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2235,8 +2235,11 @@ int cmd_stat(int argc, const char **argv)
 		}
 
 		if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
-					  &stat_config.metric_events, true) < 0)
+					  &stat_config.metric_events, true) < 0) {
+			parse_options_usage(stat_usage, stat_options,
+					    "for-each-cgroup", 0);
 			goto out;
+		}
 	}
 
 	target__validate(&target);
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index b81324a13a2b..704333748549 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -13,9 +13,19 @@
 #include <stdlib.h>
 #include <string.h>
 #include <api/fs/fs.h>
+#include <ftw.h>
+#include <regex.h>
 
 int nr_cgroups;
 
+/* used to match cgroup name with patterns */
+struct cgroup_name {
+	struct list_head list;
+	bool used;
+	char name[];
+};
+static LIST_HEAD(cgroup_list);
+
 static int open_cgroup(const char *name)
 {
 	char path[PATH_MAX + 1];
@@ -149,6 +159,137 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup)
 		evsel__set_default_cgroup(evsel, cgroup);
 }
 
+/* helper function for ftw() in match_cgroups and list_cgroups */
+static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused,
+			   int typeflag)
+{
+	struct cgroup_name *cn;
+
+	if (typeflag != FTW_D)
+		return 0;
+
+	cn = malloc(sizeof(*cn) + strlen(fpath) + 1);
+	if (cn == NULL)
+		return -1;
+
+	cn->used = false;
+	strcpy(cn->name, fpath);
+
+	list_add_tail(&cn->list, &cgroup_list);
+	return 0;
+}
+
+static void release_cgroup_list(void)
+{
+	struct cgroup_name *cn;
+
+	while (!list_empty(&cgroup_list)) {
+		cn = list_first_entry(&cgroup_list, struct cgroup_name, list);
+		list_del(&cn->list);
+		free(cn);
+	}
+}
+
+/* collect given cgroups only */
+static int list_cgroups(const char *str)
+{
+	const char *p, *e, *eos = str + strlen(str);
+	struct cgroup_name *cn;
+	char *s;
+
+	/* use given name as is - for testing purpose */
+	for (;;) {
+		p = strchr(str, ',');
+		e = p ? p : eos;
+
+		if (e - str) {
+			int ret;
+
+			s = strndup(str, e - str);
+			if (!s)
+				return -1;
+			/* pretend if it's added by ftw() */
+			ret = add_cgroup_name(s, NULL, FTW_D);
+			free(s);
+			if (ret)
+				return -1;
+		} else {
+			if (add_cgroup_name("", NULL, FTW_D) < 0)
+				return -1;
+		}
+
+		if (!p)
+			break;
+		str = p+1;
+	}
+
+	/* these groups will be used */
+	list_for_each_entry(cn, &cgroup_list, list)
+		cn->used = true;
+
+	return 0;
+}
+
+/* collect all cgroups first and then match with the pattern */
+static int match_cgroups(const char *str)
+{
+	char mnt[PATH_MAX];
+	const char *p, *e, *eos = str + strlen(str);
+	struct cgroup_name *cn;
+	regex_t reg;
+	int prefix_len;
+	char *s;
+
+	if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event"))
+		return -1;
+
+	/* cgroup_name will have a full path, skip the root directory */
+	prefix_len = strlen(mnt);
+
+	/* collect all cgroups in the cgroup_list */
+	if (ftw(mnt, add_cgroup_name, 20) < 0)
+		return -1;
+
+	for (;;) {
+		p = strchr(str, ',');
+		e = p ? p : eos;
+
+		/* allow empty cgroups, i.e., skip */
+		if (e - str) {
+			/* termination added */
+			s = strndup(str, e - str);
+			if (!s)
+				return -1;
+			if (regcomp(&reg, s, REG_NOSUB)) {
+				free(s);
+				return -1;
+			}
+
+			/* check cgroup name with the pattern */
+			list_for_each_entry(cn, &cgroup_list, list) {
+				char *name = cn->name + prefix_len;
+
+				if (name[0] == '/' && name[1])
+					name++;
+				if (!regexec(&reg, name, 0, NULL, 0))
+					cn->used = true;
+			}
+			regfree(&reg);
+			free(s);
+		} else {
+			/* first entry to root cgroup */
+			cn = list_first_entry(&cgroup_list, struct cgroup_name,
+					      list);
+			cn->used = true;
+		}
+
+		if (!p)
+			break;
+		str = p+1;
+	}
+	return prefix_len;
+}
+
 int parse_cgroups(const struct option *opt, const char *str,
 		  int unset __maybe_unused)
 {
@@ -201,6 +342,11 @@ int parse_cgroups(const struct option *opt, const char *str,
 	return 0;
 }
 
+static bool has_pattern_string(const char *str)
+{
+	return !!strpbrk(str, "{}[]()|*+?^$");
+}
+
 int evlist__expand_cgroup(struct evlist *evlist, const char *str,
 			  struct rblist *metric_events, bool open_cgroup)
 {
@@ -208,8 +354,9 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
 	struct evsel *pos, *evsel, *leader;
 	struct rblist orig_metric_events;
 	struct cgroup *cgrp = NULL;
-	const char *p, *e, *eos = str + strlen(str);
+	struct cgroup_name *cn;
 	int ret = -1;
+	int prefix_len;
 
 	if (evlist->core.nr_entries == 0) {
 		fprintf(stderr, "must define events before cgroups\n");
@@ -234,24 +381,27 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
 		rblist__init(&orig_metric_events);
 	}
 
-	for (;;) {
-		p = strchr(str, ',');
-		e = p ? p : eos;
+	if (has_pattern_string(str))
+		prefix_len = match_cgroups(str);
+	else
+		prefix_len = list_cgroups(str);
 
-		/* allow empty cgroups, i.e., skip */
-		if (e - str) {
-			/* termination added */
-			char *name = strndup(str, e - str);
-			if (!name)
-				goto out_err;
+	if (prefix_len < 0)
+		goto out_err;
 
-			cgrp = cgroup__new(name, open_cgroup);
-			free(name);
-			if (cgrp == NULL)
-				goto out_err;
-		} else {
-			cgrp = NULL;
-		}
+	list_for_each_entry(cn, &cgroup_list, list) {
+		char *name;
+
+		if (!cn->used)
+			continue;
+
+		/* cgroup_name might have a full path, skip the prefix */
+		name = cn->name + prefix_len;
+		if (name[0] == '/' && name[1])
+			name++;
+		cgrp = cgroup__new(name, open_cgroup);
+		if (cgrp == NULL)
+			goto out_err;
 
 		leader = NULL;
 		evlist__for_each_entry(orig_list, pos) {
@@ -277,23 +427,25 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
 			if (metricgroup__copy_metric_events(tmp_list, cgrp,
 							    metric_events,
 							    &orig_metric_events) < 0)
-				break;
+				goto out_err;
 		}
 
 		perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries);
 		tmp_list->core.nr_entries = 0;
+	}
 
-		if (!p) {
-			ret = 0;
-			break;
-		}
-		str = p+1;
+	if (list_empty(&evlist->core.entries)) {
+		fprintf(stderr, "no cgroup matched: %s\n", str);
+		goto out_err;
 	}
 
+	ret = 0;
+
 out_err:
 	evlist__delete(orig_list);
 	evlist__delete(tmp_list);
 	rblist__exit(&orig_metric_events);
+	release_cgroup_list();
 
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From 55a4de94c64bacffbcd802c954764e0de2ab217f Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Mon, 26 Oct 2020 17:27:36 -0700
Subject: perf stat: Add --quiet option

Add a new --quiet option to 'perf stat'. This is useful with 'perf stat
record' to write the data only to the perf.data file, which can lower
measurement overhead because the data doesn't need to be formatted.

On my 4C desktop:

  % time ./perf stat record  -e $(python -c 'print ",".join(["cycles"]*1000)')  -a -I 1000 sleep 5
  ...
  real    0m5.377s
  user    0m0.238s
  sys     0m0.452s
  % time ./perf stat record --quiet -e $(python -c 'print ",".join(["cycles"]*1000)')  -a -I 1000 sleep 5

  real    0m5.452s
  user    0m0.183s
  sys     0m0.423s

In this example it cuts the user time by 20%. On systems with more cores
the savings are higher.

Signed-off-by: Andi Kleen <andi@firstfloor.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Link: http://lore.kernel.org/lkml/20201027002737.30942-1-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt | 4 ++++
 tools/perf/builtin-stat.c              | 6 +++++-
 tools/perf/util/stat.h                 | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2b44c08b3b23..5d4a673d7621 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -317,6 +317,10 @@ small group that need not have multiplexing is lowered. This option
 forbids the event merging logic from sharing events between groups and
 may be used to increase accuracy in this case.
 
+--quiet::
+Don't print output. This is useful with perf stat record below to only
+write data to the perf.data file.
+
 STAT RECORD
 -----------
 Stores stat data into perf data file.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6709578128c9..f15b2f8aa14d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -972,6 +972,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	/* Do not print anything if we record to the pipe. */
 	if (STAT_RECORD && perf_stat.data.is_pipe)
 		return;
+	if (stat_config.quiet)
+		return;
 
 	perf_evlist__print_counters(evsel_list, &stat_config, &target,
 				    ts, argc, argv);
@@ -1171,6 +1173,8 @@ static struct option stat_options[] = {
 		    "threads of same physical core"),
 	OPT_BOOLEAN(0, "summary", &stat_config.summary,
 		       "print summary for interval mode"),
+	OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
+			"don't print output (useful with record)"),
 #ifdef HAVE_LIBPFM
 	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
 		"libpfm4 event selector. use 'perf list' to list available events",
@@ -2132,7 +2136,7 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 	}
 
-	if (!output) {
+	if (!output && !stat_config.quiet) {
 		struct timespec tm;
 		mode = append_file ? "a" : "w";
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 487010c624be..05adf8165025 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -122,6 +122,7 @@ struct perf_stat_config {
 	bool			 metric_no_group;
 	bool			 metric_no_merge;
 	bool			 stop_read_counter;
+	bool			 quiet;
 	FILE			*output;
 	unsigned int		 interval;
 	unsigned int		 timeout;
-- 
cgit v1.2.3-70-g09d2


From c5e6bc23355a3b33ffc170f92e315102f1e6a59c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 23 Oct 2020 11:06:28 +0900
Subject: perf trace beauty: Allow header files in a different path

Current script to generate mmap flags and prot checks headers from the
uapi/asm-generic directory but it might come from a different directory
in some environment.  So change the pattern to accept it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201023020628.346257-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/mmap_flags.sh | 4 ++--
 tools/perf/trace/beauty/mmap_prot.sh  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
index 39eb2595983b..76825710c725 100755
--- a/tools/perf/trace/beauty/mmap_flags.sh
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -28,12 +28,12 @@ egrep -q $regex ${linux_mman} && \
 	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
 (egrep $regex ${header_dir}/mman-common.h | \
 	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
 (egrep $regex ${header_dir}/mman.h | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh
index 28f638f8d216..664d8d534a50 100755
--- a/tools/perf/trace/beauty/mmap_prot.sh
+++ b/tools/perf/trace/beauty/mmap_prot.sh
@@ -17,7 +17,7 @@ prefix="PROT"
 
 printf "static const char *mmap_prot[] = {\n"
 regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}`
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman}) &&
+([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
 (egrep $regex ${common_mman} | \
 	egrep -vw PROT_NONE | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
-- 
cgit v1.2.3-70-g09d2


From fa1b41a74d1136cbdd6960f36d7b9c7aa35c8139 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 22 Oct 2020 19:02:26 +0800
Subject: perf jevents: Tidy error handling

There is much duplication in the error handling for directory transvering
for prcessing JSONs.

Factor out the common code to tidy a bit.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-By: Kajol Jain<kjain@linux.ibm.com>
Link: https://lore.kernel.org/r/1603364547-197086-2-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/jevents.c | 83 +++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 48 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index e47644cab3fa..7326c14c4623 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -1100,12 +1100,13 @@ static int process_one_file(const char *fpath, const struct stat *sb,
  */
 int main(int argc, char *argv[])
 {
-	int rc, ret = 0;
+	int rc, ret = 0, empty_map = 0;
 	int maxfds;
 	char ldirname[PATH_MAX];
 	const char *arch;
 	const char *output_file;
 	const char *start_dirname;
+	char *err_string_ext = "";
 	struct stat stbuf;
 
 	prog = basename(argv[0]);
@@ -1133,7 +1134,8 @@ int main(int argc, char *argv[])
 	/* If architecture does not have any event lists, bail out */
 	if (stat(ldirname, &stbuf) < 0) {
 		pr_info("%s: Arch %s has no PMU event lists\n", prog, arch);
-		goto empty_map;
+		empty_map = 1;
+		goto err_close_eventsfp;
 	}
 
 	/* Include pmu-events.h first */
@@ -1150,75 +1152,60 @@ int main(int argc, char *argv[])
 	 */
 
 	maxfds = get_maxfds();
-	mapfile = NULL;
 	rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
-	if (rc && verbose) {
-		pr_info("%s: Error preprocessing arch standard files %s\n",
-			prog, ldirname);
-		goto empty_map;
-	} else if (rc < 0) {
-		/* Make build fail */
-		fclose(eventsfp);
-		free_arch_std_events();
-		return 1;
-	} else if (rc) {
-		goto empty_map;
-	}
+	if (rc)
+		goto err_processing_std_arch_event_dir;
 
 	rc = nftw(ldirname, process_one_file, maxfds, 0);
-	if (rc && verbose) {
-		pr_info("%s: Error walking file tree %s\n", prog, ldirname);
-		goto empty_map;
-	} else if (rc < 0) {
-		/* Make build fail */
-		fclose(eventsfp);
-		free_arch_std_events();
-		ret = 1;
-		goto out_free_mapfile;
-	} else if (rc) {
-		goto empty_map;
-	}
+	if (rc)
+		goto err_processing_dir;
 
 	sprintf(ldirname, "%s/test", start_dirname);
 
 	rc = nftw(ldirname, process_one_file, maxfds, 0);
-	if (rc && verbose) {
-		pr_info("%s: Error walking file tree %s rc=%d for test\n",
-			prog, ldirname, rc);
-		goto empty_map;
-	} else if (rc < 0) {
-		/* Make build fail */
-		free_arch_std_events();
-		ret = 1;
-		goto out_free_mapfile;
-	} else if (rc) {
-		goto empty_map;
-	}
+	if (rc)
+		goto err_processing_dir;
 
 	if (close_table)
 		print_events_table_suffix(eventsfp);
 
 	if (!mapfile) {
 		pr_info("%s: No CPU->JSON mapping?\n", prog);
-		goto empty_map;
+		empty_map = 1;
+		goto err_close_eventsfp;
 	}
 
-	if (process_mapfile(eventsfp, mapfile)) {
+	rc = process_mapfile(eventsfp, mapfile);
+	fclose(eventsfp);
+	if (rc) {
 		pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
 		/* Make build fail */
-		fclose(eventsfp);
-		free_arch_std_events();
 		ret = 1;
+		goto err_out;
 	}
 
+	free_arch_std_events();
+	free(mapfile);
+	return 0;
 
-	goto out_free_mapfile;
-
-empty_map:
+err_processing_std_arch_event_dir:
+	err_string_ext = " for std arch event";
+err_processing_dir:
+	if (verbose) {
+		pr_info("%s: Error walking file tree %s%s\n", prog, ldirname,
+			err_string_ext);
+		empty_map = 1;
+	} else if (rc < 0) {
+		ret = 1;
+	} else {
+		empty_map = 1;
+	}
+err_close_eventsfp:
 	fclose(eventsfp);
-	create_empty_mapping(output_file);
+	if (empty_map)
+		create_empty_mapping(output_file);
+err_out:
 	free_arch_std_events();
-out_free_mapfile:
 	free(mapfile);
 	return ret;
 }
-- 
cgit v1.2.3-70-g09d2


From 644bf4b0f7acde641d3db200b4db66977e96c3bd Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 22 Oct 2020 19:02:27 +0800
Subject: perf jevents: Add test for arch std events

Recently there was an undetected breakage for std arch event support.

Add support in "PMU events" testcase to detect such breakages.

For this, the "test" arch needs has support added to process std arch
events. And a test event is added for the test, ifself.

Also add a few code comments to help understand the code a bit better.

Committer testing:

Before:

  # perf test -vv pmu  |& grep l3_cache_rd
  #

After:

  # perf test -vv pmu  |& grep l3_cache_rd
  testing event table l3_cache_rd: pass
  testing aliases PMU cpu: matched event l3_cache_rd
  #

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-By: Kajol Jain<kjain@linux.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/r/1603364547-197086-3-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/arch/test/arch-std-events.json |  8 ++++++++
 tools/perf/pmu-events/arch/test/test_cpu/cache.json  |  5 +++++
 tools/perf/pmu-events/jevents.c                      |  4 ++++
 tools/perf/tests/pmu-events.c                        | 14 ++++++++++++++
 4 files changed, 31 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/test/arch-std-events.json
 create mode 100644 tools/perf/pmu-events/arch/test/test_cpu/cache.json

(limited to 'tools')

diff --git a/tools/perf/pmu-events/arch/test/arch-std-events.json b/tools/perf/pmu-events/arch/test/arch-std-events.json
new file mode 100644
index 000000000000..43f6f729d6ae
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/arch-std-events.json
@@ -0,0 +1,8 @@
+[
+    {
+        "PublicDescription": "Attributable Level 3 cache access, read",
+        "EventCode": "0x40",
+        "EventName": "L3_CACHE_RD",
+        "BriefDescription": "L3 cache access, read"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/cache.json b/tools/perf/pmu-events/arch/test/test_cpu/cache.json
new file mode 100644
index 000000000000..036d0efdb2bb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/test_cpu/cache.json
@@ -0,0 +1,5 @@
+[
+    {
+	 "ArchStdEvent": "L3_CACHE_RD"
+    }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 7326c14c4623..72cfa3b5046d 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -1162,6 +1162,10 @@ int main(int argc, char *argv[])
 
 	sprintf(ldirname, "%s/test", start_dirname);
 
+	rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
+	if (rc)
+		goto err_processing_std_arch_event_dir;
+
 	rc = nftw(ldirname, process_one_file, maxfds, 0);
 	if (rc)
 		goto err_processing_dir;
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index d3517a74d95e..ad2b21591275 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -14,8 +14,10 @@
 #include "util/parse-events.h"
 
 struct perf_pmu_test_event {
+	/* used for matching against events from generated pmu-events.c */
 	struct pmu_event event;
 
+	/* used for matching against event aliases */
 	/* extra events for aliases */
 	const char *alias_str;
 
@@ -78,6 +80,17 @@ static struct perf_pmu_test_event test_cpu_events[] = {
 		.alias_str = "umask=0,(null)=0x30d40,event=0x3a",
 		.alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
 	},
+	{
+		.event = {
+			.name = "l3_cache_rd",
+			.event = "event=0x40",
+			.desc = "L3 cache access, read",
+			.long_desc = "Attributable Level 3 cache access, read",
+			.topic = "cache",
+		},
+		.alias_str = "event=0x40",
+		.alias_long_desc = "Attributable Level 3 cache access, read",
+	},
 	{ /* sentinel */
 		.event = {
 			.name = NULL,
@@ -357,6 +370,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
 }
 
 
+/* Test that aliases generated are as expected */
 static int test_aliases(void)
 {
 	struct perf_pmu *pmu = NULL;
-- 
cgit v1.2.3-70-g09d2


From f78331f74cacb33d87cd60376dacc5bd397959e2 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 3 Nov 2020 10:41:29 +0100
Subject: libbpf: Fix null dereference in xsk_socket__delete

Fix a possible null pointer dereference in xsk_socket__delete that
will occur if a null pointer is fed into the function.

Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices")
Reported-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1604396490-12129-2-git-send-email-magnus.karlsson@gmail.com
---
 tools/lib/bpf/xsk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e3c98c007825..504b7a85d445 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -891,13 +891,14 @@ int xsk_umem__delete(struct xsk_umem *umem)
 void xsk_socket__delete(struct xsk_socket *xsk)
 {
 	size_t desc_sz = sizeof(struct xdp_desc);
-	struct xsk_ctx *ctx = xsk->ctx;
 	struct xdp_mmap_offsets off;
+	struct xsk_ctx *ctx;
 	int err;
 
 	if (!xsk)
 		return;
 
+	ctx = xsk->ctx;
 	if (ctx->prog_fd != -1) {
 		xsk_delete_bpf_maps(xsk);
 		close(ctx->prog_fd);
-- 
cgit v1.2.3-70-g09d2


From 25cf73b9ff88fd4608699a0313f820758b4c252d Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 3 Nov 2020 10:41:30 +0100
Subject: libbpf: Fix possible use after free in xsk_socket__delete

Fix a possible use after free in xsk_socket__delete that will happen
if xsk_put_ctx() frees the ctx. To fix, save the umem reference taken
from the context and just use that instead.

Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1604396490-12129-3-git-send-email-magnus.karlsson@gmail.com
---
 tools/lib/bpf/xsk.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 504b7a85d445..9bc537d0b92d 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -892,6 +892,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 {
 	size_t desc_sz = sizeof(struct xdp_desc);
 	struct xdp_mmap_offsets off;
+	struct xsk_umem *umem;
 	struct xsk_ctx *ctx;
 	int err;
 
@@ -899,6 +900,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 		return;
 
 	ctx = xsk->ctx;
+	umem = ctx->umem;
 	if (ctx->prog_fd != -1) {
 		xsk_delete_bpf_maps(xsk);
 		close(ctx->prog_fd);
@@ -918,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 
 	xsk_put_ctx(ctx);
 
-	ctx->umem->refcount--;
+	umem->refcount--;
 	/* Do not close an fd that also has an associated umem connected
 	 * to it.
 	 */
-	if (xsk->fd != ctx->umem->fd)
+	if (xsk->fd != umem->fd)
 		close(xsk->fd);
 	free(xsk);
 }
-- 
cgit v1.2.3-70-g09d2


From 537e48259eacbd92f3463900c20cc3acd9dd2072 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:23:57 +0200
Subject: selftests: net: bridge: factor out mcast_packet_test

Factor out mcast_packet_test into lib.sh so it can be later extended and
reused by MLDv2 tests.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 32 ----------------------
 tools/testing/selftests/net/forwarding/lib.sh      | 32 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 0e71abdd7a03..50a48ce16ba1 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -105,38 +105,6 @@ cleanup()
 	vrf_cleanup
 }
 
-# return 0 if the packet wasn't seen on host2_if or 1 if it was
-mcast_packet_test()
-{
-	local mac=$1
-	local src_ip=$2
-	local ip=$3
-	local host1_if=$4
-	local host2_if=$5
-	local seen=0
-
-	# Add an ACL on `host2_if` which will tell us whether the packet
-	# was received by it or not.
-	tc qdisc add dev $host2_if ingress
-	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
-		flower ip_proto udp dst_mac $mac action drop
-
-	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
-	sleep 1
-
-	tc -j -s filter show dev $host2_if ingress \
-		| jq -e ".[] | select(.options.handle == 101) \
-		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
-	if [[ $? -eq 0 ]]; then
-		seen=1
-	fi
-
-	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
-	tc qdisc del dev $host2_if ingress
-
-	return $seen
-}
-
 v2reportleave_test()
 {
 	RET=0
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 927f9ba49e08..bb3ccc6d2165 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1270,3 +1270,35 @@ tcpdump_show()
 {
 	tcpdump -e -n -r $capfile 2>&1
 }
+
+# return 0 if the packet wasn't seen on host2_if or 1 if it was
+mcast_packet_test()
+{
+	local mac=$1
+	local src_ip=$2
+	local ip=$3
+	local host1_if=$4
+	local host2_if=$5
+	local seen=0
+
+	# Add an ACL on `host2_if` which will tell us whether the packet
+	# was received by it or not.
+	tc qdisc add dev $host2_if ingress
+	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+		flower ip_proto udp dst_mac $mac action drop
+
+	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
+	sleep 1
+
+	tc -j -s filter show dev $host2_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	if [[ $? -eq 0 ]]; then
+		seen=1
+	fi
+
+	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+	tc qdisc del dev $host2_if ingress
+
+	return $seen
+}
-- 
cgit v1.2.3-70-g09d2


From 450b0b84c6609e7ec1fb0276c8a7e4efa9e78a4c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:23:58 +0200
Subject: selftests: net: lib: add support for IPv6 mcast packet test

In order to test an IPv6 multicast packet we need to pass different tc
and mausezahn protocols only, so add a simple check for the destination
address which decides if we should generate an IPv4 or IPv6 mcast
packet.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/lib.sh | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index bb3ccc6d2165..0a427b8a039d 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1280,14 +1280,22 @@ mcast_packet_test()
 	local host1_if=$4
 	local host2_if=$5
 	local seen=0
+	local tc_proto="ip"
+	local mz_v6arg=""
+
+	# basic check to see if we were passed an IPv4 address, if not assume IPv6
+	if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+		tc_proto="ipv6"
+		mz_v6arg="-6"
+	fi
 
 	# Add an ACL on `host2_if` which will tell us whether the packet
 	# was received by it or not.
 	tc qdisc add dev $host2_if ingress
-	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+	tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
 		flower ip_proto udp dst_mac $mac action drop
 
-	$MZ $host1_if -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
+	$MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
 	sleep 1
 
 	tc -j -s filter show dev $host2_if ingress \
@@ -1297,7 +1305,7 @@ mcast_packet_test()
 		seen=1
 	fi
 
-	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
 	tc qdisc del dev $host2_if ingress
 
 	return $seen
-- 
cgit v1.2.3-70-g09d2


From 95e6f430ebfee51ac174e234388e7c6e8216ff2c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:23:59 +0200
Subject: selftests: net: bridge: factor out and rename sg state functions

Factor out S,G entry state checking functions for existence, forwarding,
blocking and timer to lib.sh so they can be later used by MLDv2 tests.
Add brmcast_ suffix to their name to make the relation to the bridge
explicit.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/bridge_igmp.sh        | 179 +++++++--------------
 tools/testing/selftests/net/forwarding/lib.sh      |  67 ++++++++
 2 files changed, 123 insertions(+), 123 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 50a48ce16ba1..675eff45b037 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -137,73 +137,6 @@ v2reportleave_test()
 	log_test "IGMPv2 leave $TEST_GROUP"
 }
 
-check_sg_entries()
-{
-	local report=$1; shift
-	local slist=("$@")
-	local sarg=""
-
-	for src in "${slist[@]}"; do
-		sarg="${sarg} and .source_list[].address == \"$src\""
-	done
-	bridge -j -d -s mdb show dev br0 \
-		| jq -e ".[].mdb[] | \
-			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
-	check_err $? "Wrong *,G entry source list after $report report"
-
-	for sgent in "${slist[@]}"; do
-		bridge -j -d -s mdb show dev br0 \
-			| jq -e ".[].mdb[] | \
-				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
-		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
-	done
-}
-
-check_sg_fwding()
-{
-	local should_fwd=$1; shift
-	local sources=("$@")
-
-	for src in "${sources[@]}"; do
-		local retval=0
-
-		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
-		retval=$?
-		if [ $should_fwd -eq 1 ]; then
-			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
-		else
-			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
-		fi
-	done
-}
-
-check_sg_state()
-{
-	local is_blocked=$1; shift
-	local sources=("$@")
-	local should_fail=1
-
-	if [ $is_blocked -eq 1 ]; then
-		should_fail=0
-	fi
-
-	for src in "${sources[@]}"; do
-		bridge -j -d -s mdb show dev br0 \
-			| jq -e ".[].mdb[] | \
-				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
-				 .source_list[] |
-				 select(.address == \"$src\") |
-				 select(.timer == \"0.00\")" &>/dev/null
-		check_err_fail $should_fail $? "Entry $src has zero timer"
-
-		bridge -j -d -s mdb show dev br0 \
-			| jq -e ".[].mdb[] | \
-				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
-				 .flags[] == \"blocked\")" &>/dev/null
-		check_err_fail $should_fail $? "Entry $src has blocked flag"
-	done
-}
-
 v3include_prepare()
 {
 	local host1_if=$1
@@ -225,7 +158,7 @@ v3include_prepare()
 			 select(.grp == \"$TEST_GROUP\" and \
 				.source_list != null and .filter_mode == \"include\")" &>/dev/null
 	check_err $? "Wrong *,G entry filter mode"
-	check_sg_entries "is_include" "${X[@]}"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
 }
 
 v3exclude_prepare()
@@ -247,10 +180,10 @@ v3exclude_prepare()
 				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
 	check_err $? "Wrong *,G entry filter mode"
 
-	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
 	bridge -j -d -s mdb show dev br0 \
 		| jq -e ".[].mdb[] | \
@@ -276,10 +209,10 @@ v3include_test()
 
 	v3include_prepare $h1 $ALL_MAC $ALL_GROUP
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP is_include"
 
@@ -295,12 +228,12 @@ v3inc_allow_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q
 	sleep 1
-	check_sg_entries "allow" "${X[@]}"
+	brmcast_check_sg_entries "allow" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> allow"
 
@@ -316,12 +249,12 @@ v3inc_is_include_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q
 	sleep 1
-	check_sg_entries "is_include" "${X[@]}"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> is_include"
 
@@ -334,8 +267,8 @@ v3inc_is_exclude_test()
 
 	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> is_exclude"
 
@@ -361,10 +294,10 @@ v3inc_to_exclude_test()
 				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
 	check_err $? "Wrong *,G entry filter mode"
 
-	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
 	bridge -j -d -s mdb show dev br0 \
 		| jq -e ".[].mdb[] | \
@@ -379,8 +312,8 @@ v3inc_to_exclude_test()
 				.source_list[].address == \"192.0.2.21\")" &>/dev/null
 	check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> to_exclude"
 
@@ -399,13 +332,13 @@ v3exc_allow_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
 	sleep 1
-	check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> allow"
 
@@ -422,13 +355,13 @@ v3exc_is_include_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q
 	sleep 1
-	check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> is_include"
 
@@ -445,13 +378,13 @@ v3exc_is_exclude_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q
 	sleep 1
-	check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude"
 
@@ -471,13 +404,13 @@ v3exc_to_exclude_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
 	sleep 1
-	check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude"
 
@@ -496,9 +429,9 @@ v3inc_block_test()
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
 	# make sure the lowered timers have expired (by default 2 seconds)
 	sleep 3
-	check_sg_entries "block" "${X[@]}"
+	brmcast_check_sg_entries "block" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
 	bridge -j -d -s mdb show dev br0 \
 		| jq -e ".[].mdb[] | \
@@ -507,8 +440,8 @@ v3inc_block_test()
 				.source_list[].address == \"192.0.2.1\")" &>/dev/null
 	check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 "192.0.2.100"
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "192.0.2.100"
 
 	log_test "IGMPv3 report $TEST_GROUP include -> block"
 
@@ -528,13 +461,13 @@ v3exc_block_test()
 
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
 	sleep 1
-	check_sg_entries "block" "${X[@]}" "${Y[@]}"
+	brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
 
-	check_sg_state 0 "${X[@]}"
-	check_sg_state 1 "${Y[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
 
-	check_sg_fwding 1 "${X[@]}" 192.0.2.100
-	check_sg_fwding 0 "${Y[@]}"
+	brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
 
 	log_test "IGMPv3 report $TEST_GROUP exclude -> block"
 
@@ -574,12 +507,12 @@ v3exc_timeout_test()
 				.source_list[].address == \"192.0.2.2\")" &>/dev/null
 	check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
 
-	check_sg_entries "allow" "${X[@]}"
+	brmcast_check_sg_entries "allow" "${X[@]}"
 
-	check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 0 "${X[@]}"
 
-	check_sg_fwding 1 "${X[@]}"
-	check_sg_fwding 0 192.0.2.100
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 192.0.2.100
 
 	log_test "IGMPv3 group $TEST_GROUP exclude timeout"
 
@@ -610,7 +543,7 @@ v3star_ex_auto_add_test()
 				.flags[] == \"added_by_star_ex\")" &>/dev/null
 	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
 
-	check_sg_fwding 1 192.0.2.3
+	brmcast_check_sg_fwding 1 192.0.2.3
 
 	log_test "IGMPv3 S,G port entry automatic add to a *,G port"
 
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 0a427b8a039d..98ea37d26c44 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1310,3 +1310,70 @@ mcast_packet_test()
 
 	return $seen
 }
+
+brmcast_check_sg_entries()
+{
+	local report=$1; shift
+	local slist=("$@")
+	local sarg=""
+
+	for src in "${slist[@]}"; do
+		sarg="${sarg} and .source_list[].address == \"$src\""
+	done
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
+	check_err $? "Wrong *,G entry source list after $report report"
+
+	for sgent in "${slist[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
+		check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
+	done
+}
+
+brmcast_check_sg_fwding()
+{
+	local should_fwd=$1; shift
+	local sources=("$@")
+
+	for src in "${sources[@]}"; do
+		local retval=0
+
+		mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
+		retval=$?
+		if [ $should_fwd -eq 1 ]; then
+			check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
+		else
+			check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
+		fi
+	done
+}
+
+brmcast_check_sg_state()
+{
+	local is_blocked=$1; shift
+	local sources=("$@")
+	local should_fail=1
+
+	if [ $is_blocked -eq 1 ]; then
+		should_fail=0
+	fi
+
+	for src in "${sources[@]}"; do
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .source_list != null) |
+				 .source_list[] |
+				 select(.address == \"$src\") |
+				 select(.timer == \"0.00\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has zero timer"
+
+		bridge -j -d -s mdb show dev br0 \
+			| jq -e ".[].mdb[] | \
+				 select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
+				 .flags[] == \"blocked\")" &>/dev/null
+		check_err_fail $should_fail $? "Entry $src has blocked flag"
+	done
+}
-- 
cgit v1.2.3-70-g09d2


From 444c897111b02b06599b92e597436e09bd969501 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:00 +0200
Subject: selftests: net: bridge: add initial MLDv2 include test

Add the initial setup for MLDv2 tests with the first test of a simple
is_include report. For MLDv2 we need to setup the bridge properly and we
also send the full precooked packets instead of relying on mausezahn to
fill in some parts. For verification we use the generic S,G state checking
functions from lib.sh.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 146 +++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/bridge_mld.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
new file mode 100755
index 000000000000..3d0d579e4e03
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="mldv2include_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="ff02::cc"
+TEST_GROUP_MAC="33:33:00:00:00:cc"
+
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3
+MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\
+00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
+00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
+00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \
+					mcast_mld_version 2 mcast_startup_query_interval 300 \
+					mcast_querier 1
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	# make sure a query has been generated
+	sleep 5
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+mldv2include_prepare()
+{
+	local host1_if=$1
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+	ip link set dev br0 type bridge mcast_mld_version 2
+	check_err $? "Could not change bridge MLD version to 2"
+
+	$MZ $host1_if $MZPKT_IS_INC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+	check_err $? "Missing *,G entry with source list"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+mldv2cleanup()
+{
+	local port=$1
+
+	bridge mdb del dev br0 port $port grp $TEST_GROUP
+	ip link set dev br0 type bridge mcast_mld_version 1
+}
+
+mldv2include_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+	mldv2include_prepare $h1
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP is_include"
+
+	mldv2cleanup $swp1
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From 0ef10e60682ec2604e34ff8e6eff8fb39fee176c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:01 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> allow report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)     ALLOW (B)      INCLUDE (A+B)        (B)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 3d0d579e4e03..accc4ec2dcce 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -13,6 +13,12 @@ MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:
 00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
 00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
 00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
+00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
+02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
+00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
+00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
 
 source lib.sh
 
@@ -136,6 +142,27 @@ mldv2include_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_allow_test()
+{
+	RET=0
+	local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_ALLOW -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP include -> allow"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From f44de2bc684da9d310c7703a077bd992ebdf71b1 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:02 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> is_include report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)       IS_IN (B)     INCLUDE (A+B)       (B)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index accc4ec2dcce..a93bf6fa6caa 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -13,6 +13,12 @@ MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:
 00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
 00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
 00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\
+05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\
+00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\
+00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
 # MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
 MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
 00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
@@ -163,6 +169,27 @@ mldv2inc_allow_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_is_include_test()
+{
+	RET=0
+	local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_INC2 -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+	log_test "MLDv2 report $TEST_GROUP include -> is_include"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From f9fcd55328a934a252b89b6cdde6c888a62207a7 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:03 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> is_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)       IS_EX (B)     EXCLUDE (A*B, B-A)  (B-A)=0
                                                       Delete (A-B)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 54 +++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index a93bf6fa6caa..ddef8699be7d 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -25,6 +25,12 @@ MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:f
 02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
 00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
 00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
+MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
 
 source lib.sh
 
@@ -123,6 +129,38 @@ mldv2include_prepare()
 	brmcast_check_sg_entries "is_include" "${X[@]}"
 }
 
+mldv2exclude_prepare()
+{
+	local host1_if=$1
+	local mac=$2
+	local group=$3
+	local pkt=$4
+	local X=("2001:db8:1::1" "2001:db8:1::2")
+	local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+	mldv2include_prepare $h1
+
+	$MZ $host1_if -c 1 $MZPKT_IS_EXC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+			 .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::3\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists"
+}
+
 mldv2cleanup()
 {
 	local port=$1
@@ -190,6 +228,20 @@ mldv2inc_is_include_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_is_exclude_test()
+{
+	RET=0
+
+	mldv2exclude_prepare $h1
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP include -> is_exclude"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 55852f1d6a337e63c38e9c247fffaeabb5faef16 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:04 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> to_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)     TO_EX (B)      EXCLUDE (A*B,B-A)    (B-A)=0
                                                       Delete (A-B)
                                                       Send Q(MA,A*B)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 56 +++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index ddef8699be7d..571b01ef672c 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test"
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
+	   mldv2inc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -31,6 +32,12 @@ MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:
 00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 
 source lib.sh
 
@@ -242,6 +249,53 @@ mldv2inc_is_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_to_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::1")
+	local Y=("2001:db8:1::20" "2001:db8:1::30")
+
+	mldv2include_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_TO_EXC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::21\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP include -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 0e77581fdf302547f71749e2e1cd657562189375 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:05 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> allow report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)   ALLOW (A)      EXCLUDE (X+A,Y-A)    (A)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 571b01ef672c..97882c13f278 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test"
+	   mldv2inc_to_exclude_test mldv2exc_allow_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -26,6 +26,11 @@ MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:f
 02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
 00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
 00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30
+MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 # MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
 MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
 00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
@@ -296,6 +301,29 @@ mldv2inc_to_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_allow_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+	local Y=("2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+	sleep 1
+	brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> allow"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 25ba7c03ef1ab77639b00cd2932e0de3b402bed7 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:06 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> is_include report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)     IS_IN (A)     EXCLUDE (X+A, Y-A)  (A)=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 97882c13f278..bae865b5bc8c 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test mldv2exc_allow_test"
+	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -20,6 +20,11 @@ MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01
 05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\
 00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\
 00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 # MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
 MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
 00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
@@ -324,6 +329,29 @@ mldv2exc_allow_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_is_include_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+	local Y=("2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_INC3 -q
+	sleep 1
+	brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> is_include"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From d0b19dedd6c26a797455acb2f198fe946793f209 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:07 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> is_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)     IS_EX (A)     EXCLUDE (A-Y, Y*A)  (A-X-Y)=MALI
                                                       Delete (X-A)
                                                       Delete (Y-A)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index bae865b5bc8c..0f48c8da041b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test"
+	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
+	   mldv2exc_is_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -42,6 +43,11 @@ MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:
 00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 # MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
 MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
 00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
@@ -352,6 +358,29 @@ mldv2exc_is_include_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_is_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::30")
+	local Y=("2001:db8:1::20")
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_IS_EXC2 -q
+	sleep 1
+	brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 9eb4394db91c5ef9595872974224a38719781829 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:08 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> to_exclude report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)   TO_EX (A)      EXCLUDE (A-Y,Y*A)    (A-X-Y) =
                                                            Filter Timer
                                                       Delete (X-A)
                                                       Delete (Y-A)
                                                       Send Q(MA,A-Y)
                                                       Filter Timer=MALI

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 0f48c8da041b..024fa22fa3c2 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test"
+	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -381,6 +381,34 @@ mldv2exc_is_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_to_exclude_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::30")
+	local Y=("2001:db8:1::20")
+
+	mldv2exclude_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_TO_EXC -q
+	sleep 1
+	brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 57386215cc0b4ed483a3ebcb0d2a378ab0db14ba Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:09 +0200
Subject: selftests: net: bridge: add test for mldv2 inc -> block report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   INCLUDE (A)     BLOCK (B)      INCLUDE (A)          Send Q(MA,A*B)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 37 +++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index 024fa22fa3c2..a3c405b2fd6f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test"
+	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -54,6 +54,12 @@ MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:
 00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
 01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\
+00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\
+00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
 
 source lib.sh
 
@@ -409,6 +415,35 @@ mldv2exc_to_exclude_test()
 	mldv2cleanup $swp1
 }
 
+mldv2inc_block_test()
+{
+	RET=0
+	local X=("2001:db8:1::2" "2001:db8:1::3")
+
+	mldv2include_prepare $h1
+
+	$MZ $h1 -c 1 $MZPKT_BLOCK -q
+	# make sure the lowered timers have expired (by default 2 seconds)
+	sleep 3
+	brmcast_check_sg_entries "block" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 2001:db8:1::100
+
+	log_test "MLDv2 report $TEST_GROUP include -> block"
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From a2d667f0c1fb30e24ae71f6b8e832808bf18f117 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:10 +0200
Subject: selftests: net: bridge: add test for mldv2 exc -> block report

The test checks for the following case:
   Router State  Report Received  New Router State     Actions
   EXCLUDE (X,Y)   BLOCK (A)      EXCLUDE (X+(A-Y),Y)  (A-X-Y) =
                                                            Filter Timer
                                                       Send Q(MA,A-Y)

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index a3c405b2fd6f..c498e51b8d2b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -3,7 +3,8 @@
 
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test"
+	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
+	   mldv2exc_block_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -444,6 +445,34 @@ mldv2inc_block_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_block_test()
+{
+	RET=0
+	local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30")
+	local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+	mldv2exclude_prepare $h1
+
+	ip link set dev br0 type bridge mcast_last_member_interval 500
+	check_err $? "Could not change mcast_last_member_interval to 5s"
+
+	$MZ $h1 -c 1 $MZPKT_BLOCK -q
+	sleep 1
+	brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+	brmcast_check_sg_state 1 "${Y[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+	brmcast_check_sg_fwding 0 "${Y[@]}"
+
+	log_test "MLDv2 report $TEST_GROUP exclude -> block"
+
+	ip link set dev br0 type bridge mcast_last_member_interval 100
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From d598cc6a2d45321a2a662742f8c38b43021e36e0 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:11 +0200
Subject: selftests: net: bridge: add test for mldv2 exclude timeout

Test that when a group in exclude mode expires it changes mode to
include and the blocked entries are deleted.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 48 +++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index c498e51b8d2b..b34cf4c6ceba 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -4,7 +4,7 @@
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
 	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
-	   mldv2exc_block_test"
+	   mldv2exc_block_test mldv2exc_timeout_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -473,6 +473,52 @@ mldv2exc_block_test()
 	mldv2cleanup $swp1
 }
 
+mldv2exc_timeout_test()
+{
+	RET=0
+	local X=("2001:db8:1::20" "2001:db8:1::30")
+
+	# GMI should be 3 seconds
+	ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+
+	mldv2exclude_prepare $h1
+	ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+	$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+	sleep 3
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and .filter_mode == \"include\")" &>/dev/null
+	check_err $? "Wrong *,G entry filter mode"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and \
+				.source_list != null and
+				.source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+	check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+
+	brmcast_check_sg_entries "allow" "${X[@]}"
+
+	brmcast_check_sg_state 0 "${X[@]}"
+
+	brmcast_check_sg_fwding 1 "${X[@]}"
+	brmcast_check_sg_fwding 0 2001:db8:1::100
+
+	log_test "MLDv2 group $TEST_GROUP exclude timeout"
+
+	ip link set dev br0 type bridge mcast_query_interval 12500 \
+					mcast_query_response_interval 1000
+
+	mldv2cleanup $swp1
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 252b353c5bb30ee9cc0c3d5cef128cec372e6a2c Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Tue, 3 Nov 2020 19:24:12 +0200
Subject: selftests: net: bridge: add test for mldv2 *,g auto-add

When we have *,G ports in exclude mode and a new S,G,port is added
the kernel has to automatically create an S,G entry for each exclude
port to get proper forwarding.

Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/bridge_mld.sh | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index b34cf4c6ceba..ffdcfa87ca2b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -4,7 +4,7 @@
 ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
 	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
 	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
-	   mldv2exc_block_test mldv2exc_timeout_test"
+	   mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -519,6 +519,35 @@ mldv2exc_timeout_test()
 	mldv2cleanup $swp1
 }
 
+mldv2star_ex_auto_add_test()
+{
+	RET=0
+
+	mldv2exclude_prepare $h1
+
+	$MZ $h2 -c 1 $MZPKT_IS_INC -q
+	sleep 1
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+				.port == \"$swp1\")" &>/dev/null
+	check_err $? "S,G entry for *,G port doesn't exist"
+
+	bridge -j -d -s mdb show dev br0 \
+		| jq -e ".[].mdb[] | \
+			 select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+				.port == \"$swp1\" and \
+				.flags[] == \"added_by_star_ex\")" &>/dev/null
+	check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+	brmcast_check_sg_fwding 1 2001:db8:1::3
+
+	log_test "MLDv2 S,G port entry automatic add to a *,G port"
+
+	mldv2cleanup $swp1
+	mldv2cleanup $swp2
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 8d014eaa9254a9b8e0841df40dd36782b451579a Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Tue, 3 Nov 2020 11:05:09 -0800
Subject: selftests: mptcp: add ADD_ADDR timeout test case

This patch added the test case for retransmitting ADD_ADDR when timeout
occurs. It set NS1's add_addr_timeout to 1 second, and drop NS2's ADD_ADDR
echo packets.

Here we need to slow down the transfer process of all data to let the
ADD_ADDR suboptions can be retransmitted three times. So we added a new
parameter "speed" for do_transfer, it can be set with fast or slow.

We also added three new optional parameters for run_tests, and dropped
run_remove_tests function.

Since we added the netfilter rules in this test case, we need to update
the "config" file.

Suggested-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/config        | 10 +++
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 94 ++++++++++++++++++-------
 2 files changed, 80 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 741a1c4f4ae8..0faaccd21447 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -5,3 +5,13 @@ CONFIG_INET_DIAG=m
 CONFIG_INET_MPTCP_DIAG=m
 CONFIG_VETH=y
 CONFIG_NET_SCH_NETEM=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 08f53d86dedc..0d93b243695f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -13,6 +13,24 @@ capture=0
 
 TEST_COUNT=0
 
+# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
+#				  (ip6 && (ip6[74] & 0xf0) == 0x30)'"
+CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
+			       48 0 0 0,
+			       84 0 0 240,
+			       21 0 3 64,
+			       48 0 0 54,
+			       84 0 0 240,
+			       21 6 7 48,
+			       48 0 0 0,
+			       84 0 0 240,
+			       21 0 4 96,
+			       48 0 0 74,
+			       84 0 0 240,
+			       21 0 1 48,
+			       6 0 0 65535,
+			       6 0 0 0"
+
 init()
 {
 	capout=$(mktemp)
@@ -82,6 +100,26 @@ reset_with_cookies()
 	done
 }
 
+reset_with_add_addr_timeout()
+{
+	local ip="${1:-4}"
+	local tables
+
+	tables="iptables"
+	if [ $ip -eq 6 ]; then
+		tables="ip6tables"
+	fi
+
+	reset
+
+	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+	ip netns exec $ns2 $tables -A OUTPUT -p tcp \
+		-m tcp --tcp-option 30 \
+		-m bpf --bytecode \
+		"$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \
+		-j DROP
+}
+
 for arg in "$@"; do
 	if [ "$arg" = "-c" ]; then
 		capture=1
@@ -94,6 +132,17 @@ if [ $? -ne 0 ];then
 	exit $ksft_skip
 fi
 
+iptables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run all tests without iptables tool"
+	exit $ksft_skip
+fi
+
+ip6tables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run all tests without ip6tables tool"
+	exit $ksft_skip
+fi
 
 check_transfer()
 {
@@ -135,6 +184,7 @@ do_transfer()
 	connect_addr="$5"
 	rm_nr_ns1="$6"
 	rm_nr_ns2="$7"
+	speed="$8"
 
 	port=$((10000+$TEST_COUNT))
 	TEST_COUNT=$((TEST_COUNT+1))
@@ -159,7 +209,7 @@ do_transfer()
 		sleep 1
 	fi
 
-	if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
+	if [ $speed = "fast" ]; then
 		mptcp_connect="./mptcp_connect -j"
 	else
 		mptcp_connect="./mptcp_connect -r"
@@ -250,26 +300,13 @@ run_tests()
 	listener_ns="$1"
 	connector_ns="$2"
 	connect_addr="$3"
+	rm_nr_ns1="${4:-0}"
+	rm_nr_ns2="${5:-0}"
+	speed="${6:-fast}"
 	lret=0
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
-	lret=$?
-	if [ $lret -ne 0 ]; then
-		ret=$lret
-		return
-	fi
-}
-
-run_remove_tests()
-{
-	listener_ns="$1"
-	connector_ns="$2"
-	connect_addr="$3"
-	rm_nr_ns1="$4"
-	rm_nr_ns2="$5"
-	lret=0
-
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+		${rm_nr_ns1} ${rm_nr_ns2} ${speed}
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
@@ -491,12 +528,21 @@ run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "multiple subflows and signal" 3 3 3
 chk_add_nr 1 1
 
+# add_addr timeout
+reset_with_add_addr_timeout
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1 0 0 slow
+chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
+chk_add_nr 4 0
+
 # single subflow, remove
 reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 1
+run_tests $ns1 $ns2 10.0.1.1 0 1 slow
 chk_join_nr "remove single subflow" 1 1 1
 chk_rm_nr 1 1
 
@@ -506,7 +552,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 2
+run_tests $ns1 $ns2 10.0.1.1 0 2 slow
 chk_join_nr "remove multiple subflows" 2 2 2
 chk_rm_nr 2 2
 
@@ -515,7 +561,7 @@ reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-run_remove_tests $ns1 $ns2 10.0.1.1 1 0
+run_tests $ns1 $ns2 10.0.1.1 1 0 slow
 chk_join_nr "remove single address" 1 1 1
 chk_add_nr 1 1
 chk_rm_nr 0 0
@@ -526,7 +572,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 1
+run_tests $ns1 $ns2 10.0.1.1 1 1 slow
 chk_join_nr "remove subflow and signal" 2 2 2
 chk_add_nr 1 1
 chk_rm_nr 1 1
@@ -538,7 +584,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 3
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 2
+run_tests $ns1 $ns2 10.0.1.1 1 2 slow
 chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
-- 
cgit v1.2.3-70-g09d2


From f3ae6c6e8a3ea49076d826c64e63ea78fbf9db43 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:30 +0300
Subject: selftests: proc: fix warning: _GNU_SOURCE redefined

Makefile already contains -D_GNU_SOURCE, so we can remove it from the
*.c files.

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/proc/proc-loadavg-001.c  | 1 -
 tools/testing/selftests/proc/proc-self-syscall.c | 1 -
 tools/testing/selftests/proc/proc-uptime-002.c   | 1 -
 3 files changed, 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
index 471e2aa28077..fb4fe9188806 100644
--- a/tools/testing/selftests/proc/proc-loadavg-001.c
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -14,7 +14,6 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 /* Test that /proc/loadavg correctly reports last pid in pid namespace. */
-#define _GNU_SOURCE
 #include <errno.h>
 #include <sched.h>
 #include <sys/types.h>
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
index 9f6d000c0245..8511dcfe67c7 100644
--- a/tools/testing/selftests/proc/proc-self-syscall.c
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -13,7 +13,6 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#define _GNU_SOURCE
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
index 30e2b7849089..e7ceabed7f51 100644
--- a/tools/testing/selftests/proc/proc-uptime-002.c
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -15,7 +15,6 @@
  */
 // Test that values in /proc/uptime increment monotonically
 // while shifting across CPUs.
-#define _GNU_SOURCE
 #undef NDEBUG
 #include <assert.h>
 #include <unistd.h>
-- 
cgit v1.2.3-70-g09d2


From 1d44d0dd61b6121b49f25b731f2f7f605cb3c896 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:31 +0300
Subject: selftests: core: use SKIP instead of XFAIL in close_range_test.c

XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL
into SKIP"), use SKIP instead.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/core/close_range_test.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index c99b98b0d461..575b391ddc78 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -44,7 +44,7 @@ TEST(close_range)
 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
 		ASSERT_GE(fd, 0) {
 			if (errno == ENOENT)
-				XFAIL(return, "Skipping test since /dev/null does not exist");
+				SKIP(return, "Skipping test since /dev/null does not exist");
 		}
 
 		open_fds[i] = fd;
@@ -52,7 +52,7 @@ TEST(close_range)
 
 	EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
 		if (errno == ENOSYS)
-			XFAIL(return, "close_range() syscall not supported");
+			SKIP(return, "close_range() syscall not supported");
 	}
 
 	EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
@@ -108,7 +108,7 @@ TEST(close_range_unshare)
 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
 		ASSERT_GE(fd, 0) {
 			if (errno == ENOENT)
-				XFAIL(return, "Skipping test since /dev/null does not exist");
+				SKIP(return, "Skipping test since /dev/null does not exist");
 		}
 
 		open_fds[i] = fd;
@@ -197,7 +197,7 @@ TEST(close_range_unshare_capped)
 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
 		ASSERT_GE(fd, 0) {
 			if (errno == ENOENT)
-				XFAIL(return, "Skipping test since /dev/null does not exist");
+				SKIP(return, "Skipping test since /dev/null does not exist");
 		}
 
 		open_fds[i] = fd;
-- 
cgit v1.2.3-70-g09d2


From afba8b0a2cc532b54eaf4254092f57bba5d7eb65 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:32 +0300
Subject: selftests: clone3: use SKIP instead of XFAIL

XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL
into SKIP"), use SKIP instead.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 55bd387ce7ec..52d3f0364bda 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore)
 	test_clone3_supported();
 
 	EXPECT_EQ(getuid(), 0)
-		XFAIL(return, "Skipping all tests as non-root\n");
+		SKIP(return, "Skipping all tests as non-root");
 
 	memset(&set_tid, 0, sizeof(set_tid));
 
-- 
cgit v1.2.3-70-g09d2


From 7d764b685ee1bc73a9fa2b6cb4d42fa72b943145 Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Thu, 8 Oct 2020 15:26:33 +0300
Subject: selftests: binderfs: use SKIP instead of XFAIL

XFAIL is gone since commit 9847d24af95c ("selftests/harness: Refactor XFAIL
into SKIP"), use SKIP instead.

Fixes: 9847d24af95c ("selftests/harness: Refactor XFAIL into SKIP")
Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/filesystems/binderfs/binderfs_test.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 1d27f52c61e6..477cbb042f5b 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
 	ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
 	EXPECT_EQ(ret, 0) {
 		if (errno == ENODEV)
-			XFAIL(goto out, "binderfs missing");
+			SKIP(goto out, "binderfs missing");
 		TH_LOG("%s - Failed to mount binderfs", strerror(errno));
 		goto rmdir;
 	}
@@ -475,10 +475,10 @@ TEST(binderfs_stress)
 TEST(binderfs_test_privileged)
 {
 	if (geteuid() != 0)
-		XFAIL(return, "Tests are not run as root. Skipping privileged tests");
+		SKIP(return, "Tests are not run as root. Skipping privileged tests");
 
 	if (__do_binderfs_test(_metadata))
-		XFAIL(return, "The Android binderfs filesystem is not available");
+		SKIP(return, "The Android binderfs filesystem is not available");
 }
 
 TEST(binderfs_test_unprivileged)
@@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged)
 	ret = wait_for_pid(pid);
 	if (ret) {
 		if (ret == 2)
-			XFAIL(return, "The Android binderfs filesystem is not available");
+			SKIP(return, "The Android binderfs filesystem is not available");
 		ASSERT_EQ(ret, 0) {
 			TH_LOG("wait_for_pid() failed");
 		}
-- 
cgit v1.2.3-70-g09d2


From f9b7ff0d7f7a466a920424246e7ddc2b84c87e52 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Thu, 5 Nov 2020 11:52:30 +0000
Subject: tools/bpftool: Fix attaching flow dissector

My earlier patch to reject non-zero arguments to flow dissector attach
broke attaching via bpftool. Instead of 0 it uses -1 for target_fd.
Fix this by passing a zero argument when attaching the flow dissector.

Fixes: 1b514239e859 ("bpf: flow_dissector: Check value of unused flags to BPF_PROG_ATTACH")
Reported-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105115230.296657-1-lmb@cloudflare.com
---
 tools/bpf/bpftool/prog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index d942c1e3372c..acdb2c245f0a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd,
 	}
 
 	if (*attach_type == BPF_FLOW_DISSECTOR) {
-		*mapfd = -1;
+		*mapfd = 0;
 		return 0;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From c81ed6d81e0560713ceb94917ff1981848d0614e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:51 -0800
Subject: libbpf: Factor out common operations in BTF writing APIs

Factor out commiting of appended type data. Also extract fetching the very
last type in the BTF (to append members to). These two operations are common
across many APIs and will be easier to refactor with split BTF, if they are
extracted into a single place.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-2-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 123 ++++++++++++++++++----------------------------------
 1 file changed, 43 insertions(+), 80 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 231b07203e3d..89fecfe5cb2b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1560,6 +1560,20 @@ static void btf_type_inc_vlen(struct btf_type *t)
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
 }
 
+static int btf_commit_type(struct btf *btf, int data_sz)
+{
+	int err;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += data_sz;
+	btf->hdr->str_off += data_sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
 /*
  * Append new BTF_KIND_INT type with:
  *   - *name* - non-empty, non-NULL type name;
@@ -1572,7 +1586,7 @@ static void btf_type_inc_vlen(struct btf_type *t)
 int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding)
 {
 	struct btf_type *t;
-	int sz, err, name_off;
+	int sz, name_off;
 
 	/* non-empty name */
 	if (!name || !name[0])
@@ -1606,14 +1620,7 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 	/* set INT info, we don't allow setting legacy bit offset/size */
 	*(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8);
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /* it's completely legal to append BTF types with type IDs pointing forward to
@@ -1631,7 +1638,7 @@ static int validate_type_id(int id)
 static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
 {
 	struct btf_type *t;
-	int sz, name_off = 0, err;
+	int sz, name_off = 0;
 
 	if (validate_type_id(ref_type_id))
 		return -EINVAL;
@@ -1654,14 +1661,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
 	t->info = btf_type_info(kind, 0, 0);
 	t->type = ref_type_id;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -1689,7 +1689,7 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
 {
 	struct btf_type *t;
 	struct btf_array *a;
-	int sz, err;
+	int sz;
 
 	if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
 		return -EINVAL;
@@ -1711,21 +1711,14 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
 	a->index_type = index_type_id;
 	a->nelems = nr_elems;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /* generic STRUCT/UNION append function */
 static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off = 0;
+	int sz, name_off = 0;
 
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
@@ -1748,14 +1741,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
 	t->info = btf_type_info(kind, 0, 0);
 	t->size = bytes_sz;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -1793,6 +1779,11 @@ int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz)
 	return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz);
 }
 
+static struct btf_type *btf_last_type(struct btf *btf)
+{
+	return btf_type_by_id(btf, btf__get_nr_types(btf));
+}
+
 /*
  * Append new field for the current STRUCT/UNION type with:
  *   - *name* - name of the field, can be NULL or empty for anonymous field;
@@ -1814,7 +1805,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 	/* last type should be union/struct */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_composite(t))
 		return -EINVAL;
 
@@ -1849,7 +1840,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 	m->offset = bit_offset | (bit_size << 24);
 
 	/* btf_add_type_mem can invalidate t pointer */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	/* update parent type's vlen and kflag */
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
 
@@ -1874,7 +1865,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off = 0;
+	int sz, name_off = 0;
 
 	/* byte_sz must be power of 2 */
 	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
@@ -1899,14 +1890,7 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 	t->info = btf_type_info(BTF_KIND_ENUM, 0, 0);
 	t->size = byte_sz;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -1926,7 +1910,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 	/* last type should be BTF_KIND_ENUM */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_enum(t))
 		return -EINVAL;
 
@@ -1953,7 +1937,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 	v->val = value;
 
 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
 	btf->hdr->type_len += sz;
@@ -2093,7 +2077,7 @@ int btf__add_func(struct btf *btf, const char *name,
 int btf__add_func_proto(struct btf *btf, int ret_type_id)
 {
 	struct btf_type *t;
-	int sz, err;
+	int sz;
 
 	if (validate_type_id(ret_type_id))
 		return -EINVAL;
@@ -2113,14 +2097,7 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
 	t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
 	t->type = ret_type_id;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -2143,7 +2120,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 	/* last type should be BTF_KIND_FUNC_PROTO */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_func_proto(t))
 		return -EINVAL;
 
@@ -2166,7 +2143,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 	p->type = type_id;
 
 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
 	btf->hdr->type_len += sz;
@@ -2188,7 +2165,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 {
 	struct btf_type *t;
 	struct btf_var *v;
-	int sz, err, name_off;
+	int sz, name_off;
 
 	/* non-empty name */
 	if (!name || !name[0])
@@ -2219,14 +2196,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 	v = btf_var(t);
 	v->linkage = linkage;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -2244,7 +2214,7 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 {
 	struct btf_type *t;
-	int sz, err, name_off;
+	int sz, name_off;
 
 	/* non-empty name */
 	if (!name || !name[0])
@@ -2267,14 +2237,7 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 	t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
 	t->size = byte_sz;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
-	if (err)
-		return err;
-
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
-	btf->nr_types++;
-	return btf->nr_types;
+	return btf_commit_type(btf, sz);
 }
 
 /*
@@ -2296,7 +2259,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 	/* last type should be BTF_KIND_DATASEC */
 	if (btf->nr_types == 0)
 		return -EINVAL;
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	if (!btf_is_datasec(t))
 		return -EINVAL;
 
@@ -2317,7 +2280,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 	v->size = byte_sz;
 
 	/* update parent type's vlen */
-	t = btf_type_by_id(btf, btf->nr_types);
+	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
 	btf->hdr->type_len += sz;
-- 
cgit v1.2.3-70-g09d2


From d9448f94962bd28554df7d9a342d37c7f13d6232 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:52 -0800
Subject: selftest/bpf: Relax btf_dedup test checks

Remove the requirement of a strictly exact string section contents. This used
to be true when string deduplication was done through sorting, but with string
dedup done through hash table, it's no longer true. So relax test harness to
relax strings checks and, consequently, type checks, which now don't have to
have exactly the same string offsets.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-3-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 40 +++++++++++++++++-----------
 1 file changed, 25 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 93162484c2ca..8ae97e2a4b9d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -6652,7 +6652,7 @@ static void do_test_dedup(unsigned int test_num)
 	const void *test_btf_data, *expect_btf_data;
 	const char *ret_test_next_str, *ret_expect_next_str;
 	const char *test_strs, *expect_strs;
-	const char *test_str_cur, *test_str_end;
+	const char *test_str_cur;
 	const char *expect_str_cur, *expect_str_end;
 	unsigned int raw_btf_size;
 	void *raw_btf;
@@ -6719,12 +6719,18 @@ static void do_test_dedup(unsigned int test_num)
 		goto done;
 	}
 
-	test_str_cur = test_strs;
-	test_str_end = test_strs + test_hdr->str_len;
 	expect_str_cur = expect_strs;
 	expect_str_end = expect_strs + expect_hdr->str_len;
-	while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
+	while (expect_str_cur < expect_str_end) {
 		size_t test_len, expect_len;
+		int off;
+
+		off = btf__find_str(test_btf, expect_str_cur);
+		if (CHECK(off < 0, "exp str '%s' not found: %d\n", expect_str_cur, off)) {
+			err = -1;
+			goto done;
+		}
+		test_str_cur = btf__str_by_offset(test_btf, off);
 
 		test_len = strlen(test_str_cur);
 		expect_len = strlen(expect_str_cur);
@@ -6741,15 +6747,8 @@ static void do_test_dedup(unsigned int test_num)
 			err = -1;
 			goto done;
 		}
-		test_str_cur += test_len + 1;
 		expect_str_cur += expect_len + 1;
 	}
-	if (CHECK(test_str_cur != test_str_end,
-		  "test_str_cur:%p != test_str_end:%p",
-		  test_str_cur, test_str_end)) {
-		err = -1;
-		goto done;
-	}
 
 	test_nr_types = btf__get_nr_types(test_btf);
 	expect_nr_types = btf__get_nr_types(expect_btf);
@@ -6775,10 +6774,21 @@ static void do_test_dedup(unsigned int test_num)
 			err = -1;
 			goto done;
 		}
-		if (CHECK(memcmp((void *)test_type,
-				 (void *)expect_type,
-				 test_size),
-			  "type #%d: contents differ", i)) {
+		if (CHECK(btf_kind(test_type) != btf_kind(expect_type),
+			  "type %d kind: exp %d != got %u\n",
+			  i, btf_kind(expect_type), btf_kind(test_type))) {
+			err = -1;
+			goto done;
+		}
+		if (CHECK(test_type->info != expect_type->info,
+			  "type %d info: exp %d != got %u\n",
+			  i, expect_type->info, test_type->info)) {
+			err = -1;
+			goto done;
+		}
+		if (CHECK(test_type->size != expect_type->size,
+			  "type %d size/type: exp %d != got %u\n",
+			  i, expect_type->size, test_type->size)) {
 			err = -1;
 			goto done;
 		}
-- 
cgit v1.2.3-70-g09d2


From 88a82c2a9ab5b2ce533c3de3f4517853c2f67f53 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 4 Nov 2020 20:33:53 -0800
Subject: libbpf: Unify and speed up BTF string deduplication

Revamp BTF dedup's string deduplication to match the approach of writable BTF
string management. This allows to transfer deduplicated strings index back to
BTF object after deduplication without expensive extra memory copying and hash
map re-construction. It also simplifies the code and speeds it up, because
hashmap-based string deduplication is faster than sort + unique approach.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-4-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 263 ++++++++++++++++++++--------------------------------
 1 file changed, 98 insertions(+), 165 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 89fecfe5cb2b..fa1b147c63c6 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -90,6 +90,14 @@ struct btf {
 	struct hashmap *strs_hash;
 	/* whether strings are already deduplicated */
 	bool strs_deduped;
+	/* extra indirection layer to make strings hashmap work with stable
+	 * string offsets and ability to transparently choose between
+	 * btf->strs_data or btf_dedup->strs_data as a source of strings.
+	 * This is used for BTF strings dedup to transfer deduplicated strings
+	 * data back to struct btf without re-building strings index.
+	 */
+	void **strs_data_ptr;
+
 	/* BTF object FD, if loaded into kernel */
 	int fd;
 
@@ -1363,17 +1371,19 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
 
 static size_t strs_hash_fn(const void *key, void *ctx)
 {
-	struct btf *btf = ctx;
-	const char *str = btf->strs_data + (long)key;
+	const struct btf *btf = ctx;
+	const char *strs = *btf->strs_data_ptr;
+	const char *str = strs + (long)key;
 
 	return str_hash(str);
 }
 
 static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
 {
-	struct btf *btf = ctx;
-	const char *str1 = btf->strs_data + (long)key1;
-	const char *str2 = btf->strs_data + (long)key2;
+	const struct btf *btf = ctx;
+	const char *strs = *btf->strs_data_ptr;
+	const char *str1 = strs + (long)key1;
+	const char *str2 = strs + (long)key2;
 
 	return strcmp(str1, str2) == 0;
 }
@@ -1418,6 +1428,9 @@ static int btf_ensure_modifiable(struct btf *btf)
 	memcpy(types, btf->types_data, btf->hdr->type_len);
 	memcpy(strs, btf->strs_data, btf->hdr->str_len);
 
+	/* make hashmap below use btf->strs_data as a source of strings */
+	btf->strs_data_ptr = &btf->strs_data;
+
 	/* build lookup index for all strings */
 	hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
 	if (IS_ERR(hash)) {
@@ -2824,19 +2837,11 @@ struct btf_dedup {
 	size_t hypot_cap;
 	/* Various option modifying behavior of algorithm */
 	struct btf_dedup_opts opts;
-};
-
-struct btf_str_ptr {
-	const char *str;
-	__u32 new_off;
-	bool used;
-};
-
-struct btf_str_ptrs {
-	struct btf_str_ptr *ptrs;
-	const char *data;
-	__u32 cnt;
-	__u32 cap;
+	/* temporary strings deduplication state */
+	void *strs_data;
+	size_t strs_cap;
+	size_t strs_len;
+	struct hashmap* strs_hash;
 };
 
 static long hash_combine(long h, long value)
@@ -3063,64 +3068,41 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
 	return 0;
 }
 
-static int str_sort_by_content(const void *a1, const void *a2)
-{
-	const struct btf_str_ptr *p1 = a1;
-	const struct btf_str_ptr *p2 = a2;
-
-	return strcmp(p1->str, p2->str);
-}
-
-static int str_sort_by_offset(const void *a1, const void *a2)
-{
-	const struct btf_str_ptr *p1 = a1;
-	const struct btf_str_ptr *p2 = a2;
-
-	if (p1->str != p2->str)
-		return p1->str < p2->str ? -1 : 1;
-	return 0;
-}
-
-static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
-{
-	const struct btf_str_ptr *p = pelem;
-
-	if (str_ptr != p->str)
-		return (const char *)str_ptr < p->str ? -1 : 1;
-	return 0;
-}
-
-static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
+static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 {
-	struct btf_str_ptrs *strs;
-	struct btf_str_ptr *s;
+	struct btf_dedup *d = ctx;
+	long old_off, new_off, len;
+	const char *s;
+	void *p;
+	int err;
 
 	if (*str_off_ptr == 0)
 		return 0;
 
-	strs = ctx;
-	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
-		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
-	if (!s)
-		return -EINVAL;
-	s->used = true;
-	return 0;
-}
+	s = btf__str_by_offset(d->btf, *str_off_ptr);
+	len = strlen(s) + 1;
 
-static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
-{
-	struct btf_str_ptrs *strs;
-	struct btf_str_ptr *s;
+	new_off = d->strs_len;
+	p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
+	if (!p)
+		return -ENOMEM;
 
-	if (*str_off_ptr == 0)
-		return 0;
+	memcpy(p, s, len);
 
-	strs = ctx;
-	s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
-		    sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
-	if (!s)
-		return -EINVAL;
-	*str_off_ptr = s->new_off;
+	/* Now attempt to add the string, but only if the string with the same
+	 * contents doesn't exist already (HASHMAP_ADD strategy). If such
+	 * string exists, we'll get its offset in old_off (that's old_key).
+	 */
+	err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
+			      HASHMAP_ADD, (const void **)&old_off, NULL);
+	if (err == -EEXIST) {
+		*str_off_ptr = old_off;
+	} else if (err) {
+		return err;
+	} else {
+		*str_off_ptr = new_off;
+		d->strs_len += len;
+	}
 	return 0;
 }
 
@@ -3137,118 +3119,69 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
  */
 static int btf_dedup_strings(struct btf_dedup *d)
 {
-	char *start = d->btf->strs_data;
-	char *end = start + d->btf->hdr->str_len;
-	char *p = start, *tmp_strs = NULL;
-	struct btf_str_ptrs strs = {
-		.cnt = 0,
-		.cap = 0,
-		.ptrs = NULL,
-		.data = start,
-	};
-	int i, j, err = 0, grp_idx;
-	bool grp_used;
+	char *s;
+	int err;
 
 	if (d->btf->strs_deduped)
 		return 0;
 
-	/* build index of all strings */
-	while (p < end) {
-		if (strs.cnt + 1 > strs.cap) {
-			struct btf_str_ptr *new_ptrs;
-
-			strs.cap += max(strs.cnt / 2, 16U);
-			new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));
-			if (!new_ptrs) {
-				err = -ENOMEM;
-				goto done;
-			}
-			strs.ptrs = new_ptrs;
-		}
-
-		strs.ptrs[strs.cnt].str = p;
-		strs.ptrs[strs.cnt].used = false;
-
-		p += strlen(p) + 1;
-		strs.cnt++;
-	}
-
-	/* temporary storage for deduplicated strings */
-	tmp_strs = malloc(d->btf->hdr->str_len);
-	if (!tmp_strs) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	/* mark all used strings */
-	strs.ptrs[0].used = true;
-	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
-	if (err)
-		goto done;
-
-	/* sort strings by context, so that we can identify duplicates */
-	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
+	s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
+	if (!s)
+		return -ENOMEM;
+	/* initial empty string */
+	s[0] = 0;
+	d->strs_len = 1;
 
-	/*
-	 * iterate groups of equal strings and if any instance in a group was
-	 * referenced, emit single instance and remember new offset
+	/* temporarily switch to use btf_dedup's strs_data for strings for hash
+	 * functions; later we'll just transfer hashmap to struct btf as is,
+	 * along the strs_data
 	 */
-	p = tmp_strs;
-	grp_idx = 0;
-	grp_used = strs.ptrs[0].used;
-	/* iterate past end to avoid code duplication after loop */
-	for (i = 1; i <= strs.cnt; i++) {
-		/*
-		 * when i == strs.cnt, we want to skip string comparison and go
-		 * straight to handling last group of strings (otherwise we'd
-		 * need to handle last group after the loop w/ duplicated code)
-		 */
-		if (i < strs.cnt &&
-		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
-			grp_used = grp_used || strs.ptrs[i].used;
-			continue;
-		}
+	d->btf->strs_data_ptr = &d->strs_data;
 
-		/*
-		 * this check would have been required after the loop to handle
-		 * last group of strings, but due to <= condition in a loop
-		 * we avoid that duplication
-		 */
-		if (grp_used) {
-			int new_off = p - tmp_strs;
-			__u32 len = strlen(strs.ptrs[grp_idx].str);
-
-			memmove(p, strs.ptrs[grp_idx].str, len + 1);
-			for (j = grp_idx; j < i; j++)
-				strs.ptrs[j].new_off = new_off;
-			p += len + 1;
-		}
-
-		if (i < strs.cnt) {
-			grp_idx = i;
-			grp_used = strs.ptrs[i].used;
-		}
+	d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf);
+	if (IS_ERR(d->strs_hash)) {
+		err = PTR_ERR(d->strs_hash);
+		d->strs_hash = NULL;
+		goto err_out;
 	}
 
-	/* replace original strings with deduped ones */
-	d->btf->hdr->str_len = p - tmp_strs;
-	memmove(start, tmp_strs, d->btf->hdr->str_len);
-	end = start + d->btf->hdr->str_len;
-
-	/* restore original order for further binary search lookups */
-	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+	/* insert empty string; we won't be looking it up during strings
+	 * dedup, but it's good to have it for generic BTF string lookups
+	 */
+	err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
+			      HASHMAP_ADD, NULL, NULL);
+	if (err)
+		goto err_out;
 
 	/* remap string offsets */
-	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+	err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
 	if (err)
-		goto done;
+		goto err_out;
 
-	d->btf->hdr->str_len = end - start;
+	/* replace BTF string data and hash with deduped ones */
+	free(d->btf->strs_data);
+	hashmap__free(d->btf->strs_hash);
+	d->btf->strs_data = d->strs_data;
+	d->btf->strs_data_cap = d->strs_cap;
+	d->btf->hdr->str_len = d->strs_len;
+	d->btf->strs_hash = d->strs_hash;
+	/* now point strs_data_ptr back to btf->strs_data */
+	d->btf->strs_data_ptr = &d->btf->strs_data;
+
+	d->strs_data = d->strs_hash = NULL;
+	d->strs_len = d->strs_cap = 0;
 	d->btf->strs_deduped = true;
+	return 0;
+
+err_out:
+	free(d->strs_data);
+	hashmap__free(d->strs_hash);
+	d->strs_data = d->strs_hash = NULL;
+	d->strs_len = d->strs_cap = 0;
+
+	/* restore strings pointer for existing d->btf->strs_hash back */
+	d->btf->strs_data_ptr = &d->strs_data;
 
-done:
-	free(tmp_strs);
-	free(strs.ptrs);
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From ba451366bf44498f22dd16c31a792083bd6f2ae1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:54 -0800
Subject: libbpf: Implement basic split BTF support

Support split BTF operation, in which one BTF (base BTF) provides basic set of
types and strings, while another one (split BTF) builds on top of base's types
and strings and adds its own new types and strings. From API standpoint, the
fact that the split BTF is built on top of the base BTF is transparent.

Type numeration is transparent. If the base BTF had last type ID #N, then all
types in the split BTF start at type ID N+1. Any type in split BTF can
reference base BTF types, but not vice versa. Programmatically construction of
a split BTF on top of a base BTF is supported: one can create an empty split
BTF with btf__new_empty_split() and pass base BTF as an input, or pass raw
binary data to btf__new_split(), or use btf__parse_xxx_split() variants to get
initial set of split types/strings from the ELF file with .BTF section.

String offsets are similarly transparent and are a logical continuation of
base BTF's strings. When building BTF programmatically and adding a new string
(explicitly with btf__add_str() or implicitly through appending new
types/members), string-to-be-added would first be looked up from the base
BTF's string section and re-used if it's there. If not, it will be looked up
and/or added to the split BTF string section. Similarly to type IDs, types in
split BTF can refer to strings from base BTF absolutely transparently (but not
vice versa, of course, because base BTF doesn't "know" about existence of
split BTF).

Internal type index is slightly adjusted to be zero-indexed, ignoring a fake
[0] VOID type. This allows to handle split/base BTF type lookups transparently
by using btf->start_id type ID offset, which is always 1 for base/non-split
BTF and equals btf__get_nr_types(base_btf) + 1 for the split BTF.

BTF deduplication is not yet supported for split BTF and support for it will
be added in separate patch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-5-andrii@kernel.org
---
 tools/lib/bpf/btf.c      | 197 ++++++++++++++++++++++++++++++++++++-----------
 tools/lib/bpf/btf.h      |   8 ++
 tools/lib/bpf/libbpf.map |   9 +++
 3 files changed, 169 insertions(+), 45 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index fa1b147c63c6..0258cf108c0a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -78,10 +78,32 @@ struct btf {
 	void *types_data;
 	size_t types_data_cap; /* used size stored in hdr->type_len */
 
-	/* type ID to `struct btf_type *` lookup index */
+	/* type ID to `struct btf_type *` lookup index
+	 * type_offs[0] corresponds to the first non-VOID type:
+	 *   - for base BTF it's type [1];
+	 *   - for split BTF it's the first non-base BTF type.
+	 */
 	__u32 *type_offs;
 	size_t type_offs_cap;
+	/* number of types in this BTF instance:
+	 *   - doesn't include special [0] void type;
+	 *   - for split BTF counts number of types added on top of base BTF.
+	 */
 	__u32 nr_types;
+	/* if not NULL, points to the base BTF on top of which the current
+	 * split BTF is based
+	 */
+	struct btf *base_btf;
+	/* BTF type ID of the first type in this BTF instance:
+	 *   - for base BTF it's equal to 1;
+	 *   - for split BTF it's equal to biggest type ID of base BTF plus 1.
+	 */
+	int start_id;
+	/* logical string offset of this BTF instance:
+	 *   - for base BTF it's equal to 0;
+	 *   - for split BTF it's equal to total size of base BTF's string section size.
+	 */
+	int start_str_off;
 
 	void *strs_data;
 	size_t strs_data_cap; /* used size stored in hdr->str_len */
@@ -176,7 +198,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
 	__u32 *p;
 
 	p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
-			btf->nr_types + 1, BTF_MAX_NR_TYPES, 1);
+			btf->nr_types, BTF_MAX_NR_TYPES, 1);
 	if (!p)
 		return -ENOMEM;
 
@@ -252,12 +274,16 @@ static int btf_parse_str_sec(struct btf *btf)
 	const char *start = btf->strs_data;
 	const char *end = start + btf->hdr->str_len;
 
-	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
-	    start[0] || end[-1]) {
+	if (btf->base_btf && hdr->str_len == 0)
+		return 0;
+	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
+		pr_debug("Invalid BTF string section\n");
+		return -EINVAL;
+	}
+	if (!btf->base_btf && start[0]) {
 		pr_debug("Invalid BTF string section\n");
 		return -EINVAL;
 	}
-
 	return 0;
 }
 
@@ -372,19 +398,9 @@ static int btf_parse_type_sec(struct btf *btf)
 	struct btf_header *hdr = btf->hdr;
 	void *next_type = btf->types_data;
 	void *end_type = next_type + hdr->type_len;
-	int err, i = 0, type_size;
-
-	/* VOID (type_id == 0) is specially handled by btf__get_type_by_id(),
-	 * so ensure we can never properly use its offset from index by
-	 * setting it to a large value
-	 */
-	err = btf_add_type_idx_entry(btf, UINT_MAX);
-	if (err)
-		return err;
+	int err, type_size;
 
 	while (next_type + sizeof(struct btf_type) <= end_type) {
-		i++;
-
 		if (btf->swapped_endian)
 			btf_bswap_type_base(next_type);
 
@@ -392,7 +408,7 @@ static int btf_parse_type_sec(struct btf *btf)
 		if (type_size < 0)
 			return type_size;
 		if (next_type + type_size > end_type) {
-			pr_warn("BTF type [%d] is malformed\n", i);
+			pr_warn("BTF type [%d] is malformed\n", btf->start_id + btf->nr_types);
 			return -EINVAL;
 		}
 
@@ -417,7 +433,7 @@ static int btf_parse_type_sec(struct btf *btf)
 
 __u32 btf__get_nr_types(const struct btf *btf)
 {
-	return btf->nr_types;
+	return btf->start_id + btf->nr_types - 1;
 }
 
 /* internal helper returning non-const pointer to a type */
@@ -425,13 +441,14 @@ static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 {
 	if (type_id == 0)
 		return &btf_void;
-
-	return btf->types_data + btf->type_offs[type_id];
+	if (type_id < btf->start_id)
+		return btf_type_by_id(btf->base_btf, type_id);
+	return btf->types_data + btf->type_offs[type_id - btf->start_id];
 }
 
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
-	if (type_id > btf->nr_types)
+	if (type_id >= btf->start_id + btf->nr_types)
 		return NULL;
 	return btf_type_by_id((struct btf *)btf, type_id);
 }
@@ -440,9 +457,13 @@ static int determine_ptr_size(const struct btf *btf)
 {
 	const struct btf_type *t;
 	const char *name;
-	int i;
+	int i, n;
 
-	for (i = 1; i <= btf->nr_types; i++) {
+	if (btf->base_btf && btf->base_btf->ptr_sz > 0)
+		return btf->base_btf->ptr_sz;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
 		t = btf__type_by_id(btf, i);
 		if (!btf_is_int(t))
 			continue;
@@ -725,7 +746,7 @@ void btf__free(struct btf *btf)
 	free(btf);
 }
 
-struct btf *btf__new_empty(void)
+static struct btf *btf_new_empty(struct btf *base_btf)
 {
 	struct btf *btf;
 
@@ -733,12 +754,21 @@ struct btf *btf__new_empty(void)
 	if (!btf)
 		return ERR_PTR(-ENOMEM);
 
+	btf->nr_types = 0;
+	btf->start_id = 1;
+	btf->start_str_off = 0;
 	btf->fd = -1;
 	btf->ptr_sz = sizeof(void *);
 	btf->swapped_endian = false;
 
+	if (base_btf) {
+		btf->base_btf = base_btf;
+		btf->start_id = btf__get_nr_types(base_btf) + 1;
+		btf->start_str_off = base_btf->hdr->str_len;
+	}
+
 	/* +1 for empty string at offset 0 */
-	btf->raw_size = sizeof(struct btf_header) + 1;
+	btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1);
 	btf->raw_data = calloc(1, btf->raw_size);
 	if (!btf->raw_data) {
 		free(btf);
@@ -752,12 +782,22 @@ struct btf *btf__new_empty(void)
 
 	btf->types_data = btf->raw_data + btf->hdr->hdr_len;
 	btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
-	btf->hdr->str_len = 1; /* empty string at offset 0 */
+	btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
 
 	return btf;
 }
 
-struct btf *btf__new(const void *data, __u32 size)
+struct btf *btf__new_empty(void)
+{
+	return btf_new_empty(NULL);
+}
+
+struct btf *btf__new_empty_split(struct btf *base_btf)
+{
+	return btf_new_empty(base_btf);
+}
+
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
 {
 	struct btf *btf;
 	int err;
@@ -766,6 +806,16 @@ struct btf *btf__new(const void *data, __u32 size)
 	if (!btf)
 		return ERR_PTR(-ENOMEM);
 
+	btf->nr_types = 0;
+	btf->start_id = 1;
+	btf->start_str_off = 0;
+
+	if (base_btf) {
+		btf->base_btf = base_btf;
+		btf->start_id = btf__get_nr_types(base_btf) + 1;
+		btf->start_str_off = base_btf->hdr->str_len;
+	}
+
 	btf->raw_data = malloc(size);
 	if (!btf->raw_data) {
 		err = -ENOMEM;
@@ -798,7 +848,13 @@ done:
 	return btf;
 }
 
-struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+struct btf *btf__new(const void *data, __u32 size)
+{
+	return btf_new(data, size, NULL);
+}
+
+static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
+				 struct btf_ext **btf_ext)
 {
 	Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
 	int err = 0, fd = -1, idx = 0;
@@ -876,7 +932,7 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 		err = -ENOENT;
 		goto done;
 	}
-	btf = btf__new(btf_data->d_buf, btf_data->d_size);
+	btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
 	if (IS_ERR(btf))
 		goto done;
 
@@ -921,7 +977,17 @@ done:
 	return btf;
 }
 
-struct btf *btf__parse_raw(const char *path)
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+	return btf_parse_elf(path, NULL, btf_ext);
+}
+
+struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
+{
+	return btf_parse_elf(path, base_btf, NULL);
+}
+
+static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
 {
 	struct btf *btf = NULL;
 	void *data = NULL;
@@ -975,7 +1041,7 @@ struct btf *btf__parse_raw(const char *path)
 	}
 
 	/* finally parse BTF data */
-	btf = btf__new(data, sz);
+	btf = btf_new(data, sz, base_btf);
 
 err_out:
 	free(data);
@@ -984,18 +1050,38 @@ err_out:
 	return err ? ERR_PTR(err) : btf;
 }
 
-struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+struct btf *btf__parse_raw(const char *path)
+{
+	return btf_parse_raw(path, NULL);
+}
+
+struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
+{
+	return btf_parse_raw(path, base_btf);
+}
+
+static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
 {
 	struct btf *btf;
 
 	if (btf_ext)
 		*btf_ext = NULL;
 
-	btf = btf__parse_raw(path);
+	btf = btf_parse_raw(path, base_btf);
 	if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
 		return btf;
 
-	return btf__parse_elf(path, btf_ext);
+	return btf_parse_elf(path, base_btf, btf_ext);
+}
+
+struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
+{
+	return btf_parse(path, NULL, btf_ext);
+}
+
+struct btf *btf__parse_split(const char *path, struct btf *base_btf)
+{
+	return btf_parse(path, base_btf, NULL);
 }
 
 static int compare_vsi_off(const void *_a, const void *_b)
@@ -1179,8 +1265,8 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
 
 	memcpy(p, btf->types_data, hdr->type_len);
 	if (swap_endian) {
-		for (i = 1; i <= btf->nr_types; i++) {
-			t = p  + btf->type_offs[i];
+		for (i = 0; i < btf->nr_types; i++) {
+			t = p + btf->type_offs[i];
 			/* btf_bswap_type_rest() relies on native t->info, so
 			 * we swap base type info after we swapped all the
 			 * additional information
@@ -1223,8 +1309,10 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
 
 const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
 {
-	if (offset < btf->hdr->str_len)
-		return btf->strs_data + offset;
+	if (offset < btf->start_str_off)
+		return btf__str_by_offset(btf->base_btf, offset);
+	else if (offset - btf->start_str_off < btf->hdr->str_len)
+		return btf->strs_data + (offset - btf->start_str_off);
 	else
 		return NULL;
 }
@@ -1461,7 +1549,10 @@ static int btf_ensure_modifiable(struct btf *btf)
 	/* if BTF was created from scratch, all strings are guaranteed to be
 	 * unique and deduplicated
 	 */
-	btf->strs_deduped = btf->hdr->str_len <= 1;
+	if (btf->hdr->str_len == 0)
+		btf->strs_deduped = true;
+	if (!btf->base_btf && btf->hdr->str_len == 1)
+		btf->strs_deduped = true;
 
 	/* invalidate raw_data representation */
 	btf_invalidate_raw_data(btf);
@@ -1493,6 +1584,14 @@ int btf__find_str(struct btf *btf, const char *s)
 	long old_off, new_off, len;
 	void *p;
 
+	if (btf->base_btf) {
+		int ret;
+
+		ret = btf__find_str(btf->base_btf, s);
+		if (ret != -ENOENT)
+			return ret;
+	}
+
 	/* BTF needs to be in a modifiable state to build string lookup index */
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
@@ -1507,7 +1606,7 @@ int btf__find_str(struct btf *btf, const char *s)
 	memcpy(p, s, len);
 
 	if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
-		return old_off;
+		return btf->start_str_off + old_off;
 
 	return -ENOENT;
 }
@@ -1523,6 +1622,14 @@ int btf__add_str(struct btf *btf, const char *s)
 	void *p;
 	int err;
 
+	if (btf->base_btf) {
+		int ret;
+
+		ret = btf__find_str(btf->base_btf, s);
+		if (ret != -ENOENT)
+			return ret;
+	}
+
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
 
@@ -1549,12 +1656,12 @@ int btf__add_str(struct btf *btf, const char *s)
 	err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
 			      HASHMAP_ADD, (const void **)&old_off, NULL);
 	if (err == -EEXIST)
-		return old_off; /* duplicated string, return existing offset */
+		return btf->start_str_off + old_off; /* duplicated string, return existing offset */
 	if (err)
 		return err;
 
 	btf->hdr->str_len += len; /* new unique string, adjust data length */
-	return new_off;
+	return btf->start_str_off + new_off;
 }
 
 static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
@@ -1584,7 +1691,7 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 	btf->hdr->type_len += data_sz;
 	btf->hdr->str_off += data_sz;
 	btf->nr_types++;
-	return btf->nr_types;
+	return btf->start_id + btf->nr_types - 1;
 }
 
 /*
@@ -4167,14 +4274,14 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
 
 		memmove(p, btf__type_by_id(d->btf, i), len);
 		d->hypot_map[i] = next_type_id;
-		d->btf->type_offs[next_type_id] = p - d->btf->types_data;
+		d->btf->type_offs[next_type_id - 1] = p - d->btf->types_data;
 		p += len;
 		next_type_id++;
 	}
 
 	/* shrink struct btf's internal types index and update btf_header */
 	d->btf->nr_types = next_type_id - 1;
-	d->btf->type_offs_cap = d->btf->nr_types + 1;
+	d->btf->type_offs_cap = d->btf->nr_types;
 	d->btf->hdr->type_len = p - d->btf->types_data;
 	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
 				       sizeof(*new_offs));
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 57247240a20a..1093f6fe6800 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -31,11 +31,19 @@ enum btf_endianness {
 };
 
 LIBBPF_API void btf__free(struct btf *btf);
+
 LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf);
 LIBBPF_API struct btf *btf__new_empty(void);
+LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
+
 LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);
+LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf);
 LIBBPF_API struct btf *btf__parse_raw(const char *path);
+LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
+
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 4ebfadf45b47..29ff4807b909 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -337,3 +337,12 @@ LIBBPF_0.2.0 {
 		perf_buffer__consume_buffer;
 		xsk_socket__create_shared;
 } LIBBPF_0.1.0;
+
+LIBBPF_0.3.0 {
+	global:
+		btf__parse_elf_split;
+		btf__parse_raw_split;
+		btf__parse_split;
+		btf__new_empty_split;
+		btf__new_split;
+} LIBBPF_0.2.0;
-- 
cgit v1.2.3-70-g09d2


From 197389da2fbfbc3cefb229268c32d858d9575c96 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:55 -0800
Subject: selftests/bpf: Add split BTF basic test

Add selftest validating ability to programmatically generate and then dump
split BTF.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-6-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/btf_split.c | 99 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h           | 11 +++
 2 files changed, 110 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_split.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
new file mode 100644
index 000000000000..ca7c2a91610a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+	vfprintf(ctx, fmt, args);
+}
+
+void test_btf_split() {
+	struct btf_dump_opts opts;
+	struct btf_dump *d = NULL;
+	const struct btf_type *t;
+	struct btf *btf1, *btf2;
+	int str_off, i, err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 1);				/* [2] ptr to int */
+
+	btf__add_struct(btf1, "s1", 4);			/* [3] struct s1 { */
+	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
+							/* } */
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	/* pointer size should be "inherited" from main BTF */
+	ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");
+
+	str_off = btf__find_str(btf2, "int");
+	ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");
+
+	t = btf__type_by_id(btf2, 1);
+	if (!ASSERT_OK_PTR(t, "int_type"))
+		goto cleanup;
+	ASSERT_EQ(btf_is_int(t), true, "int_kind");
+	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");
+
+	btf__add_struct(btf2, "s2", 16);		/* [4] struct s2 {	*/
+	btf__add_field(btf2, "f1", 3, 0, 0);		/*      struct s1 f1;	*/
+	btf__add_field(btf2, "f2", 1, 32, 0);		/*      int f2;		*/
+	btf__add_field(btf2, "f3", 2, 64, 0);		/*      int *f3;	*/
+							/* } */
+
+	t = btf__type_by_id(btf1, 4);
+	ASSERT_NULL(t, "split_type_in_main");
+
+	t = btf__type_by_id(btf2, 4);
+	if (!ASSERT_OK_PTR(t, "split_struct_type"))
+		goto cleanup;
+	ASSERT_EQ(btf_is_struct(t), true, "split_struct_kind");
+	ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
+	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+
+	/* BTF-to-C dump of split BTF */
+	dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+	if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+		return;
+	opts.ctx = dump_buf_file;
+	d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf);
+	if (!ASSERT_OK_PTR(d, "btf_dump__new"))
+		goto cleanup;
+	for (i = 1; i <= btf__get_nr_types(btf2); i++) {
+		err = btf_dump__dump_type(d, i);
+		ASSERT_OK(err, "dump_type_ok");
+	}
+	fflush(dump_buf_file);
+	dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+	ASSERT_STREQ(dump_buf,
+"struct s1 {\n"
+"	int f1;\n"
+"};\n"
+"\n"
+"struct s2 {\n"
+"	struct s1 f1;\n"
+"	int f2;\n"
+"	int *f3;\n"
+"};\n\n", "c_dump");
+
+cleanup:
+	if (dump_buf_file)
+		fclose(dump_buf_file);
+	free(dump_buf);
+	btf_dump__free(d);
+	btf__free(btf1);
+	btf__free(btf2);
+}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 238f5f61189e..d6b14853f3bc 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -141,6 +141,17 @@ extern int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_NEQ(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act != ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld == expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
 #define ASSERT_STREQ(actual, expected, name) ({				\
 	static int duration = 0;					\
 	const char *___act = actual;					\
-- 
cgit v1.2.3-70-g09d2


From 1306c980cf892bc17e7296d3e9ab8e9082f893a1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 4 Nov 2020 20:33:56 -0800
Subject: selftests/bpf: Add checking of raw type dump in BTF writer APIs
 selftests

Add re-usable btf_helpers.{c,h} to provide BTF-related testing routines. Start
with adding a raw BTF dumping helpers.

Raw BTF dump is the most succinct and at the same time a very human-friendly
way to validate exact contents of BTF types. Cross-validate raw BTF dump and
writable BTF in a single selftest. Raw type dump checks also serve as a good
self-documentation.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-7-andrii@kernel.org
---
 tools/testing/selftests/bpf/Makefile               |   2 +-
 tools/testing/selftests/bpf/btf_helpers.c          | 200 +++++++++++++++++++++
 tools/testing/selftests/bpf/btf_helpers.h          |  12 ++
 tools/testing/selftests/bpf/prog_tests/btf_write.c |  43 +++++
 4 files changed, 256 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/btf_helpers.c
 create mode 100644 tools/testing/selftests/bpf/btf_helpers.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 50e5b18fc455..c1708ffa6b1c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -386,7 +386,7 @@ TRUNNER_TESTS_DIR := prog_tests
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
-			 flow_dissector_load.h
+			 btf_helpers.c	flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
new file mode 100644
index 000000000000..abc3f6c04cfc
--- /dev/null
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <stdio.h>
+#include <errno.h>
+#include <bpf/btf.h>
+
+static const char * const btf_kind_str_mapping[] = {
+	[BTF_KIND_UNKN]		= "UNKNOWN",
+	[BTF_KIND_INT]		= "INT",
+	[BTF_KIND_PTR]		= "PTR",
+	[BTF_KIND_ARRAY]	= "ARRAY",
+	[BTF_KIND_STRUCT]	= "STRUCT",
+	[BTF_KIND_UNION]	= "UNION",
+	[BTF_KIND_ENUM]		= "ENUM",
+	[BTF_KIND_FWD]		= "FWD",
+	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
+	[BTF_KIND_VOLATILE]	= "VOLATILE",
+	[BTF_KIND_CONST]	= "CONST",
+	[BTF_KIND_RESTRICT]	= "RESTRICT",
+	[BTF_KIND_FUNC]		= "FUNC",
+	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
+	[BTF_KIND_VAR]		= "VAR",
+	[BTF_KIND_DATASEC]	= "DATASEC",
+};
+
+static const char *btf_kind_str(__u16 kind)
+{
+	if (kind > BTF_KIND_DATASEC)
+		return "UNKNOWN";
+	return btf_kind_str_mapping[kind];
+}
+
+static const char *btf_int_enc_str(__u8 encoding)
+{
+	switch (encoding) {
+	case 0:
+		return "(none)";
+	case BTF_INT_SIGNED:
+		return "SIGNED";
+	case BTF_INT_CHAR:
+		return "CHAR";
+	case BTF_INT_BOOL:
+		return "BOOL";
+	default:
+		return "UNKN";
+	}
+}
+
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+	switch (linkage) {
+	case BTF_VAR_STATIC:
+		return "static";
+	case BTF_VAR_GLOBAL_ALLOCATED:
+		return "global-alloc";
+	default:
+		return "(unknown)";
+	}
+}
+
+static const char *btf_func_linkage_str(const struct btf_type *t)
+{
+	switch (btf_vlen(t)) {
+	case BTF_FUNC_STATIC:
+		return "static";
+	case BTF_FUNC_GLOBAL:
+		return "global";
+	case BTF_FUNC_EXTERN:
+		return "extern";
+	default:
+		return "(unknown)";
+	}
+}
+
+static const char *btf_str(const struct btf *btf, __u32 off)
+{
+	if (!off)
+		return "(anon)";
+	return btf__str_by_offset(btf, off) ?: "(invalid)";
+}
+
+int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t;
+	int kind, i;
+	__u32 vlen;
+
+	t = btf__type_by_id(btf, id);
+	if (!t)
+		return -EINVAL;
+
+	vlen = btf_vlen(t);
+	kind = btf_kind(t);
+
+	fprintf(out, "[%u] %s '%s'", id, btf_kind_str(kind), btf_str(btf, t->name_off));
+
+	switch (kind) {
+	case BTF_KIND_INT:
+		fprintf(out, " size=%u bits_offset=%u nr_bits=%u encoding=%s",
+			t->size, btf_int_offset(t), btf_int_bits(t),
+			btf_int_enc_str(btf_int_encoding(t)));
+		break;
+	case BTF_KIND_PTR:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPEDEF:
+		fprintf(out, " type_id=%u", t->type);
+		break;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *arr = btf_array(t);
+
+		fprintf(out, " type_id=%u index_type_id=%u nr_elems=%u",
+			arr->type, arr->index_type, arr->nelems);
+		break;
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = btf_members(t);
+
+		fprintf(out, " size=%u vlen=%u", t->size, vlen);
+		for (i = 0; i < vlen; i++, m++) {
+			__u32 bit_off, bit_sz;
+
+			bit_off = btf_member_bit_offset(t, i);
+			bit_sz = btf_member_bitfield_size(t, i);
+			fprintf(out, "\n\t'%s' type_id=%u bits_offset=%u",
+				btf_str(btf, m->name_off), m->type, bit_off);
+			if (bit_sz)
+				fprintf(out, " bitfield_size=%u", bit_sz);
+		}
+		break;
+	}
+	case BTF_KIND_ENUM: {
+		const struct btf_enum *v = btf_enum(t);
+
+		fprintf(out, " size=%u vlen=%u", t->size, vlen);
+		for (i = 0; i < vlen; i++, v++) {
+			fprintf(out, "\n\t'%s' val=%u",
+				btf_str(btf, v->name_off), v->val);
+		}
+		break;
+	}
+	case BTF_KIND_FWD:
+		fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct");
+		break;
+	case BTF_KIND_FUNC:
+		fprintf(out, " type_id=%u linkage=%s", t->type, btf_func_linkage_str(t));
+		break;
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = btf_params(t);
+
+		fprintf(out, " ret_type_id=%u vlen=%u", t->type, vlen);
+		for (i = 0; i < vlen; i++, p++) {
+			fprintf(out, "\n\t'%s' type_id=%u",
+				btf_str(btf, p->name_off), p->type);
+		}
+		break;
+	}
+	case BTF_KIND_VAR:
+		fprintf(out, " type_id=%u, linkage=%s",
+			t->type, btf_var_linkage_str(btf_var(t)->linkage));
+		break;
+	case BTF_KIND_DATASEC: {
+		const struct btf_var_secinfo *v = btf_var_secinfos(t);
+
+		fprintf(out, " size=%u vlen=%u", t->size, vlen);
+		for (i = 0; i < vlen; i++, v++) {
+			fprintf(out, "\n\ttype_id=%u offset=%u size=%u",
+				v->type, v->offset, v->size);
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/* Print raw BTF type dump into a local buffer and return string pointer back.
+ * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
+ */
+const char *btf_type_raw_dump(const struct btf *btf, int type_id)
+{
+	static char buf[16 * 1024];
+	FILE *buf_file;
+
+	buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
+	if (!buf_file) {
+		fprintf(stderr, "Failed to open memstream: %d\n", errno);
+		return NULL;
+	}
+
+	fprintf_btf_type_raw(buf_file, btf, type_id);
+	fflush(buf_file);
+	fclose(buf_file);
+
+	return buf;
+}
diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h
new file mode 100644
index 000000000000..2c9ce1b61dc9
--- /dev/null
+++ b/tools/testing/selftests/bpf/btf_helpers.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#ifndef __BTF_HELPERS_H
+#define __BTF_HELPERS_H
+
+#include <stdio.h>
+#include <bpf/btf.h>
+
+int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id);
+const char *btf_type_raw_dump(const struct btf *btf, int type_id);
+
+#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index 314e1e7c36df..f36da15b134f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 #include <test_progs.h>
 #include <bpf/btf.h>
+#include "btf_helpers.h"
 
 static int duration = 0;
 
@@ -39,6 +40,8 @@ void test_btf_write() {
 	ASSERT_EQ(t->size, 4, "int_sz");
 	ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc");
 	ASSERT_EQ(btf_int_bits(t), 32, "int_bits");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 1),
+		     "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "raw_dump");
 
 	/* invalid int size */
 	id = btf__add_int(btf, "bad sz int", 7, 0);
@@ -59,24 +62,32 @@ void test_btf_write() {
 	t = btf__type_by_id(btf, 2);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind");
 	ASSERT_EQ(t->type, 1, "ptr_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 2),
+		     "[2] PTR '(anon)' type_id=1", "raw_dump");
 
 	id = btf__add_const(btf, 5); /* points forward to restrict */
 	ASSERT_EQ(id, 3, "const_id");
 	t = btf__type_by_id(btf, 3);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind");
 	ASSERT_EQ(t->type, 5, "const_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 3),
+		     "[3] CONST '(anon)' type_id=5", "raw_dump");
 
 	id = btf__add_volatile(btf, 3);
 	ASSERT_EQ(id, 4, "volatile_id");
 	t = btf__type_by_id(btf, 4);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind");
 	ASSERT_EQ(t->type, 3, "volatile_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 4),
+		     "[4] VOLATILE '(anon)' type_id=3", "raw_dump");
 
 	id = btf__add_restrict(btf, 4);
 	ASSERT_EQ(id, 5, "restrict_id");
 	t = btf__type_by_id(btf, 5);
 	ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind");
 	ASSERT_EQ(t->type, 4, "restrict_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 5),
+		     "[5] RESTRICT '(anon)' type_id=4", "raw_dump");
 
 	/* ARRAY */
 	id = btf__add_array(btf, 1, 2, 10); /* int *[10] */
@@ -86,6 +97,8 @@ void test_btf_write() {
 	ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type");
 	ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type");
 	ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 6),
+		     "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", "raw_dump");
 
 	/* STRUCT */
 	err = btf__add_field(btf, "field", 1, 0, 0);
@@ -113,6 +126,10 @@ void test_btf_write() {
 	ASSERT_EQ(m->type, 1, "f2_type");
 	ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off");
 	ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 7),
+		     "[7] STRUCT 's1' size=8 vlen=2\n"
+		     "\t'f1' type_id=1 bits_offset=0\n"
+		     "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "raw_dump");
 
 	/* UNION */
 	id = btf__add_union(btf, "u1", 8);
@@ -136,6 +153,9 @@ void test_btf_write() {
 	ASSERT_EQ(m->type, 1, "f1_type");
 	ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
 	ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 8),
+		     "[8] UNION 'u1' size=8 vlen=1\n"
+		     "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", "raw_dump");
 
 	/* ENUM */
 	id = btf__add_enum(btf, "e1", 4);
@@ -156,6 +176,10 @@ void test_btf_write() {
 	v = btf_enum(t) + 1;
 	ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
 	ASSERT_EQ(v->val, 2, "v2_val");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 9),
+		     "[9] ENUM 'e1' size=4 vlen=2\n"
+		     "\t'v1' val=1\n"
+		     "\t'v2' val=2", "raw_dump");
 
 	/* FWDs */
 	id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT);
@@ -164,6 +188,8 @@ void test_btf_write() {
 	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
 	ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 10),
+		     "[10] FWD 'struct_fwd' fwd_kind=struct", "raw_dump");
 
 	id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION);
 	ASSERT_EQ(id, 11, "union_fwd_id");
@@ -171,6 +197,8 @@ void test_btf_write() {
 	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
 	ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 11),
+		     "[11] FWD 'union_fwd' fwd_kind=union", "raw_dump");
 
 	id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM);
 	ASSERT_EQ(id, 12, "enum_fwd_id");
@@ -179,6 +207,8 @@ void test_btf_write() {
 	ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind");
 	ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
 	ASSERT_EQ(t->size, 4, "enum_fwd_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 12),
+		     "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump");
 
 	/* TYPEDEF */
 	id = btf__add_typedef(btf, "typedef1", 1);
@@ -187,6 +217,8 @@ void test_btf_write() {
 	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind");
 	ASSERT_EQ(t->type, 1, "typedef_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 13),
+		     "[13] TYPEDEF 'typedef1' type_id=1", "raw_dump");
 
 	/* FUNC & FUNC_PROTO */
 	id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15);
@@ -196,6 +228,8 @@ void test_btf_write() {
 	ASSERT_EQ(t->type, 15, "func_type");
 	ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind");
 	ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 14),
+		     "[14] FUNC 'func1' type_id=15 linkage=global", "raw_dump");
 
 	id = btf__add_func_proto(btf, 1);
 	ASSERT_EQ(id, 15, "func_proto_id");
@@ -214,6 +248,10 @@ void test_btf_write() {
 	p = btf_params(t) + 1;
 	ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name");
 	ASSERT_EQ(p->type, 2, "p2_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 15),
+		     "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+		     "\t'p1' type_id=1\n"
+		     "\t'p2' type_id=2", "raw_dump");
 
 	/* VAR */
 	id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1);
@@ -223,6 +261,8 @@ void test_btf_write() {
 	ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind");
 	ASSERT_EQ(t->type, 1, "var_type");
 	ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 16),
+		     "[16] VAR 'var1' type_id=1, linkage=global-alloc", "raw_dump");
 
 	/* DATASECT */
 	id = btf__add_datasec(btf, "datasec1", 12);
@@ -239,6 +279,9 @@ void test_btf_write() {
 	ASSERT_EQ(vi->type, 1, "v1_type");
 	ASSERT_EQ(vi->offset, 4, "v1_off");
 	ASSERT_EQ(vi->size, 8, "v1_sz");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 17),
+		     "[17] DATASEC 'datasec1' size=12 vlen=1\n"
+		     "\ttype_id=1 offset=4 size=8", "raw_dump");
 
 	btf__free(btf);
 }
-- 
cgit v1.2.3-70-g09d2


From d8123624506cd62730c9cd9c7672c698e462703d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:57 -0800
Subject: libbpf: Fix BTF data layout checks and allow empty BTF

Make data section layout checks stricter, disallowing overlap of types and
strings data.

Additionally, allow BTFs with no type data. There is nothing inherently wrong
with having BTF with no types (put potentially with some strings). This could
be a situation with kernel module BTFs, if module doesn't introduce any new
type information.

Also fix invalid offset alignment check for btf->hdr->type_off.

Fixes: 8a138aed4a80 ("bpf: btf: Add BTF support to libbpf")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-8-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 0258cf108c0a..20bb88e71f07 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -245,22 +245,18 @@ static int btf_parse_hdr(struct btf *btf)
 		return -EINVAL;
 	}
 
-	if (meta_left < hdr->type_off) {
-		pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
+	if (meta_left < hdr->str_off + hdr->str_len) {
+		pr_debug("Invalid BTF total size:%u\n", btf->raw_size);
 		return -EINVAL;
 	}
 
-	if (meta_left < hdr->str_off) {
-		pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
+	if (hdr->type_off + hdr->type_len > hdr->str_off) {
+		pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
+			 hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
 		return -EINVAL;
 	}
 
-	if (hdr->type_off >= hdr->str_off) {
-		pr_debug("BTF type section offset >= string section offset. No type?\n");
-		return -EINVAL;
-	}
-
-	if (hdr->type_off & 0x02) {
+	if (hdr->type_off % 4) {
 		pr_debug("BTF type section is not aligned to 4 bytes\n");
 		return -EINVAL;
 	}
-- 
cgit v1.2.3-70-g09d2


From f86524efcf9e3f3a7cf75ebcd82cf8f58ec716cc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:58 -0800
Subject: libbpf: Support BTF dedup of split BTFs

Add support for deduplication split BTFs. When deduplicating split BTF, base
BTF is considered to be immutable and can't be modified or adjusted. 99% of
BTF deduplication logic is left intact (module some type numbering adjustments).
There are only two differences.

First, each type in base BTF gets hashed (expect VAR and DATASEC, of course,
those are always considered to be self-canonical instances) and added into
a table of canonical table candidates. Hashing is a shallow, fast operation,
so mostly eliminates the overhead of having entire base BTF to be a part of
BTF dedup.

Second difference is very critical and subtle. While deduplicating split BTF
types, it is possible to discover that one of immutable base BTF BTF_KIND_FWD
types can and should be resolved to a full STRUCT/UNION type from the split
BTF part.  This is, obviously, can't happen because we can't modify the base
BTF types anymore. So because of that, any type in split BTF that directly or
indirectly references that newly-to-be-resolved FWD type can't be considered
to be equivalent to the corresponding canonical types in base BTF, because
that would result in a loss of type resolution information. So in such case,
split BTF types will be deduplicated separately and will cause some
duplication of type information, which is unavoidable.

With those two changes, the rest of the algorithm manages to deduplicate split
BTF correctly, pointing all the duplicates to their canonical counter-parts in
base BTF, but also is deduplicating whatever unique types are present in split
BTF on their own.

Also, theoretically, split BTF after deduplication could end up with either
empty type section or empty string section. This is handled by libbpf
correctly in one of previous patches in the series.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-9-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 221 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 168 insertions(+), 53 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 20bb88e71f07..8d04d9becb67 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -2718,6 +2718,7 @@ struct btf_dedup;
 static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 				       const struct btf_dedup_opts *opts);
 static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_prep(struct btf_dedup *d);
 static int btf_dedup_strings(struct btf_dedup *d);
 static int btf_dedup_prim_types(struct btf_dedup *d);
 static int btf_dedup_struct_types(struct btf_dedup *d);
@@ -2876,6 +2877,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 	if (btf_ensure_modifiable(btf))
 		return -ENOMEM;
 
+	err = btf_dedup_prep(d);
+	if (err) {
+		pr_debug("btf_dedup_prep failed:%d\n", err);
+		goto done;
+	}
 	err = btf_dedup_strings(d);
 	if (err < 0) {
 		pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -2938,6 +2944,13 @@ struct btf_dedup {
 	__u32 *hypot_list;
 	size_t hypot_cnt;
 	size_t hypot_cap;
+	/* Whether hypothetical mapping, if successful, would need to adjust
+	 * already canonicalized types (due to a new forward declaration to
+	 * concrete type resolution). In such case, during split BTF dedup
+	 * candidate type would still be considered as different, because base
+	 * BTF is considered to be immutable.
+	 */
+	bool hypot_adjust_canon;
 	/* Various option modifying behavior of algorithm */
 	struct btf_dedup_opts opts;
 	/* temporary strings deduplication state */
@@ -2985,6 +2998,7 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
 	for (i = 0; i < d->hypot_cnt; i++)
 		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
 	d->hypot_cnt = 0;
+	d->hypot_adjust_canon = false;
 }
 
 static void btf_dedup_free(struct btf_dedup *d)
@@ -3024,7 +3038,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 {
 	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
 	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
-	int i, err = 0;
+	int i, err = 0, type_cnt;
 
 	if (!d)
 		return ERR_PTR(-ENOMEM);
@@ -3044,14 +3058,15 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 		goto done;
 	}
 
-	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	type_cnt = btf__get_nr_types(btf) + 1;
+	d->map = malloc(sizeof(__u32) * type_cnt);
 	if (!d->map) {
 		err = -ENOMEM;
 		goto done;
 	}
 	/* special BTF "void" type is made canonical immediately */
 	d->map[0] = 0;
-	for (i = 1; i <= btf->nr_types; i++) {
+	for (i = 1; i < type_cnt; i++) {
 		struct btf_type *t = btf_type_by_id(d->btf, i);
 
 		/* VAR and DATASEC are never deduped and are self-canonical */
@@ -3061,12 +3076,12 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 			d->map[i] = BTF_UNPROCESSED_ID;
 	}
 
-	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	d->hypot_map = malloc(sizeof(__u32) * type_cnt);
 	if (!d->hypot_map) {
 		err = -ENOMEM;
 		goto done;
 	}
-	for (i = 0; i <= btf->nr_types; i++)
+	for (i = 0; i < type_cnt; i++)
 		d->hypot_map[i] = BTF_UNPROCESSED_ID;
 
 done:
@@ -3090,8 +3105,8 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
 	int i, j, r, rec_size;
 	struct btf_type *t;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		t = btf_type_by_id(d->btf, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		t = btf_type_by_id(d->btf, d->btf->start_id + i);
 		r = fn(&t->name_off, ctx);
 		if (r)
 			return r;
@@ -3174,15 +3189,27 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
 static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 {
 	struct btf_dedup *d = ctx;
+	__u32 str_off = *str_off_ptr;
 	long old_off, new_off, len;
 	const char *s;
 	void *p;
 	int err;
 
-	if (*str_off_ptr == 0)
+	/* don't touch empty string or string in main BTF */
+	if (str_off == 0 || str_off < d->btf->start_str_off)
 		return 0;
 
-	s = btf__str_by_offset(d->btf, *str_off_ptr);
+	s = btf__str_by_offset(d->btf, str_off);
+	if (d->btf->base_btf) {
+		err = btf__find_str(d->btf->base_btf, s);
+		if (err >= 0) {
+			*str_off_ptr = err;
+			return 0;
+		}
+		if (err != -ENOENT)
+			return err;
+	}
+
 	len = strlen(s) + 1;
 
 	new_off = d->strs_len;
@@ -3199,11 +3226,11 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
 	err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
 			      HASHMAP_ADD, (const void **)&old_off, NULL);
 	if (err == -EEXIST) {
-		*str_off_ptr = old_off;
+		*str_off_ptr = d->btf->start_str_off + old_off;
 	} else if (err) {
 		return err;
 	} else {
-		*str_off_ptr = new_off;
+		*str_off_ptr = d->btf->start_str_off + new_off;
 		d->strs_len += len;
 	}
 	return 0;
@@ -3228,13 +3255,6 @@ static int btf_dedup_strings(struct btf_dedup *d)
 	if (d->btf->strs_deduped)
 		return 0;
 
-	s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
-	if (!s)
-		return -ENOMEM;
-	/* initial empty string */
-	s[0] = 0;
-	d->strs_len = 1;
-
 	/* temporarily switch to use btf_dedup's strs_data for strings for hash
 	 * functions; later we'll just transfer hashmap to struct btf as is,
 	 * along the strs_data
@@ -3248,13 +3268,22 @@ static int btf_dedup_strings(struct btf_dedup *d)
 		goto err_out;
 	}
 
-	/* insert empty string; we won't be looking it up during strings
-	 * dedup, but it's good to have it for generic BTF string lookups
-	 */
-	err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
-			      HASHMAP_ADD, NULL, NULL);
-	if (err)
-		goto err_out;
+	if (!d->btf->base_btf) {
+		s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
+		if (!s)
+			return -ENOMEM;
+		/* initial empty string */
+		s[0] = 0;
+		d->strs_len = 1;
+
+		/* insert empty string; we won't be looking it up during strings
+		 * dedup, but it's good to have it for generic BTF string lookups
+		 */
+		err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
+				      HASHMAP_ADD, NULL, NULL);
+		if (err)
+			goto err_out;
+	}
 
 	/* remap string offsets */
 	err = btf_for_each_str_off(d, strs_dedup_remap_str_off, d);
@@ -3549,6 +3578,66 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
 	return true;
 }
 
+/* Prepare split BTF for deduplication by calculating hashes of base BTF's
+ * types and initializing the rest of the state (canonical type mapping) for
+ * the fixed base BTF part.
+ */
+static int btf_dedup_prep(struct btf_dedup *d)
+{
+	struct btf_type *t;
+	int type_id;
+	long h;
+
+	if (!d->btf->base_btf)
+		return 0;
+
+	for (type_id = 1; type_id < d->btf->start_id; type_id++) {
+		t = btf_type_by_id(d->btf, type_id);
+
+		/* all base BTF types are self-canonical by definition */
+		d->map[type_id] = type_id;
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_VAR:
+		case BTF_KIND_DATASEC:
+			/* VAR and DATASEC are never hash/deduplicated */
+			continue;
+		case BTF_KIND_CONST:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_PTR:
+		case BTF_KIND_FWD:
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FUNC:
+			h = btf_hash_common(t);
+			break;
+		case BTF_KIND_INT:
+			h = btf_hash_int(t);
+			break;
+		case BTF_KIND_ENUM:
+			h = btf_hash_enum(t);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			h = btf_hash_struct(t);
+			break;
+		case BTF_KIND_ARRAY:
+			h = btf_hash_array(t);
+			break;
+		case BTF_KIND_FUNC_PROTO:
+			h = btf_hash_fnproto(t);
+			break;
+		default:
+			pr_debug("unknown kind %d for type [%d]\n", btf_kind(t), type_id);
+			return -EINVAL;
+		}
+		if (btf_dedup_table_add(d, h, type_id))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /*
  * Deduplicate primitive types, that can't reference other types, by calculating
  * their type signature hash and comparing them with any possible canonical
@@ -3642,8 +3731,8 @@ static int btf_dedup_prim_types(struct btf_dedup *d)
 {
 	int i, err;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_prim_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_prim_type(d, d->btf->start_id + i);
 		if (err)
 			return err;
 	}
@@ -3833,6 +3922,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		} else {
 			real_kind = cand_kind;
 			fwd_kind = btf_fwd_kind(canon_type);
+			/* we'd need to resolve base FWD to STRUCT/UNION */
+			if (fwd_kind == real_kind && canon_id < d->btf->start_id)
+				d->hypot_adjust_canon = true;
 		}
 		return fwd_kind == real_kind;
 	}
@@ -3870,8 +3962,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 			return 0;
 		cand_arr = btf_array(cand_type);
 		canon_arr = btf_array(canon_type);
-		eq = btf_dedup_is_equiv(d,
-			cand_arr->index_type, canon_arr->index_type);
+		eq = btf_dedup_is_equiv(d, cand_arr->index_type, canon_arr->index_type);
 		if (eq <= 0)
 			return eq;
 		return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
@@ -3954,16 +4045,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
  */
 static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
 {
-	__u32 cand_type_id, targ_type_id;
+	__u32 canon_type_id, targ_type_id;
 	__u16 t_kind, c_kind;
 	__u32 t_id, c_id;
 	int i;
 
 	for (i = 0; i < d->hypot_cnt; i++) {
-		cand_type_id = d->hypot_list[i];
-		targ_type_id = d->hypot_map[cand_type_id];
+		canon_type_id = d->hypot_list[i];
+		targ_type_id = d->hypot_map[canon_type_id];
 		t_id = resolve_type_id(d, targ_type_id);
-		c_id = resolve_type_id(d, cand_type_id);
+		c_id = resolve_type_id(d, canon_type_id);
 		t_kind = btf_kind(btf__type_by_id(d->btf, t_id));
 		c_kind = btf_kind(btf__type_by_id(d->btf, c_id));
 		/*
@@ -3978,9 +4069,26 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
 		 * stability is not a requirement for STRUCT/UNION equivalence
 		 * checks, though.
 		 */
+
+		/* if it's the split BTF case, we still need to point base FWD
+		 * to STRUCT/UNION in a split BTF, because FWDs from split BTF
+		 * will be resolved against base FWD. If we don't point base
+		 * canonical FWD to the resolved STRUCT/UNION, then all the
+		 * FWDs in split BTF won't be correctly resolved to a proper
+		 * STRUCT/UNION.
+		 */
 		if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
 			d->map[c_id] = t_id;
-		else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
+
+		/* if graph equivalence determined that we'd need to adjust
+		 * base canonical types, then we need to only point base FWDs
+		 * to STRUCTs/UNIONs and do no more modifications. For all
+		 * other purposes the type graphs were not equivalent.
+		 */
+		if (d->hypot_adjust_canon)
+			continue;
+		
+		if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
 			d->map[t_id] = c_id;
 
 		if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
@@ -4064,8 +4172,10 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 			return eq;
 		if (!eq)
 			continue;
-		new_id = cand_id;
 		btf_dedup_merge_hypot_map(d);
+		if (d->hypot_adjust_canon) /* not really equivalent */
+			continue;
+		new_id = cand_id;
 		break;
 	}
 
@@ -4080,8 +4190,8 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
 {
 	int i, err;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_struct_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_struct_type(d, d->btf->start_id + i);
 		if (err)
 			return err;
 	}
@@ -4224,8 +4334,8 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
 {
 	int i, err;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		err = btf_dedup_ref_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		err = btf_dedup_ref_type(d, d->btf->start_id + i);
 		if (err < 0)
 			return err;
 	}
@@ -4249,39 +4359,44 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
 static int btf_dedup_compact_types(struct btf_dedup *d)
 {
 	__u32 *new_offs;
-	__u32 next_type_id = 1;
+	__u32 next_type_id = d->btf->start_id;
+	const struct btf_type *t;
 	void *p;
-	int i, len;
+	int i, id, len;
 
 	/* we are going to reuse hypot_map to store compaction remapping */
 	d->hypot_map[0] = 0;
-	for (i = 1; i <= d->btf->nr_types; i++)
-		d->hypot_map[i] = BTF_UNPROCESSED_ID;
+	/* base BTF types are not renumbered */
+	for (id = 1; id < d->btf->start_id; id++)
+		d->hypot_map[id] = id;
+	for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++)
+		d->hypot_map[id] = BTF_UNPROCESSED_ID;
 
 	p = d->btf->types_data;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		if (d->map[i] != i)
+	for (i = 0, id = d->btf->start_id; i < d->btf->nr_types; i++, id++) {
+		if (d->map[id] != id)
 			continue;
 
-		len = btf_type_size(btf__type_by_id(d->btf, i));
+		t = btf__type_by_id(d->btf, id);
+		len = btf_type_size(t);
 		if (len < 0)
 			return len;
 
-		memmove(p, btf__type_by_id(d->btf, i), len);
-		d->hypot_map[i] = next_type_id;
-		d->btf->type_offs[next_type_id - 1] = p - d->btf->types_data;
+		memmove(p, t, len);
+		d->hypot_map[id] = next_type_id;
+		d->btf->type_offs[next_type_id - d->btf->start_id] = p - d->btf->types_data;
 		p += len;
 		next_type_id++;
 	}
 
 	/* shrink struct btf's internal types index and update btf_header */
-	d->btf->nr_types = next_type_id - 1;
+	d->btf->nr_types = next_type_id - d->btf->start_id;
 	d->btf->type_offs_cap = d->btf->nr_types;
 	d->btf->hdr->type_len = p - d->btf->types_data;
 	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
 				       sizeof(*new_offs));
-	if (!new_offs)
+	if (d->btf->type_offs_cap && !new_offs)
 		return -ENOMEM;
 	d->btf->type_offs = new_offs;
 	d->btf->hdr->str_off = d->btf->hdr->type_len;
@@ -4413,8 +4528,8 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
 {
 	int i, r;
 
-	for (i = 1; i <= d->btf->nr_types; i++) {
-		r = btf_dedup_remap_type(d, i);
+	for (i = 0; i < d->btf->nr_types; i++) {
+		r = btf_dedup_remap_type(d, d->btf->start_id + i);
 		if (r < 0)
 			return r;
 	}
-- 
cgit v1.2.3-70-g09d2


From 6b6e6b1d09aa20c351a1fce0ea6402da436624a4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:33:59 -0800
Subject: libbpf: Accomodate DWARF/compiler bug with duplicated identical
 arrays

In some cases compiler seems to generate distinct DWARF types for identical
arrays within the same CU. That seems like a bug, but it's already out there
and breaks type graph equivalence checks, so accommodate it anyway by checking
for identical arrays, regardless of their type ID.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-10-andrii@kernel.org
---
 tools/lib/bpf/btf.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 8d04d9becb67..2d0d064c6d31 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -3785,6 +3785,19 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
 	return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
 }
 
+/* Check if given two types are identical ARRAY definitions */
+static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+{
+	struct btf_type *t1, *t2;
+
+	t1 = btf_type_by_id(d->btf, id1);
+	t2 = btf_type_by_id(d->btf, id2);
+	if (!btf_is_array(t1) || !btf_is_array(t2))
+		return 0;
+
+	return btf_equal_array(t1, t2);
+}
+
 /*
  * Check equivalence of BTF type graph formed by candidate struct/union (we'll
  * call it "candidate graph" in this description for brevity) to a type graph
@@ -3895,8 +3908,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 	canon_id = resolve_fwd_id(d, canon_id);
 
 	hypot_type_id = d->hypot_map[canon_id];
-	if (hypot_type_id <= BTF_MAX_NR_TYPES)
-		return hypot_type_id == cand_id;
+	if (hypot_type_id <= BTF_MAX_NR_TYPES) {
+		/* In some cases compiler will generate different DWARF types
+		 * for *identical* array type definitions and use them for
+		 * different fields within the *same* struct. This breaks type
+		 * equivalence check, which makes an assumption that candidate
+		 * types sub-graph has a consistent and deduped-by-compiler
+		 * types within a single CU. So work around that by explicitly
+		 * allowing identical array types here.
+		 */
+		return hypot_type_id == cand_id ||
+		       btf_dedup_identical_arrays(d, hypot_type_id, cand_id);
+	}
 
 	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
 		return -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From 232338fa2fb47726ab7c459419115a6ab6bfb3e3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:34:00 -0800
Subject: selftests/bpf: Add split BTF dedup selftests

Add selftests validating BTF deduplication for split BTF case. Add a helper
macro that allows to validate entire BTF with raw BTF dump, not just
type-by-type. This saves tons of code and complexity.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-11-andrii@kernel.org
---
 tools/testing/selftests/bpf/btf_helpers.c          |  59 ++++
 tools/testing/selftests/bpf/btf_helpers.h          |   7 +
 .../selftests/bpf/prog_tests/btf_dedup_split.c     | 325 +++++++++++++++++++++
 3 files changed, 391 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index abc3f6c04cfc..48f90490f922 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -3,6 +3,8 @@
 #include <stdio.h>
 #include <errno.h>
 #include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include "test_progs.h"
 
 static const char * const btf_kind_str_mapping[] = {
 	[BTF_KIND_UNKN]		= "UNKNOWN",
@@ -198,3 +200,60 @@ const char *btf_type_raw_dump(const struct btf *btf, int type_id)
 
 	return buf;
 }
+
+int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[])
+{
+	int i;
+	bool ok = true;
+
+	ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types");
+
+	for (i = 1; i <= nr_types; i++) {
+		if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump"))
+			ok = false;
+	}
+
+	return ok;
+}
+
+static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+	vfprintf(ctx, fmt, args);
+}
+
+/* Print BTF-to-C dump into a local buffer and return string pointer back.
+ * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
+ */
+const char *btf_type_c_dump(const struct btf *btf)
+{
+	static char buf[16 * 1024];
+	FILE *buf_file;
+	struct btf_dump *d = NULL;
+	struct btf_dump_opts opts = {};
+	int err, i;
+
+	buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
+	if (!buf_file) {
+		fprintf(stderr, "Failed to open memstream: %d\n", errno);
+		return NULL;
+	}
+
+	opts.ctx = buf_file;
+	d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+	if (libbpf_get_error(d)) {
+		fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d));
+		return NULL;
+	}
+
+	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+		err = btf_dump__dump_type(d, i);
+		if (err) {
+			fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
+			return NULL;
+		}
+	}
+
+	fflush(buf_file);
+	fclose(buf_file);
+	return buf;
+}
diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h
index 2c9ce1b61dc9..295c0137d9bd 100644
--- a/tools/testing/selftests/bpf/btf_helpers.h
+++ b/tools/testing/selftests/bpf/btf_helpers.h
@@ -8,5 +8,12 @@
 
 int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id);
 const char *btf_type_raw_dump(const struct btf *btf, int type_id);
+int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]);
 
+#define VALIDATE_RAW_BTF(btf, raw_types...)				\
+	btf_validate_raw(btf,						\
+			 sizeof((const char *[]){raw_types})/sizeof(void *),\
+			 (const char *[]){raw_types})
+
+const char *btf_type_c_dump(const struct btf *btf);
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
new file mode 100644
index 000000000000..64554fd33547
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+static void test_split_simple() {
+	const struct btf_type *t;
+	struct btf *btf1, *btf2;
+	int str_off, err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 1);				/* [2] ptr to int */
+	btf__add_struct(btf1, "s1", 4);			/* [3] struct s1 { */
+	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0");
+
+	ASSERT_STREQ(btf_type_c_dump(btf1), "\
+struct s1 {\n\
+	int f1;\n\
+};\n\n", "c_dump");
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	/* pointer size should be "inherited" from main BTF */
+	ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");
+
+	str_off = btf__find_str(btf2, "int");
+	ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");
+
+	t = btf__type_by_id(btf2, 1);
+	if (!ASSERT_OK_PTR(t, "int_type"))
+		goto cleanup;
+	ASSERT_EQ(btf_is_int(t), true, "int_kind");
+	ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");
+
+	btf__add_struct(btf2, "s2", 16);		/* [4] struct s2 {	*/
+	btf__add_field(btf2, "f1", 6, 0, 0);		/*      struct s1 f1;	*/
+	btf__add_field(btf2, "f2", 5, 32, 0);		/*      int f2;		*/
+	btf__add_field(btf2, "f3", 2, 64, 0);		/*      int *f3;	*/
+							/* } */
+
+	/* duplicated int */
+	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [5] int */
+
+	/* duplicated struct s1 */
+	btf__add_struct(btf2, "s1", 4);			/* [6] struct s1 { */
+	btf__add_field(btf2, "f1", 5, 0, 0);		/*      int f1; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0",
+		"[4] STRUCT 's2' size=16 vlen=3\n"
+		"\t'f1' type_id=6 bits_offset=0\n"
+		"\t'f2' type_id=5 bits_offset=32\n"
+		"\t'f3' type_id=2 bits_offset=64",
+		"[5] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[6] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=5 bits_offset=0");
+
+	ASSERT_STREQ(btf_type_c_dump(btf2), "\
+struct s1 {\n\
+	int f1;\n\
+};\n\
+\n\
+struct s1___2 {\n\
+	int f1;\n\
+};\n\
+\n\
+struct s2 {\n\
+	struct s1___2 f1;\n\
+	int f2;\n\
+	int *f3;\n\
+};\n\n", "c_dump");
+
+	err = btf__dedup(btf2, NULL, NULL);
+	if (!ASSERT_OK(err, "btf_dedup"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0",
+		"[4] STRUCT 's2' size=16 vlen=3\n"
+		"\t'f1' type_id=3 bits_offset=0\n"
+		"\t'f2' type_id=1 bits_offset=32\n"
+		"\t'f3' type_id=2 bits_offset=64");
+
+	ASSERT_STREQ(btf_type_c_dump(btf2), "\
+struct s1 {\n\
+	int f1;\n\
+};\n\
+\n\
+struct s2 {\n\
+	struct s1 f1;\n\
+	int f2;\n\
+	int *f3;\n\
+};\n\n", "c_dump");
+
+cleanup:
+	btf__free(btf2);
+	btf__free(btf1);
+}
+
+static void test_split_fwd_resolve() {
+	struct btf *btf1, *btf2;
+	int err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 4);				/* [2] ptr to struct s1 */
+	btf__add_ptr(btf1, 5);				/* [3] ptr to struct s2 */
+	btf__add_struct(btf1, "s1", 16);		/* [4] struct s1 { */
+	btf__add_field(btf1, "f1", 2, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf1, "f2", 3, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+	btf__add_struct(btf1, "s2", 4);			/* [5] struct s2 { */
+	btf__add_field(btf1, "f1", 1, 0, 0);		/*      int f1; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=4",
+		"[3] PTR '(anon)' type_id=5",
+		"[4] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=3 bits_offset=64",
+		"[5] STRUCT 's2' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0");
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [6] int */
+	btf__add_ptr(btf2, 10);				/* [7] ptr to struct s1 */
+	btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT);	/* [8] fwd for struct s2 */
+	btf__add_ptr(btf2, 8);				/* [9] ptr to fwd struct s2 */
+	btf__add_struct(btf2, "s1", 16);		/* [10] struct s1 { */
+	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=4",
+		"[3] PTR '(anon)' type_id=5",
+		"[4] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=3 bits_offset=64",
+		"[5] STRUCT 's2' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0",
+		"[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[7] PTR '(anon)' type_id=10",
+		"[8] FWD 's2' fwd_kind=struct",
+		"[9] PTR '(anon)' type_id=8",
+		"[10] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=7 bits_offset=0\n"
+		"\t'f2' type_id=9 bits_offset=64");
+
+	err = btf__dedup(btf2, NULL, NULL);
+	if (!ASSERT_OK(err, "btf_dedup"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=4",
+		"[3] PTR '(anon)' type_id=5",
+		"[4] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=3 bits_offset=64",
+		"[5] STRUCT 's2' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0");
+
+cleanup:
+	btf__free(btf2);
+	btf__free(btf1);
+}
+
+static void test_split_struct_duped() {
+	struct btf *btf1, *btf2;
+	int err;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);	/* [1] int */
+	btf__add_ptr(btf1, 5);				/* [2] ptr to struct s1 */
+	btf__add_fwd(btf1, "s2", BTF_FWD_STRUCT);	/* [3] fwd for struct s2 */
+	btf__add_ptr(btf1, 3);				/* [4] ptr to fwd struct s2 */
+	btf__add_struct(btf1, "s1", 16);		/* [5] struct s1 { */
+	btf__add_field(btf1, "f1", 2, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf1, "f2", 4, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=5",
+		"[3] FWD 's2' fwd_kind=struct",
+		"[4] PTR '(anon)' type_id=3",
+		"[5] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=4 bits_offset=64");
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	btf__add_int(btf2, "int", 4, BTF_INT_SIGNED);	/* [6] int */
+	btf__add_ptr(btf2, 10);				/* [7] ptr to struct s1 */
+	btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT);	/* [8] fwd for struct s2 */
+	btf__add_ptr(btf2, 11);				/* [9] ptr to struct s2 */
+	btf__add_struct(btf2, "s1", 16);		/* [10] struct s1 { */
+	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1; */
+	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2; */
+							/* } */
+	btf__add_struct(btf2, "s2", 40);		/* [11] struct s2 {	*/
+	btf__add_field(btf2, "f1", 7, 0, 0);		/*      struct s1 *f1;	*/
+	btf__add_field(btf2, "f2", 9, 64, 0);		/*      struct s2 *f2;	*/
+	btf__add_field(btf2, "f3", 6, 128, 0);		/*      int f3;		*/
+	btf__add_field(btf2, "f4", 10, 192, 0);		/*      struct s1 f4;	*/
+							/* } */
+	btf__add_ptr(btf2, 8);				/* [12] ptr to fwd struct s2 */
+	btf__add_struct(btf2, "s3", 8);			/* [13] struct s3 { */
+	btf__add_field(btf2, "f1", 12, 0, 0);		/*      struct s2 *f1; (fwd) */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=5",
+		"[3] FWD 's2' fwd_kind=struct",
+		"[4] PTR '(anon)' type_id=3",
+		"[5] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=4 bits_offset=64",
+		"[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[7] PTR '(anon)' type_id=10",
+		"[8] FWD 's2' fwd_kind=struct",
+		"[9] PTR '(anon)' type_id=11",
+		"[10] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=7 bits_offset=0\n"
+		"\t'f2' type_id=9 bits_offset=64",
+		"[11] STRUCT 's2' size=40 vlen=4\n"
+		"\t'f1' type_id=7 bits_offset=0\n"
+		"\t'f2' type_id=9 bits_offset=64\n"
+		"\t'f3' type_id=6 bits_offset=128\n"
+		"\t'f4' type_id=10 bits_offset=192",
+		"[12] PTR '(anon)' type_id=8",
+		"[13] STRUCT 's3' size=8 vlen=1\n"
+		"\t'f1' type_id=12 bits_offset=0");
+
+	err = btf__dedup(btf2, NULL, NULL);
+	if (!ASSERT_OK(err, "btf_dedup"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=5",
+		"[3] FWD 's2' fwd_kind=struct",
+		"[4] PTR '(anon)' type_id=3",
+		"[5] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=2 bits_offset=0\n"
+		"\t'f2' type_id=4 bits_offset=64",
+		"[6] PTR '(anon)' type_id=8",
+		"[7] PTR '(anon)' type_id=9",
+		"[8] STRUCT 's1' size=16 vlen=2\n"
+		"\t'f1' type_id=6 bits_offset=0\n"
+		"\t'f2' type_id=7 bits_offset=64",
+		"[9] STRUCT 's2' size=40 vlen=4\n"
+		"\t'f1' type_id=6 bits_offset=0\n"
+		"\t'f2' type_id=7 bits_offset=64\n"
+		"\t'f3' type_id=1 bits_offset=128\n"
+		"\t'f4' type_id=8 bits_offset=192",
+		"[10] STRUCT 's3' size=8 vlen=1\n"
+		"\t'f1' type_id=7 bits_offset=0");
+
+cleanup:
+	btf__free(btf2);
+	btf__free(btf1);
+}
+
+void test_btf_dedup_split()
+{
+	if (test__start_subtest("split_simple"))
+		test_split_simple();
+	if (test__start_subtest("split_struct_duped"))
+		test_split_struct_duped();
+	if (test__start_subtest("split_fwd_resolve"))
+		test_split_fwd_resolve();
+}
-- 
cgit v1.2.3-70-g09d2


From 75fa1777694c245c1e59ac774cb1d58a15ecefeb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 4 Nov 2020 20:34:01 -0800
Subject: tools/bpftool: Add bpftool support for split BTF

Add ability to work with split BTF by providing extra -B flag, which allows to
specify the path to the base BTF file.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201105043402.2530976-12-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c  |  9 ++++++---
 tools/bpf/bpftool/main.c | 15 ++++++++++++++-
 tools/bpf/bpftool/main.h |  1 +
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 8ab142ff5eac..c96b56e8e3a4 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -358,8 +358,12 @@ static int dump_btf_raw(const struct btf *btf,
 		}
 	} else {
 		int cnt = btf__get_nr_types(btf);
+		int start_id = 1;
 
-		for (i = 1; i <= cnt; i++) {
+		if (base_btf)
+			start_id = btf__get_nr_types(base_btf) + 1;
+
+		for (i = start_id; i <= cnt; i++) {
 			t = btf__type_by_id(btf, i);
 			dump_btf_type(btf, i, t);
 		}
@@ -438,7 +442,6 @@ static int do_dump(int argc, char **argv)
 		return -1;
 	}
 	src = GET_ARG();
-
 	if (is_prefix(src, "map")) {
 		struct bpf_map_info info = {};
 		__u32 len = sizeof(info);
@@ -499,7 +502,7 @@ static int do_dump(int argc, char **argv)
 		}
 		NEXT_ARG();
 	} else if (is_prefix(src, "file")) {
-		btf = btf__parse(*argv, NULL);
+		btf = btf__parse_split(*argv, base_btf);
 		if (IS_ERR(btf)) {
 			err = -PTR_ERR(btf);
 			btf = NULL;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 682daaa49e6a..b86f450e6fce 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -11,6 +11,7 @@
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -28,6 +29,7 @@ bool show_pinned;
 bool block_mount;
 bool verifier_logs;
 bool relaxed_maps;
+struct btf *base_btf;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 struct pinned_obj_table link_table;
@@ -391,6 +393,7 @@ int main(int argc, char **argv)
 		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ "nomount",	no_argument,	NULL,	'n' },
 		{ "debug",	no_argument,	NULL,	'd' },
+		{ "base-btf",	required_argument, NULL, 'B' },
 		{ 0 }
 	};
 	int opt, ret;
@@ -407,7 +410,7 @@ int main(int argc, char **argv)
 	hash_init(link_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
+	while ((opt = getopt_long(argc, argv, "VhpjfmndB:",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -441,6 +444,15 @@ int main(int argc, char **argv)
 			libbpf_set_print(print_all_levels);
 			verifier_logs = true;
 			break;
+		case 'B':
+			base_btf = btf__parse(optarg, NULL);
+			if (libbpf_get_error(base_btf)) {
+				p_err("failed to parse base BTF at '%s': %ld\n",
+				      optarg, libbpf_get_error(base_btf));
+				base_btf = NULL;
+				return -1;
+			}
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
@@ -465,6 +477,7 @@ int main(int argc, char **argv)
 		delete_pinned_obj_table(&map_table);
 		delete_pinned_obj_table(&link_table);
 	}
+	btf__free(base_btf);
 
 	return ret;
 }
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index c46e52137b87..76e91641262b 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -90,6 +90,7 @@ extern bool show_pids;
 extern bool block_mount;
 extern bool verifier_logs;
 extern bool relaxed_maps;
+extern struct btf *base_btf;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 extern struct pinned_obj_table link_table;
-- 
cgit v1.2.3-70-g09d2


From d3bec0138bfbe58606fc1d6f57a4cdc1a20218db Mon Sep 17 00:00:00 2001
From: David Verbeiren <david.verbeiren@tessares.net>
Date: Wed, 4 Nov 2020 12:23:32 +0100
Subject: bpf: Zero-fill re-used per-cpu map element

Zero-fill element values for all other cpus than current, just as
when not using prealloc. This is the only way the bpf program can
ensure known initial values for all cpus ('onallcpus' cannot be
set when coming from the bpf program).

The scenario is: bpf program inserts some elements in a per-cpu
map, then deletes some (or userspace does). When later adding
new elements using bpf_map_update_elem(), the bpf program can
only set the value of the new elements for the current cpu.
When prealloc is enabled, previously deleted elements are re-used.
Without the fix, values for other cpus remain whatever they were
when the re-used entry was previously freed.

A selftest is added to validate correct operation in above
scenario as well as in case of LRU per-cpu map element re-use.

Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Signed-off-by: David Verbeiren <david.verbeiren@tessares.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201104112332.15191-1-david.verbeiren@tessares.net
---
 kernel/bpf/hashtab.c                              |  30 ++-
 tools/testing/selftests/bpf/prog_tests/map_init.c | 214 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_map_init.c |  33 ++++
 3 files changed, 275 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/map_init.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_map_init.c

(limited to 'tools')

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 1815e97d4c9c..1fccba6e88c4 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -821,6 +821,32 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 	}
 }
 
+static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
+			    void *value, bool onallcpus)
+{
+	/* When using prealloc and not setting the initial value on all cpus,
+	 * zero-fill element values for other cpus (just as what happens when
+	 * not using prealloc). Otherwise, bpf program has no way to ensure
+	 * known initial values for cpus other than current one
+	 * (onallcpus=false always when coming from bpf prog).
+	 */
+	if (htab_is_prealloc(htab) && !onallcpus) {
+		u32 size = round_up(htab->map.value_size, 8);
+		int current_cpu = raw_smp_processor_id();
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			if (cpu == current_cpu)
+				bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
+						size);
+			else
+				memset(per_cpu_ptr(pptr, cpu), 0, size);
+		}
+	} else {
+		pcpu_copy_value(htab, pptr, value, onallcpus);
+	}
+}
+
 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 {
 	return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
@@ -891,7 +917,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			}
 		}
 
-		pcpu_copy_value(htab, pptr, value, onallcpus);
+		pcpu_init_value(htab, pptr, value, onallcpus);
 
 		if (!prealloc)
 			htab_elem_set_ptr(l_new, key_size, pptr);
@@ -1183,7 +1209,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
 				value, onallcpus);
 	} else {
-		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
 				value, onallcpus);
 		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
new file mode 100644
index 000000000000..14a31109dd0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_init.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include <test_progs.h>
+#include "test_map_init.skel.h"
+
+#define TEST_VALUE 0x1234
+#define FILL_VALUE 0xdeadbeef
+
+static int nr_cpus;
+static int duration;
+
+typedef unsigned long long map_key_t;
+typedef unsigned long long map_value_t;
+typedef struct {
+	map_value_t v; /* padding */
+} __bpf_percpu_val_align pcpu_map_value_t;
+
+
+static int map_populate(int map_fd, int num)
+{
+	pcpu_map_value_t value[nr_cpus];
+	int i, err;
+	map_key_t key;
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(value, i) = FILL_VALUE;
+
+	for (key = 1; key <= num; key++) {
+		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
+			return -1;
+	}
+
+	return 0;
+}
+
+static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz,
+			    int *map_fd, int populate)
+{
+	struct test_map_init *skel;
+	int err;
+
+	skel = test_map_init__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return NULL;
+
+	err = bpf_map__set_type(skel->maps.hashmap1, map_type);
+	if (!ASSERT_OK(err, "bpf_map__set_type"))
+		goto error;
+
+	err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz);
+	if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+		goto error;
+
+	err = test_map_init__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto error;
+
+	*map_fd = bpf_map__fd(skel->maps.hashmap1);
+	if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n"))
+		goto error;
+
+	err = map_populate(*map_fd, populate);
+	if (!ASSERT_OK(err, "map_populate"))
+		goto error_map;
+
+	return skel;
+
+error_map:
+	close(*map_fd);
+error:
+	test_map_init__destroy(skel);
+	return NULL;
+}
+
+/* executes bpf program that updates map with key, value */
+static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key,
+				map_value_t value)
+{
+	struct test_map_init__bss *bss;
+
+	bss = skel->bss;
+
+	bss->inKey = key;
+	bss->inValue = value;
+	bss->inPid = getpid();
+
+	if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach"))
+		return -1;
+
+	/* Let tracepoint trigger */
+	syscall(__NR_getpgid);
+
+	test_map_init__detach(skel);
+
+	return 0;
+}
+
+static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected)
+{
+	int i, nzCnt = 0;
+	map_value_t val;
+
+	for (i = 0; i < nr_cpus; i++) {
+		val = bpf_percpu(value, i);
+		if (val) {
+			if (CHECK(val != expected, "map value",
+				  "unexpected for cpu %d: 0x%llx\n", i, val))
+				return -1;
+			nzCnt++;
+		}
+	}
+
+	if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+		  nzCnt))
+		return -1;
+
+	return 0;
+}
+
+/* Add key=1 elem with values set for all CPUs
+ * Delete elem key=1
+ * Run bpf prog that inserts new key=1 elem with value=0x1234
+ *   (bpf prog can only set value for current CPU)
+ * Lookup Key=1 and check value is as expected for all CPUs:
+ *   value set by bpf prog for one CPU, 0 for all others
+ */
+static void test_pcpu_map_init(void)
+{
+	pcpu_map_value_t value[nr_cpus];
+	struct test_map_init *skel;
+	int map_fd, err;
+	map_key_t key;
+
+	/* max 1 elem in map so insertion is forced to reuse freed entry */
+	skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1);
+	if (!ASSERT_OK_PTR(skel, "prog_setup"))
+		return;
+
+	/* delete element so the entry can be re-used*/
+	key = 1;
+	err = bpf_map_delete_elem(map_fd, &key);
+	if (!ASSERT_OK(err, "bpf_map_delete_elem"))
+		goto cleanup;
+
+	/* run bpf prog that inserts new elem, re-using the slot just freed */
+	err = prog_run_insert_elem(skel, key, TEST_VALUE);
+	if (!ASSERT_OK(err, "prog_run_insert_elem"))
+		goto cleanup;
+
+	/* check that key=1 was re-created by bpf prog */
+	err = bpf_map_lookup_elem(map_fd, &key, value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+	/* and has expected values */
+	check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+	test_map_init__destroy(skel);
+}
+
+/* Add key=1 and key=2 elems with values set for all CPUs
+ * Run bpf prog that inserts new key=3 elem
+ *   (only for current cpu; other cpus should have initial value = 0)
+ * Lookup Key=1 and check value is as expected for all CPUs
+ */
+static void test_pcpu_lru_map_init(void)
+{
+	pcpu_map_value_t value[nr_cpus];
+	struct test_map_init *skel;
+	int map_fd, err;
+	map_key_t key;
+
+	/* Set up LRU map with 2 elements, values filled for all CPUs.
+	 * With these 2 elements, the LRU map is full
+	 */
+	skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2);
+	if (!ASSERT_OK_PTR(skel, "prog_setup"))
+		return;
+
+	/* run bpf prog that inserts new key=3 element, re-using LRU slot */
+	key = 3;
+	err = prog_run_insert_elem(skel, key, TEST_VALUE);
+	if (!ASSERT_OK(err, "prog_run_insert_elem"))
+		goto cleanup;
+
+	/* check that key=3 replaced one of earlier elements */
+	err = bpf_map_lookup_elem(map_fd, &key, value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto cleanup;
+
+	/* and has expected values */
+	check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+	test_map_init__destroy(skel);
+}
+
+void test_map_init(void)
+{
+	nr_cpus = bpf_num_possible_cpus();
+	if (nr_cpus <= 1) {
+		printf("%s:SKIP: >1 cpu needed for this test\n", __func__);
+		test__skip();
+		return;
+	}
+
+	if (test__start_subtest("pcpu_map_init"))
+		test_pcpu_map_init();
+	if (test__start_subtest("pcpu_lru_map_init"))
+		test_pcpu_lru_map_init();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c
new file mode 100644
index 000000000000..c89d28ead673
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_init.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 inKey = 0;
+__u64 inValue = 0;
+__u32 inPid = 0;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 2);
+	__type(key, __u64);
+	__type(value, __u64);
+} hashmap1 SEC(".maps");
+
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int sysenter_getpgid(const void *ctx)
+{
+	/* Just do it for once, when called from our own test prog. This
+	 * ensures the map value is only updated for a single CPU.
+	 */
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (cur_pid == inPid)
+		bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-70-g09d2


From c6bde958a62b8ca5ee8d2c1fe429aec4ad54efad Mon Sep 17 00:00:00 2001
From: Florian Lehner <dev@der-flo.net>
Date: Thu, 29 Oct 2020 21:14:42 +0100
Subject: bpf: Lift hashtab key_size limit

Currently key_size of hashtab is limited to MAX_BPF_STACK.
As the key of hashtab can also be a value from a per cpu map it can be
larger than MAX_BPF_STACK.

The use-case for this patch originates to implement allow/disallow
lists for files and file paths. The maximum length of file paths is
defined by PATH_MAX with 4096 chars including nul.
This limit exceeds MAX_BPF_STACK.

Changelog:

v5:
 - Fix cast overflow

v4:
 - Utilize BPF skeleton in tests
 - Rebase

v3:
 - Rebase

v2:
 - Add a test for bpf side

Signed-off-by: Florian Lehner <dev@der-flo.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201029201442.596690-1-dev@der-flo.net
---
 kernel/bpf/hashtab.c                               | 16 +++-----
 .../selftests/bpf/prog_tests/hash_large_key.c      | 43 +++++++++++++++++++++
 .../selftests/bpf/progs/test_hash_large_key.c      | 44 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_maps.c            |  3 +-
 4 files changed, 94 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/hash_large_key.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_hash_large_key.c

(limited to 'tools')

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 23f73d4649c9..7bf18d92af41 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -415,17 +415,11 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	    attr->value_size == 0)
 		return -EINVAL;
 
-	if (attr->key_size > MAX_BPF_STACK)
-		/* eBPF programs initialize keys on stack, so they cannot be
-		 * larger than max stack size
-		 */
-		return -E2BIG;
-
-	if (attr->value_size >= KMALLOC_MAX_SIZE -
-	    MAX_BPF_STACK - sizeof(struct htab_elem))
-		/* if value_size is bigger, the user space won't be able to
-		 * access the elements via bpf syscall. This check also makes
-		 * sure that the elem_size doesn't overflow and it's
+	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
+	   sizeof(struct htab_elem))
+		/* if key_size + value_size is bigger, the user space won't be
+		 * able to access the elements via bpf syscall. This check
+		 * also makes sure that the elem_size doesn't overflow and it's
 		 * kmalloc-able later in htab_map_update_elem()
 		 */
 		return -E2BIG;
diff --git a/tools/testing/selftests/bpf/prog_tests/hash_large_key.c b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c
new file mode 100644
index 000000000000..34684c0fc76d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_hash_large_key.skel.h"
+
+void test_hash_large_key(void)
+{
+	int err, value = 21, duration = 0, hash_map_fd;
+	struct test_hash_large_key *skel;
+
+	struct bigelement {
+		int a;
+		char b[4096];
+		long long c;
+	} key;
+	bzero(&key, sizeof(key));
+
+	skel = test_hash_large_key__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	hash_map_fd = bpf_map__fd(skel->maps.hash_map);
+	if (CHECK(hash_map_fd < 0, "bpf_map__fd", "failed\n"))
+		goto cleanup;
+
+	err = test_hash_large_key__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+		goto cleanup;
+
+	err = bpf_map_update_elem(hash_map_fd, &key, &value, BPF_ANY);
+	if (CHECK(err, "bpf_map_update_elem", "errno=%d\n", errno))
+		goto cleanup;
+
+	key.c = 1;
+	err = bpf_map_lookup_elem(hash_map_fd, &key, &value);
+	if (CHECK(err, "bpf_map_lookup_elem", "errno=%d\n", errno))
+		goto cleanup;
+
+	CHECK_FAIL(value != 42);
+
+cleanup:
+	test_hash_large_key__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
new file mode 100644
index 000000000000..473a22794a62
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 2);
+	__type(key, struct bigelement);
+	__type(value, __u32);
+} hash_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct bigelement);
+} key_map SEC(".maps");
+
+struct bigelement {
+	int a;
+	char b[4096];
+	long long c;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int bpf_hash_large_key_test(void *ctx)
+{
+	int zero = 0, err = 1, value = 42;
+	struct bigelement *key;
+
+	key = bpf_map_lookup_elem(&key_map, &zero);
+	if (!key)
+		return 0;
+
+	key->c = 1;
+	if (bpf_map_update_elem(&hash_map, key, &value, BPF_ANY))
+		return 0;
+
+	return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0d92ebcb335d..0ad3e6305ff0 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1223,9 +1223,10 @@ out_map_in_map:
 
 static void test_map_large(void)
 {
+
 	struct bigkey {
 		int a;
-		char b[116];
+		char b[4096];
 		long long c;
 	} key;
 	int fd, i, value;
-- 
cgit v1.2.3-70-g09d2


From 4cf1bc1f10452065a29d576fc5693fc4fab5b919 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:40 +0000
Subject: bpf: Implement task local storage

Similar to bpf_local_storage for sockets and inodes add local storage
for task_struct.

The life-cycle of storage is managed with the life-cycle of the
task_struct.  i.e. the storage is destroyed along with the owning task
with a callback to the bpf_task_storage_free from the task_free LSM
hook.

The BPF LSM allocates an __rcu pointer to the bpf_local_storage in
the security blob which are now stackable and can co-exist with other
LSMs.

The userspace map operations can be done by using a pid fd as a key
passed to the lookup, update and delete operations.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org
---
 include/linux/bpf_lsm.h        |  23 +++
 include/linux/bpf_types.h      |   1 +
 include/uapi/linux/bpf.h       |  39 +++++
 kernel/bpf/Makefile            |   1 +
 kernel/bpf/bpf_lsm.c           |   4 +
 kernel/bpf/bpf_task_storage.c  | 315 +++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |   3 +-
 kernel/bpf/verifier.c          |  10 ++
 security/bpf/hooks.c           |   2 +
 tools/include/uapi/linux/bpf.h |  39 +++++
 10 files changed, 436 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/bpf_task_storage.c

(limited to 'tools')

diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index aaacb6aafc87..73226181b744 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -7,6 +7,7 @@
 #ifndef _LINUX_BPF_LSM_H
 #define _LINUX_BPF_LSM_H
 
+#include <linux/sched.h>
 #include <linux/bpf.h>
 #include <linux/lsm_hooks.h>
 
@@ -35,9 +36,21 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return inode->i_security + bpf_lsm_blob_sizes.lbs_inode;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	if (unlikely(!task->security))
+		return NULL;
+
+	return task->security + bpf_lsm_blob_sizes.lbs_task;
+}
+
 extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
+extern const struct bpf_func_proto bpf_task_storage_get_proto;
+extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
+void bpf_task_storage_free(struct task_struct *task);
 
 #else /* !CONFIG_BPF_LSM */
 
@@ -53,10 +66,20 @@ static inline struct bpf_storage_blob *bpf_inode(
 	return NULL;
 }
 
+static inline struct bpf_storage_blob *bpf_task(
+	const struct task_struct *task)
+{
+	return NULL;
+}
+
 static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline void bpf_task_storage_free(struct task_struct *task)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2e6f568377f1..99f7fd657d87 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -109,6 +109,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e6ceac3f7d62..f4037b2161a6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,42 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be an task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3937,8 @@ union bpf_attr {
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index bdc8cd1b6767..f0b93ced5a7f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM}	  += bpf_task_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index cd8a617f2109..e92c51bebb47 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -63,6 +63,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
+	case BPF_FUNC_task_storage_get:
+		return &bpf_task_storage_get_proto;
+	case BPF_FUNC_task_storage_delete:
+		return &bpf_task_storage_delete_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
new file mode 100644
index 000000000000..39a45fba4fb0
--- /dev/null
+++ b/kernel/bpf/bpf_task_storage.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/pid.h>
+#include <linux/sched.h>
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <linux/filter.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+#include <linux/fdtable.h>
+
+DEFINE_BPF_STORAGE_CACHE(task_cache);
+
+static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
+{
+	struct task_struct *task = owner;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return NULL;
+	return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *
+task_storage_lookup(struct task_struct *task, struct bpf_map *map,
+		    bool cacheit_lockit)
+{
+	struct bpf_local_storage *task_storage;
+	struct bpf_local_storage_map *smap;
+	struct bpf_storage_blob *bsb;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return NULL;
+
+	task_storage = rcu_dereference(bsb->storage);
+	if (!task_storage)
+		return NULL;
+
+	smap = (struct bpf_local_storage_map *)map;
+	return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
+}
+
+void bpf_task_storage_free(struct task_struct *task)
+{
+	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage *local_storage;
+	bool free_task_storage = false;
+	struct bpf_storage_blob *bsb;
+	struct hlist_node *n;
+
+	bsb = bpf_task(task);
+	if (!bsb)
+		return;
+
+	rcu_read_lock();
+
+	local_storage = rcu_dereference(bsb->storage);
+	if (!local_storage) {
+		rcu_read_unlock();
+		return;
+	}
+
+	/* Neither the bpf_prog nor the bpf-map's syscall
+	 * could be modifying the local_storage->list now.
+	 * Thus, no elem can be added-to or deleted-from the
+	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+	 *
+	 * It is racing with bpf_local_storage_map_free() alone
+	 * when unlinking elem from the local_storage->list and
+	 * the map's bucket->list.
+	 */
+	raw_spin_lock_bh(&local_storage->lock);
+	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+		/* Always unlink from map before unlinking from
+		 * local_storage.
+		 */
+		bpf_selem_unlink_map(selem);
+		free_task_storage = bpf_selem_unlink_storage_nolock(
+			local_storage, selem, false);
+	}
+	raw_spin_unlock_bh(&local_storage->lock);
+	rcu_read_unlock();
+
+	/* free_task_storage should always be true as long as
+	 * local_storage->list was non-empty.
+	 */
+	if (free_task_storage)
+		kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_local_storage_data *sdata;
+	struct task_struct *task;
+	unsigned int f_flags;
+	struct pid *pid;
+	int fd, err;
+
+	fd = *(int *)key;
+	pid = pidfd_get_pid(fd, &f_flags);
+	if (IS_ERR(pid))
+		return ERR_CAST(pid);
+
+	/* We should be in an RCU read side critical section, it should be safe
+	 * to call pid_task.
+	 */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	sdata = task_storage_lookup(task, map, true);
+	put_pid(pid);
+	return sdata ? sdata->data : NULL;
+out:
+	put_pid(pid);
+	return ERR_PTR(err);
+}
+
+static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
+					    void *value, u64 map_flags)
+{
+	struct bpf_local_storage_data *sdata;
+	struct task_struct *task;
+	unsigned int f_flags;
+	struct pid *pid;
+	int fd, err;
+
+	fd = *(int *)key;
+	pid = pidfd_get_pid(fd, &f_flags);
+	if (IS_ERR(pid))
+		return PTR_ERR(pid);
+
+	/* We should be in an RCU read side critical section, it should be safe
+	 * to call pid_task.
+	 */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	sdata = bpf_local_storage_update(
+		task, (struct bpf_local_storage_map *)map, value, map_flags);
+
+	err = PTR_ERR_OR_ZERO(sdata);
+out:
+	put_pid(pid);
+	return err;
+}
+
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
+{
+	struct bpf_local_storage_data *sdata;
+
+	sdata = task_storage_lookup(task, map, false);
+	if (!sdata)
+		return -ENOENT;
+
+	bpf_selem_unlink(SELEM(sdata));
+
+	return 0;
+}
+
+static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	struct task_struct *task;
+	unsigned int f_flags;
+	struct pid *pid;
+	int fd, err;
+
+	fd = *(int *)key;
+	pid = pidfd_get_pid(fd, &f_flags);
+	if (IS_ERR(pid))
+		return PTR_ERR(pid);
+
+	/* We should be in an RCU read side critical section, it should be safe
+	 * to call pid_task.
+	 */
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	task = pid_task(pid, PIDTYPE_PID);
+	if (!task) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	err = task_storage_delete(task, map);
+out:
+	put_pid(pid);
+	return err;
+}
+
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags)
+{
+	struct bpf_local_storage_data *sdata;
+
+	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return (unsigned long)NULL;
+
+	/* explicitly check that the task_storage_ptr is not
+	 * NULL as task_storage_lookup returns NULL in this case and
+	 * bpf_local_storage_update expects the owner to have a
+	 * valid storage pointer.
+	 */
+	if (!task_storage_ptr(task))
+		return (unsigned long)NULL;
+
+	sdata = task_storage_lookup(task, map, true);
+	if (sdata)
+		return (unsigned long)sdata->data;
+
+	/* This helper must only be called from places where the lifetime of the task
+	 * is guaranteed. Either by being refcounted or by being protected
+	 * by an RCU read-side critical section.
+	 */
+	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+		sdata = bpf_local_storage_update(
+			task, (struct bpf_local_storage_map *)map, value,
+			BPF_NOEXIST);
+		return IS_ERR(sdata) ? (unsigned long)NULL :
+					     (unsigned long)sdata->data;
+	}
+
+	return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
+	   task)
+{
+	/* This helper must only be called from places where the lifetime of the task
+	 * is guaranteed. Either by being refcounted or by being protected
+	 * by an RCU read-side critical section.
+	 */
+	return task_storage_delete(task, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = bpf_local_storage_map_alloc(attr);
+	if (IS_ERR(smap))
+		return ERR_CAST(smap);
+
+	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+	return &smap->map;
+}
+
+static void task_storage_map_free(struct bpf_map *map)
+{
+	struct bpf_local_storage_map *smap;
+
+	smap = (struct bpf_local_storage_map *)map;
+	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+	bpf_local_storage_map_free(smap);
+}
+
+static int task_storage_map_btf_id;
+const struct bpf_map_ops task_storage_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
+	.map_alloc_check = bpf_local_storage_map_alloc_check,
+	.map_alloc = task_storage_map_alloc,
+	.map_free = task_storage_map_free,
+	.map_get_next_key = notsupp_get_next_key,
+	.map_lookup_elem = bpf_pid_task_storage_lookup_elem,
+	.map_update_elem = bpf_pid_task_storage_update_elem,
+	.map_delete_elem = bpf_pid_task_storage_delete_elem,
+	.map_check_btf = bpf_local_storage_map_check_btf,
+	.map_btf_name = "bpf_local_storage_map",
+	.map_btf_id = &task_storage_map_btf_id,
+	.map_owner_storage_ptr = task_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
+
+const struct bpf_func_proto bpf_task_storage_get_proto = {
+	.func = bpf_task_storage_get,
+	.gpl_only = false,
+	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg4_type = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_task_storage_delete_proto = {
+	.func = bpf_task_storage_delete,
+	.gpl_only = false,
+	.ret_type = RET_INTEGER,
+	.arg1_type = ARG_CONST_MAP_PTR,
+	.arg2_type = ARG_PTR_TO_BTF_ID,
+	.arg2_btf_id = &bpf_task_storage_btf_ids[0],
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8f50c9c19f1b..f3fe9f53f93c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 		    map->map_type != BPF_MAP_TYPE_ARRAY &&
 		    map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 		    map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
-		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+		    map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 			return -ENOTSUPP;
 		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
 		    map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f863aa84d0a2..00960f6a83ec 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4469,6 +4469,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_inode_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_TASK_STORAGE:
+		if (func_id != BPF_FUNC_task_storage_get &&
+		    func_id != BPF_FUNC_task_storage_delete)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -4547,6 +4552,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_task_storage_get:
+	case BPF_FUNC_task_storage_delete:
+		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c
index 788667d582ae..e5971fa74fd7 100644
--- a/security/bpf/hooks.c
+++ b/security/bpf/hooks.c
@@ -12,6 +12,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = {
 	#include <linux/lsm_hook_defs.h>
 	#undef LSM_HOOK
 	LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free),
+	LSM_HOOK_INIT(task_free, bpf_task_storage_free),
 };
 
 static int __init bpf_lsm_init(void)
@@ -23,6 +24,7 @@ static int __init bpf_lsm_init(void)
 
 struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = {
 	.lbs_inode = sizeof(struct bpf_storage_blob),
+	.lbs_task = sizeof(struct bpf_storage_blob),
 };
 
 DEFINE_LSM(bpf) = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..f4037b2161a6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -157,6 +157,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_STRUCT_OPS,
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
+	BPF_MAP_TYPE_TASK_STORAGE,
 };
 
 /* Note that tracing related programs such as
@@ -3742,6 +3743,42 @@ union bpf_attr {
  * 	Return
  * 		The helper returns **TC_ACT_REDIRECT** on success or
  * 		**TC_ACT_SHOT** on error.
+ *
+ * void *bpf_task_storage_get(struct bpf_map *map, struct task_struct *task, void *value, u64 flags)
+ *	Description
+ *		Get a bpf_local_storage from the *task*.
+ *
+ *		Logically, it could be thought of as getting the value from
+ *		a *map* with *task* as the **key**.  From this
+ *		perspective,  the usage is not much different from
+ *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
+ *		helper enforces the key must be an task_struct and the map must also
+ *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
+ *
+ *		Underneath, the value is stored locally at *task* instead of
+ *		the *map*.  The *map* is used as the bpf-local-storage
+ *		"type". The bpf-local-storage "type" (i.e. the *map*) is
+ *		searched against all bpf_local_storage residing at *task*.
+ *
+ *		An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be
+ *		used such that a new bpf_local_storage will be
+ *		created if one does not exist.  *value* can be used
+ *		together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify
+ *		the initial value of a bpf_local_storage.  If *value* is
+ *		**NULL**, the new bpf_local_storage will be zero initialized.
+ *	Return
+ *		A bpf_local_storage pointer is returned on success.
+ *
+ *		**NULL** if not found or there was an error in adding
+ *		a new bpf_local_storage.
+ *
+ * long bpf_task_storage_delete(struct bpf_map *map, struct task_struct *task)
+ *	Description
+ *		Delete a bpf_local_storage from a *task*.
+ *	Return
+ *		0 on success.
+ *
+ *		**-ENOENT** if the bpf_local_storage cannot be found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3900,6 +3937,8 @@ union bpf_attr {
 	FN(bpf_per_cpu_ptr),            \
 	FN(bpf_this_cpu_ptr),		\
 	FN(redirect_peer),		\
+	FN(task_storage_get),		\
+	FN(task_storage_delete),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3-70-g09d2


From 8885274d225985807deeb95de74642073c3b97bb Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:41 +0000
Subject: libbpf: Add support for task local storage

Updates the bpf_probe_map_type API to also support
BPF_MAP_TYPE_TASK_STORAGE similar to other local storage maps.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-4-kpsingh@chromium.org
---
 tools/lib/bpf/libbpf_probes.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 5482a9b7ae2d..ecaae2927ab8 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -230,6 +230,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		break;
 	case BPF_MAP_TYPE_SK_STORAGE:
 	case BPF_MAP_TYPE_INODE_STORAGE:
+	case BPF_MAP_TYPE_TASK_STORAGE:
 		btf_key_type_id = 1;
 		btf_value_type_id = 3;
 		value_size = 8;
-- 
cgit v1.2.3-70-g09d2


From 864ab0616dccf14e51618a24acc7cf23ddd2b98a Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:42 +0000
Subject: bpftool: Add support for task local storage

Updates the binary to handle the BPF_MAP_TYPE_TASK_STORAGE as
"task_storage" for printing and parsing. Also updates the documentation
and bash completion

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-5-kpsingh@chromium.org
---
 tools/bpf/bpftool/Documentation/bpftool-map.rst | 3 ++-
 tools/bpf/bpftool/bash-completion/bpftool       | 2 +-
 tools/bpf/bpftool/map.c                         | 4 +++-
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index dade10cdf295..3d52256ba75f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -50,7 +50,8 @@ MAP COMMANDS
 |		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
 |		| **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** }
+|		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
+		| **task_storage** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 3f1da30c4da6..fdffbc64c65c 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -705,7 +705,7 @@ _bpftool()
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
                                 percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops inode_storage' -- \
+                                struct_ops inode_storage task_storage' -- \
                                                    "$cur" ) )
                             return 0
                             ;;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a7efbd84fbcc..b400364ee054 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -51,6 +51,7 @@ const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_STRUCT_OPS]		= "struct_ops",
 	[BPF_MAP_TYPE_RINGBUF]			= "ringbuf",
 	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
+	[BPF_MAP_TYPE_TASK_STORAGE]		= "task_storage",
 };
 
 const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@ -1464,7 +1465,8 @@ static int do_help(int argc, char **argv)
 		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
 		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
-		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage }\n"
+		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
+		"		  task_storage }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2]);
-- 
cgit v1.2.3-70-g09d2


From 3ca1032ab7ab010eccb107aa515598788f7d93bb Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:43 +0000
Subject: bpf: Implement get_current_task_btf and RET_PTR_TO_BTF_ID

The currently available bpf_get_current_task returns an unsigned integer
which can be used along with BPF_CORE_READ to read data from
the task_struct but still cannot be used as an input argument to a
helper that accepts an ARG_PTR_TO_BTF_ID of type task_struct.

In order to implement this helper a new return type, RET_PTR_TO_BTF_ID,
is added. This is similar to RET_PTR_TO_BTF_ID_OR_NULL but does not
require checking the nullness of returned pointer.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-6-kpsingh@chromium.org
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  9 +++++++++
 kernel/bpf/verifier.c          |  7 +++++--
 kernel/trace/bpf_trace.c       | 16 ++++++++++++++++
 tools/include/uapi/linux/bpf.h |  9 +++++++++
 5 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2fffd30e13ac..73d5381a5d5c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -310,6 +310,7 @@ enum bpf_return_type {
 	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
+	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f4037b2161a6..9879d6793e90 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3779,6 +3779,14 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct task_struct *bpf_get_current_task_btf(void)
+ *	Description
+ *		Return a BTF pointer to the "current" task.
+ *		This pointer can also be used in helpers that accept an
+ *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
+ *	Return
+ *		Pointer to the current task.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3939,6 +3947,7 @@ union bpf_attr {
 	FN(redirect_peer),		\
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
+	FN(get_current_task_btf),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 00960f6a83ec..10da26e55130 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5186,11 +5186,14 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 		}
-	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
+	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL ||
+		   fn->ret_type == RET_PTR_TO_BTF_ID) {
 		int ret_btf_id;
 
 		mark_reg_known_zero(env, regs, BPF_REG_0);
-		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+		regs[BPF_REG_0].type = fn->ret_type == RET_PTR_TO_BTF_ID ?
+						     PTR_TO_BTF_ID :
+						     PTR_TO_BTF_ID_OR_NULL;
 		ret_btf_id = *fn->ret_btf_id;
 		if (ret_btf_id == 0) {
 			verbose(env, "invalid return type %d of func %s#%d\n",
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4517c8b66518..e4515b0f62a8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1022,6 +1022,20 @@ const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_get_current_task_btf)
+{
+	return (unsigned long) current;
+}
+
+BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct)
+
+static const struct bpf_func_proto bpf_get_current_task_btf_proto = {
+	.func		= bpf_get_current_task_btf,
+	.gpl_only	= true,
+	.ret_type	= RET_PTR_TO_BTF_ID,
+	.ret_btf_id	= &bpf_get_current_btf_ids[0],
+};
+
 BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -1265,6 +1279,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_current_pid_tgid_proto;
 	case BPF_FUNC_get_current_task:
 		return &bpf_get_current_task_proto;
+	case BPF_FUNC_get_current_task_btf:
+		return &bpf_get_current_task_btf_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
 	case BPF_FUNC_get_current_comm:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f4037b2161a6..9879d6793e90 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3779,6 +3779,14 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct task_struct *bpf_get_current_task_btf(void)
+ *	Description
+ *		Return a BTF pointer to the "current" task.
+ *		This pointer can also be used in helpers that accept an
+ *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
+ *	Return
+ *		Pointer to the current task.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3939,6 +3947,7 @@ union bpf_attr {
 	FN(redirect_peer),		\
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
+	FN(get_current_task_btf),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3-70-g09d2


From f0e5ba0bc481df77cf0afac2b33e420b33eeb463 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:44 +0000
Subject: bpf: Fix tests for local_storage

The {inode,sk}_storage_result checking if the correct value was retrieved
was being clobbered unconditionally by the return value of the
bpf_{inode,sk}_storage_delete call.

Also, consistently use the newly added BPF_LOCAL_STORAGE_GET_F_CREATE
flag.

Fixes: cd324d7abb3d ("bpf: Add selftests for local_storage")
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-7-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/progs/local_storage.c | 24 ++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 0758ba229ae0..09529e33be98 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -58,20 +58,22 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
 	struct dummy_storage *storage;
+	int err;
 
 	if (pid != monitored_pid)
 		return 0;
 
 	storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
-				     BPF_SK_STORAGE_GET_F_CREATE);
+					BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
-	if (storage->value == DUMMY_STORAGE_VALUE)
+	if (storage->value != DUMMY_STORAGE_VALUE)
 		inode_storage_result = -1;
 
-	inode_storage_result =
-		bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+	err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+	if (!err)
+		inode_storage_result = err;
 
 	return 0;
 }
@@ -82,19 +84,23 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
 	struct dummy_storage *storage;
+	int err;
 
 	if (pid != monitored_pid)
 		return 0;
 
 	storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
-				     BPF_SK_STORAGE_GET_F_CREATE);
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
-	if (storage->value == DUMMY_STORAGE_VALUE)
+	if (storage->value != DUMMY_STORAGE_VALUE)
 		sk_storage_result = -1;
 
-	sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+	err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+	if (!err)
+		sk_storage_result = err;
+
 	return 0;
 }
 
@@ -109,7 +115,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 		return 0;
 
 	storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
-				     BPF_SK_STORAGE_GET_F_CREATE);
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
@@ -131,7 +137,7 @@ int BPF_PROG(file_open, struct file *file)
 		return 0;
 
 	storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0,
-				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+					BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
-- 
cgit v1.2.3-70-g09d2


From a367efa71b3f5a53281ca9772f8bf43166dfdf5f Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:45 +0000
Subject: bpf: Update selftests for local_storage to use vmlinux.h

With the fixing of BTF pruning of embedded types being fixed, the test
can be simplified to use vmlinux.h

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-8-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/progs/local_storage.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 09529e33be98..ef3822bc7542 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -4,9 +4,8 @@
  * Copyright 2020 Google LLC.
  */
 
+#include "vmlinux.h"
 #include <errno.h>
-#include <linux/bpf.h>
-#include <stdbool.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -36,23 +35,6 @@ struct {
 	__type(value, struct dummy_storage);
 } sk_storage_map SEC(".maps");
 
-/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed.
- */
-struct sock {} __attribute__((preserve_access_index));
-struct sockaddr {} __attribute__((preserve_access_index));
-struct socket {
-	struct sock *sk;
-} __attribute__((preserve_access_index));
-
-struct inode {} __attribute__((preserve_access_index));
-struct dentry {
-	struct inode *d_inode;
-} __attribute__((preserve_access_index));
-struct file {
-	struct inode *f_inode;
-} __attribute__((preserve_access_index));
-
-
 SEC("lsm/inode_unlink")
 int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 {
-- 
cgit v1.2.3-70-g09d2


From 9cde3beeadb311d4b435a7d28d5ab72bcc5de65d Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:46 +0000
Subject: bpf: Add tests for task_local_storage

The test exercises the syscall based map operations by creating a pidfd
for the current process.

For verifying kernel / LSM functionality, the test implements a simple
MAC policy which denies an executable from unlinking itself. The LSM
program bprm_committed_creds sets a task_local_storage with a pointer to
the inode. This is then used to detect if the task is trying to unlink
itself in the inode_unlink LSM hook.

The test copies /bin/rm to /tmp and executes it in a child thread with
the intention of deleting itself. A successful test should prevent the
the running executable from deleting itself.

The bpf programs are also updated to call bpf_spin_{lock, unlock} to
trigger the verfier checks for spin locks.

The temporary file is cleaned up later in the test.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-9-kpsingh@chromium.org
---
 .../selftests/bpf/prog_tests/test_local_storage.c  | 185 +++++++++++++++++++--
 tools/testing/selftests/bpf/progs/local_storage.c  |  61 ++++++-
 2 files changed, 226 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 91cd6f357246..4e7f6a4965f2 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -4,30 +4,161 @@
  * Copyright (C) 2020 Google LLC.
  */
 
+#include <asm-generic/errno-base.h>
+#include <sys/stat.h>
 #include <test_progs.h>
 #include <linux/limits.h>
 
 #include "local_storage.skel.h"
 #include "network_helpers.h"
 
-int create_and_unlink_file(void)
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
 {
-	char fname[PATH_MAX] = "/tmp/fileXXXXXX";
-	int fd;
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out,
+				      loff_t *off_out, size_t len,
+				      unsigned int flags)
+{
+	return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out,
+		       len, flags);
+}
+
+static unsigned int duration;
+
+#define TEST_STORAGE_VALUE 0xbeefdead
 
-	fd = mkstemp(fname);
-	if (fd < 0)
-		return fd;
+struct storage {
+	void *inode;
+	unsigned int value;
+	/* Lock ensures that spin locked versions of local stoage operations
+	 * also work, most operations in this tests are still single threaded
+	 */
+	struct bpf_spin_lock lock;
+};
+
+/* Copies an rm binary to a temp file. dest is a mkstemp template */
+static int copy_rm(char *dest)
+{
+	int fd_in, fd_out = -1, ret = 0;
+	struct stat stat;
+
+	fd_in = open("/bin/rm", O_RDONLY);
+	if (fd_in < 0)
+		return -errno;
+
+	fd_out = mkstemp(dest);
+	if (fd_out < 0) {
+		ret = -errno;
+		goto out;
+	}
+
+	ret = fstat(fd_in, &stat);
+	if (ret == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0);
+	if (ret == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	/* Set executable permission on the copied file */
+	ret = chmod(dest, 0100);
+	if (ret == -1)
+		ret = -errno;
+
+out:
+	close(fd_in);
+	close(fd_out);
+	return ret;
+}
+
+/* Fork and exec the provided rm binary and return the exit code of the
+ * forked process and its pid.
+ */
+static int run_self_unlink(int *monitored_pid, const char *rm_path)
+{
+	int child_pid, child_status, ret;
+	int null_fd;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		null_fd = open("/dev/null", O_WRONLY);
+		dup2(null_fd, STDOUT_FILENO);
+		dup2(null_fd, STDERR_FILENO);
+		close(null_fd);
+
+		*monitored_pid = getpid();
+		/* Use the copied /usr/bin/rm to delete itself
+		 * /tmp/copy_of_rm /tmp/copy_of_rm.
+		 */
+		ret = execlp(rm_path, rm_path, rm_path, NULL);
+		if (ret)
+			exit(errno);
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		return WEXITSTATUS(child_status);
+	}
+
+	return -EINVAL;
+}
 
-	close(fd);
-	unlink(fname);
-	return 0;
+static bool check_syscall_operations(int map_fd, int obj_fd)
+{
+	struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } },
+		       lookup_val = { .value = 0, .lock = { 0 } };
+	int err;
+
+	/* Looking up an existing element should fail initially */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "err:%d errno:%d\n", err, errno))
+		return false;
+
+	/* Create a new element */
+	err = bpf_map_update_elem(map_fd, &obj_fd, &val,
+				  BPF_NOEXIST | BPF_F_LOCK);
+	if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err,
+		  errno))
+		return false;
+
+	/* Lookup the newly created element */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err,
+		  errno))
+		return false;
+
+	/* Check the value of the newly created element */
+	if (CHECK(lookup_val.value != val.value, "bpf_map_lookup_elem",
+		  "value got = %x errno:%d", lookup_val.value, val.value))
+		return false;
+
+	err = bpf_map_delete_elem(map_fd, &obj_fd);
+	if (CHECK(err, "bpf_map_delete_elem()", "err:%d errno:%d\n", err,
+		  errno))
+		return false;
+
+	/* The lookup should fail, now that the element has been deleted */
+	err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val,
+					BPF_F_LOCK);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "err:%d errno:%d\n", err, errno))
+		return false;
+
+	return true;
 }
 
 void test_test_local_storage(void)
 {
+	char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX";
+	int err, serv_sk = -1, task_fd = -1;
 	struct local_storage *skel = NULL;
-	int err, duration = 0, serv_sk = -1;
 
 	skel = local_storage__open_and_load();
 	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -37,12 +168,37 @@ void test_test_local_storage(void)
 	if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
 		goto close_prog;
 
-	skel->bss->monitored_pid = getpid();
+	task_fd = sys_pidfd_open(getpid(), 0);
+	if (CHECK(task_fd < 0, "pidfd_open",
+		  "failed to get pidfd err:%d, errno:%d", task_fd, errno))
+		goto close_prog;
 
-	err = create_and_unlink_file();
-	if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+	if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map),
+				      task_fd))
 		goto close_prog;
 
+	err = copy_rm(tmp_exec_path);
+	if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	/* Sets skel->bss->monitored_pid to the pid of the forked child
+	 * forks a child process that executes tmp_exec_path and tries to
+	 * unlink its executable. This operation should be denied by the loaded
+	 * LSM program.
+	 */
+	err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path);
+	if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err))
+		goto close_prog_unlink;
+
+	/* Set the process being monitored to be the current process */
+	skel->bss->monitored_pid = getpid();
+
+	/* Remove the temporary created executable */
+	err = unlink(tmp_exec_path);
+	if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path,
+		  errno))
+		goto close_prog_unlink;
+
 	CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
 	      "inode_local_storage not set\n");
 
@@ -55,6 +211,9 @@ void test_test_local_storage(void)
 
 	close(serv_sk);
 
+close_prog_unlink:
+	unlink(tmp_exec_path);
 close_prog:
+	close(task_fd);
 	local_storage__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index ef3822bc7542..3e3de130f28f 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -17,41 +17,64 @@ int monitored_pid = 0;
 int inode_storage_result = -1;
 int sk_storage_result = -1;
 
-struct dummy_storage {
+struct local_storage {
+	struct inode *exec_inode;
 	__u32 value;
+	struct bpf_spin_lock lock;
 };
 
 struct {
 	__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
 	__uint(map_flags, BPF_F_NO_PREALLOC);
 	__type(key, int);
-	__type(value, struct dummy_storage);
+	__type(value, struct local_storage);
 } inode_storage_map SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
 	__type(key, int);
-	__type(value, struct dummy_storage);
+	__type(value, struct local_storage);
 } sk_storage_map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct local_storage);
+} task_storage_map SEC(".maps");
+
 SEC("lsm/inode_unlink")
 int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
+	bool is_self_unlink;
 	int err;
 
 	if (pid != monitored_pid)
 		return 0;
 
+	storage = bpf_task_storage_get(&task_storage_map,
+				       bpf_get_current_task_btf(), 0, 0);
+	if (storage) {
+		/* Don't let an executable delete itself */
+		bpf_spin_lock(&storage->lock);
+		is_self_unlink = storage->exec_inode == victim->d_inode;
+		bpf_spin_unlock(&storage->lock);
+		if (is_self_unlink)
+			return -EPERM;
+	}
+
 	storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
 					BPF_LOCAL_STORAGE_GET_F_CREATE);
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	if (storage->value != DUMMY_STORAGE_VALUE)
 		inode_storage_result = -1;
+	bpf_spin_unlock(&storage->lock);
 
 	err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
 	if (!err)
@@ -65,7 +88,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
 	     int addrlen)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
 	int err;
 
 	if (pid != monitored_pid)
@@ -76,8 +99,10 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	if (storage->value != DUMMY_STORAGE_VALUE)
 		sk_storage_result = -1;
+	bpf_spin_unlock(&storage->lock);
 
 	err = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
 	if (!err)
@@ -91,7 +116,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 	     int protocol, int kern)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
 
 	if (pid != monitored_pid)
 		return 0;
@@ -101,7 +126,9 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	storage->value = DUMMY_STORAGE_VALUE;
+	bpf_spin_unlock(&storage->lock);
 
 	return 0;
 }
@@ -110,7 +137,7 @@ SEC("lsm/file_open")
 int BPF_PROG(file_open, struct file *file)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
-	struct dummy_storage *storage;
+	struct local_storage *storage;
 
 	if (pid != monitored_pid)
 		return 0;
@@ -123,6 +150,26 @@ int BPF_PROG(file_open, struct file *file)
 	if (!storage)
 		return 0;
 
+	bpf_spin_lock(&storage->lock);
 	storage->value = DUMMY_STORAGE_VALUE;
+	bpf_spin_unlock(&storage->lock);
 	return 0;
 }
+
+/* This uses the local storage to remember the inode of the binary that a
+ * process was originally executing.
+ */
+SEC("lsm/bprm_committed_creds")
+void BPF_PROG(exec, struct linux_binprm *bprm)
+{
+	struct local_storage *storage;
+
+	storage = bpf_task_storage_get(&task_storage_map,
+				       bpf_get_current_task_btf(), 0,
+				       BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (storage) {
+		bpf_spin_lock(&storage->lock);
+		storage->exec_inode = bprm->file->f_inode;
+		bpf_spin_unlock(&storage->lock);
+	}
+}
-- 
cgit v1.2.3-70-g09d2


From 4170bc6baa5446e1d85e0b7647ea54ba72aa85c4 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Fri, 6 Nov 2020 10:37:47 +0000
Subject: bpf: Exercise syscall operations for inode and sk storage

Use the check_syscall_operations added for task_local_storage to
exercise syscall operations for other local storage maps:

* Check the absence of an element for the given fd.
* Create a new element, retrieve and compare its value.
* Delete the element and check again for absence.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201106103747.2780972-10-kpsingh@chromium.org
---
 .../selftests/bpf/prog_tests/test_local_storage.c       | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 4e7f6a4965f2..5fda45982be0 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -157,7 +157,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd)
 void test_test_local_storage(void)
 {
 	char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX";
-	int err, serv_sk = -1, task_fd = -1;
+	int err, serv_sk = -1, task_fd = -1, rm_fd = -1;
 	struct local_storage *skel = NULL;
 
 	skel = local_storage__open_and_load();
@@ -181,6 +181,15 @@ void test_test_local_storage(void)
 	if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno))
 		goto close_prog;
 
+	rm_fd = open(tmp_exec_path, O_RDONLY);
+	if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d",
+		  tmp_exec_path, rm_fd, errno))
+		goto close_prog;
+
+	if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map),
+				      rm_fd))
+		goto close_prog;
+
 	/* Sets skel->bss->monitored_pid to the pid of the forked child
 	 * forks a child process that executes tmp_exec_path and tries to
 	 * unlink its executable. This operation should be denied by the loaded
@@ -209,11 +218,15 @@ void test_test_local_storage(void)
 	CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
 	      "sk_local_storage not set\n");
 
-	close(serv_sk);
+	if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map),
+				      serv_sk))
+		goto close_prog;
 
 close_prog_unlink:
 	unlink(tmp_exec_path);
 close_prog:
+	close(serv_sk);
+	close(rm_fd);
 	close(task_fd);
 	local_storage__destroy(skel);
 }
-- 
cgit v1.2.3-70-g09d2


From 21584e6a92bd2a85411793c0da3d48ab327e9b72 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 4 Nov 2020 15:30:40 +0200
Subject: selftests: netdevsim: Add test for nexthop offload API

Test various aspects of the nexthop offload API on top of the netdevsim
implementation. Both good and bad flows are tested.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/netdevsim/nexthop.sh     | 436 +++++++++++++++++++++
 1 file changed, 436 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/nexthop.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
new file mode 100755
index 000000000000..be0c1b5ee6b8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the nexthop offload API. It makes use of netdevsim
+# which registers a listener to the nexthop notification chain.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	nexthop_single_add_test
+	nexthop_single_add_err_test
+	nexthop_group_add_test
+	nexthop_group_add_err_test
+	nexthop_group_replace_test
+	nexthop_group_replace_err_test
+	nexthop_single_replace_test
+	nexthop_single_replace_err_test
+	nexthop_single_in_group_replace_test
+	nexthop_single_in_group_replace_err_test
+	nexthop_single_in_group_delete_test
+	nexthop_single_in_group_delete_err_test
+	nexthop_replay_test
+	nexthop_replay_err_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+nexthop_check()
+{
+	local nharg="$1"; shift
+	local expected="$1"; shift
+
+	out=$($IP nexthop show ${nharg} | sed -e 's/ *$//')
+	if [[ "$out" != "$expected" ]]; then
+		return 1
+	fi
+
+	return 0
+}
+
+nexthop_resource_check()
+{
+	local expected_occ=$1; shift
+
+	occ=$($DEVLINK -jp resource show $DEVLINK_DEV \
+		| jq '.[][][] | select(.name=="nexthops") | .["occ"]')
+
+	if [ $expected_occ -ne $occ ]; then
+		return 1
+	fi
+
+	return 0
+}
+
+nexthop_resource_set()
+{
+	local size=$1; shift
+
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size $size
+	$DEVLINK dev reload $DEVLINK_DEV
+}
+
+nexthop_single_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 1
+	nexthop_resource_check 0
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Single nexthop add and delete"
+}
+
+nexthop_single_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 1
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop addition succeeded when should fail"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_group_add_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	$IP nexthop add id 10 group 1,20/2,39
+	nexthop_check "id 10" "id 10 group 1,20/2,39 trap"
+	check_err $? "Unexpected weighted nexthop group entry"
+
+	nexthop_resource_check 61
+	check_err $? "Wrong weighted nexthop occupancy"
+
+	$IP nexthop del id 10
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy after delete"
+
+	log_test "Nexthop group add and delete"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_group_add_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 2
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+	$IP nexthop add id 10 group 1/2 &> /dev/null
+	check_fail $? "Nexthop group addition succeeded when should fail"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop group add failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 10 group 1/2/3
+	nexthop_check "id 10" "id 10 group 1/2/3 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 6
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop group replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_group_replace_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 5
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 10 group 1/2/3 &> /dev/null
+	check_fail $? "Nexthop group replacement succeeded when should fail"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Nexthop group replace failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1
+	nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_replace_err_test()
+{
+	RET=0
+
+	# This is supposed to cause the replace to fail because the new nexthop
+	# is programmed before deleting the replaced one.
+	nexthop_resource_set 1
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+	$IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replace succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_resource_check 1
+	check_err $? "Wrong nexthop occupancy after failure"
+
+	log_test "Single nexthop replace failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_replace_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+	check_err $? "Failed to replace nexthop when should not"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_replace_err_test()
+{
+	RET=0
+
+	nexthop_resource_set 5
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+	check_fail $? "Nexthop replacement succeeded when should fail"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after failure"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after failure"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop replace while in group failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_delete_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$IP nexthop del id 1
+	nexthop_check "id 10" "id 10 group 2 trap"
+	check_err $? "Unexpected nexthop group entry"
+
+	nexthop_resource_check 2
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in group"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_delete_err_test()
+{
+	RET=0
+
+	# First, nexthop 1 will be deleted, which will reduce the occupancy to
+	# 5. Afterwards, a replace notification will be sent for nexthop group
+	# 10 with only two nexthops. Since the new group is allocated before
+	# the old is deleted, the replacement will fail as it will result in an
+	# occupancy of 7.
+	nexthop_resource_set 6
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 3 via 192.0.2.4 dev dummy1
+	$IP nexthop add id 10 group 1/2/3
+
+	$IP nexthop del id 1
+
+	nexthop_resource_check 5
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Single nexthop delete while in group failure"
+
+	$IP nexthop flush &> /dev/null
+	nexthop_resource_set 9999
+}
+
+nexthop_replay_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	$DEVLINK dev reload $DEVLINK_DEV
+	check_err $? "Failed to reload when should not"
+
+	nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after reload"
+
+	nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap"
+	check_err $? "Unexpected nexthop entry after reload"
+
+	nexthop_check "id 10" "id 10 group 1/2 trap"
+	check_err $? "Unexpected nexthop group entry after reload"
+
+	nexthop_resource_check 4
+	check_err $? "Wrong nexthop occupancy"
+
+	log_test "Nexthop replay"
+
+	$IP nexthop flush &> /dev/null
+}
+
+nexthop_replay_err_test()
+{
+	RET=0
+
+	$IP nexthop add id 1 via 192.0.2.2 dev dummy1
+	$IP nexthop add id 2 via 192.0.2.3 dev dummy1
+	$IP nexthop add id 10 group 1/2
+
+	# Reduce size of nexthop resource so that reload will fail.
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size 3
+	$DEVLINK dev reload $DEVLINK_DEV &> /dev/null
+	check_fail $? "Reload succeeded when should fail"
+
+	$DEVLINK resource set $DEVLINK_DEV path nexthops size 9999
+	$DEVLINK dev reload $DEVLINK_DEV
+	check_err $? "Failed to reload when should not"
+
+	log_test "Nexthop replay failure"
+
+	$IP nexthop flush &> /dev/null
+}
+
+setup_prepare()
+{
+	local netdev
+
+	modprobe netdevsim &> /dev/null
+
+	echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+	set -e
+
+	ip netns add testns1
+	devlink dev reload $DEVLINK_DEV netns testns1
+
+	IP="ip -netns testns1"
+	DEVLINK="devlink -N testns1"
+
+	$IP link add name dummy1 up type dummy
+	$IP address add 192.0.2.1/24 dev dummy1
+
+	set +e
+}
+
+cleanup()
+{
+	pre_cleanup
+	ip netns del testns1
+	echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From f055f355faf1991ef4e6b3c3517f8f2fc247805e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 6 Nov 2020 12:33:46 -0800
Subject: selftests/bpf: Fix selftest build with old libc

pidfd_open was added in 2019. Some versions of libc library don't define it.
Define it manually if it's not available.

Reported-by: Sergei Iudin <siudin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 5fda45982be0..fcca7ba1f368 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -12,6 +12,10 @@
 #include "local_storage.skel.h"
 #include "network_helpers.h"
 
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open 434
+#endif
+
 static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
 {
 	return syscall(__NR_pidfd_open, pid, flags);
-- 
cgit v1.2.3-70-g09d2


From 899f317e4886f916ed21027177177c11b577cea1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 9 Sep 2020 12:27:03 -0700
Subject: rcuscale: Add RCU Tasks Trace

This commit adds the ability to test performance and scalability of RCU
Tasks Trace updaters.

Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/rcu/rcuscale.c                              | 32 +++++++++++++++++++++-
 .../selftests/rcutorture/configs/rcuscale/CFcommon |  3 ++
 .../selftests/rcutorture/configs/rcuscale/TRACE01  | 15 ++++++++++
 .../rcutorture/configs/rcuscale/TRACE01.boot       |  1 +
 4 files changed, 50 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
 create mode 100644 tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot

(limited to 'tools')

diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 2819b95479af..c42f2401c374 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -38,6 +38,7 @@
 #include <asm/byteorder.h>
 #include <linux/torture.h>
 #include <linux/vmalloc.h>
+#include <linux/rcupdate_trace.h>
 
 #include "rcu.h"
 
@@ -294,6 +295,35 @@ static struct rcu_scale_ops tasks_ops = {
 	.name		= "tasks"
 };
 
+/*
+ * Definitions for RCU-tasks-trace scalability testing.
+ */
+
+static int tasks_trace_scale_read_lock(void)
+{
+	rcu_read_lock_trace();
+	return 0;
+}
+
+static void tasks_trace_scale_read_unlock(int idx)
+{
+	rcu_read_unlock_trace();
+}
+
+static struct rcu_scale_ops tasks_tracing_ops = {
+	.ptype		= RCU_TASKS_FLAVOR,
+	.init		= rcu_sync_scale_init,
+	.readlock	= tasks_trace_scale_read_lock,
+	.readunlock	= tasks_trace_scale_read_unlock,
+	.get_gp_seq	= rcu_no_completed,
+	.gp_diff	= rcu_seq_diff,
+	.async		= call_rcu_tasks_trace,
+	.gp_barrier	= rcu_barrier_tasks_trace,
+	.sync		= synchronize_rcu_tasks_trace,
+	.exp_sync	= synchronize_rcu_tasks_trace,
+	.name		= "tasks-tracing"
+};
+
 static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
 {
 	if (!cur_ops->gp_diff)
@@ -754,7 +784,7 @@ rcu_scale_init(void)
 	long i;
 	int firsterr = 0;
 	static struct rcu_scale_ops *scale_ops[] = {
-		&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops,
+		&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops, &tasks_tracing_ops
 	};
 
 	if (!torture_init_begin(scale_type, verbose))
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 87caa0e932c7..90942bb5bebc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -1,2 +1,5 @@
 CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
+CONFIG_TASKS_RCU_GENERIC=y
+CONFIG_TASKS_RCU=y
+CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
new file mode 100644
index 000000000000..e6baa2fbaeb3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
@@ -0,0 +1,15 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot
new file mode 100644
index 000000000000..af0aff1457a4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot
@@ -0,0 +1 @@
+rcuscale.scale_type=tasks-tracing
-- 
cgit v1.2.3-70-g09d2


From 45c7b962014da36c2ac1aee6e5014b644ba37a84 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 9 Sep 2020 22:24:57 -0700
Subject: rcuscale: Avoid divide by zero

The rcuscale test module does not use batches, so there is only
ever one batch.  This commit therefore informs the kvm-recheck-rcuscale.sh
script of this fact of life.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index aa745152a525..b582113178ac 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -32,7 +32,7 @@ sed -e 's/^\[[^]]*]//' < $i/console.log |
 awk '
 /-scale: .* gps: .* batches:/ {
 	ngps = $9;
-	nbatches = $11;
+	nbatches = 1;
 }
 
 /-scale: .*writer-duration/ {
-- 
cgit v1.2.3-70-g09d2


From 8d68e68a781db80606c8e8f3e4383be6974878fd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 16 Sep 2020 10:10:10 -0700
Subject: torture: Exclude "NOHZ tick-stop error" from fatal errors

The "NOHZ tick-stop error: Non-RCU local softirq work is pending"
warning happens frequently and appears to be irrelevant to the various
torture tests.  This commit therefore filters it out.

If there proves to be a need to pay attention to it a later commit will
add an "advice" category to allow the user to immediately see that
although something happened, it was not an indictment of the system
being tortured.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/console-badness.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index 0e4c0b2eb7f0..80ae7f08b363 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -13,4 +13,5 @@
 egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
 grep -v 'ODEBUG: ' |
 grep -v 'This means that this is a DEBUG kernel and it is' |
-grep -v 'Warning: unable to open an initial console'
+grep -v 'Warning: unable to open an initial console' |
+grep -v 'NOHZ tick-stop error: Non-RCU local softirq work is pending, handler'
-- 
cgit v1.2.3-70-g09d2


From 6f26d010e678249367cc00b5a827c3731c8138f3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 17 Sep 2020 12:34:01 -0700
Subject: rcutorture: Adjust scenarios SRCU-t and SRCU-u to make kconfig happy

The SRCU-u scenario expects to enable lockdep but to also disable the
CONFIG_PREEMPT_COUNT kconfig option.  This no longer works.  This commit
therefore instead enables lockdep in SRCU-t, which then allows SRCU-u
to disable CONFIG_PREEMPT_COUNT.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-t | 3 ++-
 tools/testing/selftests/rcutorture/configs/rcu/SRCU-u | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
index 6c78022c8cd8..d6557c38dfe4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -4,7 +4,8 @@ CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_TINY_SRCU=y
 CONFIG_RCU_TRACE=n
-CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_DEBUG_ATOMIC_SLEEP=y
 #CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
index c15ada821e45..6bc24e99862f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -4,7 +4,6 @@ CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 #CHECK#CONFIG_TINY_SRCU=y
 CONFIG_RCU_TRACE=n
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_PREEMPT_COUNT=n
-- 
cgit v1.2.3-70-g09d2


From c64659ef29e3901be0900ec6fb0485fa3dbdcfd8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Fri, 18 Sep 2020 13:26:22 -0700
Subject: torture: Prevent jitter processes from delaying failed run

Even when the kernel panics and qemu dies, runs with jitter enabled will
continue uselessly until the jitter.sh processes terminate.  This can
be annoying if a planned one-hour run instead dies during boot.

This commit therefore kills the jitter.sh processes when the run ends
more than one minute prior to the termination time specified by the
kvm.sh --duration argument or its default.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 14 ++++++++++++++
 tools/testing/selftests/rcutorture/bin/kvm.sh            |  5 ++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index d04966ab88cc..3cd03d01857c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -226,6 +226,20 @@ do
 				echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
 				ps -fp $killpid >> $resdir/Warnings 2>&1
 			fi
+			# Reduce probability of PID reuse by allowing a one-minute buffer
+			if test $((kruntime + 60)) -lt $seconds && test -s "$resdir/../jitter_pids"
+			then
+				awk < "$resdir/../jitter_pids" '
+				NF > 0 {
+					pidlist = pidlist " " $1;
+					n++;
+				}
+				END {
+					if (n > 0) {
+						print "kill " pidlist;
+					}
+				}' | sh
+			fi
 		else
 			echo ' ---' `date`: "Kernel done"
 		fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6eb1d3f6524d..5ad3882563ce 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -459,8 +459,11 @@ function dump(first, pastlast, batchnum)
 	print "if test -n \"$needqemurun\""
 	print "then"
 	print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
-	for (j = 0; j < njitter; j++)
+	print "\techo > " rd "jitter_pids"
+	for (j = 0; j < njitter; j++) {
 		print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
+		print "\techo $! >> " rd "jitter_pids"
+	}
 	print "\twait"
 	print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
 	print "else"
-- 
cgit v1.2.3-70-g09d2


From c1e06287583e5ec496e4c02bf5b319e5e41a1fd2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Mon, 21 Sep 2020 13:18:48 -0700
Subject: torture: Force weak-hashed pointers on console log

Although the rcutorture scripting now deals correctly with full-up
security-induced pointer obfuscation, it is still counter-productive for
kernel hackers who are analyzing console output.  This commit therefore
sets the debug_boot_weak_hash kernel boot parameter, which enables
printing of weak-hashed pointers for torture-test runs.

Please note that this change applies only to runs initiated by the
kvm.sh scripting.  If you are instead using modprobe and rmmod, it is
your responsibility to build and boot the underlying kernel to your taste.

Please note further that this change does not result in a security hole
in normal use.  The rcutorture testing runs with a negligible userspace,
no networking, and no user interaction.  Besides which, there is no data
of value that can be extracted from an rcutorture guest OS that could
not also be extracted from the host that this guest is running on.

Suggested-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/functions.sh | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 51f3464b96d3..82663495fb38 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -169,6 +169,7 @@ identify_qemu () {
 # Output arguments for the qemu "-append" string based on CPU type
 # and the TORTURE_QEMU_INTERACTIVE environment variable.
 identify_qemu_append () {
+	echo debug_boot_weak_hash
 	local console=ttyS0
 	case "$1" in
 	qemu-system-x86_64|qemu-system-i386)
-- 
cgit v1.2.3-70-g09d2


From 7de1ca35269ee20e40c35666c810cbaea528c719 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 22 Sep 2020 17:20:11 -0700
Subject: torture: Accept time units on kvm.sh --duration argument

The "--duration <minutes>" has worked well for a very long time, but
it can be inconvenient to compute the minutes for (say) a 28-hour run.
It can also be annoying to have to let a simple boot test run for a full
minute.  This commit therefore permits an "s" suffix to specify seconds,
"m" to specify minutes (which remains the default), "h" suffix to specify
hours, and "d" to specify days.

With this change, "--duration 5" still specifies that each scenario
run for five minutes, but "--duration 30s" runs for only 30 seconds,
"--duration 8h" runs for eight hours, and "--duration 2d" runs for
two days.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm.sh | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 5ad3882563ce..c348d962304f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -58,7 +58,7 @@ usage () {
 	echo "       --datestamp string"
 	echo "       --defconfig string"
 	echo "       --dryrun sched|script"
-	echo "       --duration minutes"
+	echo "       --duration minutes | <seconds>s | <hours>h | <days>d"
 	echo "       --gdb"
 	echo "       --help"
 	echo "       --interactive"
@@ -128,8 +128,20 @@ do
 		shift
 		;;
 	--duration)
-		checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
-		dur=$(($2*60))
+		checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error'
+		mult=60
+		if echo "$2" | grep -q 's$'
+		then
+			mult=1
+		elif echo "$2" | grep -q 'h$'
+		then
+			mult=3600
+		elif echo "$2" | grep -q 'd$'
+		then
+			mult=86400
+		fi
+		ts=`echo $2 | sed -e 's/[smhd]$//'`
+		dur=$(($ts*mult))
 		shift
 		;;
 	--gdb)
-- 
cgit v1.2.3-70-g09d2


From a5136f4ffb44f8c1a80406c5bfd4d233433398e6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 24 Sep 2020 08:52:33 -0700
Subject: torture: Allow alternative forms of kvm.sh command-line arguments

This commit allows --build-only as a synonym for --buildonly, --kconfigs
for --kconfig, and --kmake-args for --kmake-arg.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index c348d962304f..45d07b7b69f5 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -93,7 +93,7 @@ do
 		TORTURE_BOOT_IMAGE="$2"
 		shift
 		;;
-	--buildonly)
+	--buildonly|--build-only)
 		TORTURE_BUILDONLY=1
 		;;
 	--configs|--config)
@@ -160,7 +160,7 @@ do
 		jitter="$2"
 		shift
 		;;
-	--kconfig)
+	--kconfig|--kconfigs)
 		checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
 		TORTURE_KCONFIG_ARG="$2"
 		shift
@@ -171,7 +171,7 @@ do
 	--kcsan)
 		TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG
 		;;
-	--kmake-arg)
+	--kmake-arg|--kmake-args)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
 		TORTURE_KMAKE_ARG="$2"
 		shift
-- 
cgit v1.2.3-70-g09d2


From 6c5b9de2c63b2f513a580c6c80d455350012e99b Mon Sep 17 00:00:00 2001
From: Samuel Hernandez <sam.hernandez.amador@gmail.com>
Date: Sun, 11 Oct 2020 14:22:31 -0400
Subject: rcutorture/nolibc: Fix a typo in header file

This fixes a typo. Before this, the AT_FDCWD macro would be defined
regardless of whether or not it's been defined before.

Signed-off-by: Samuel Hernandez <sam.hernandez.amador@gmail.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 2551e9b71167..d6d2623c99ad 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -231,7 +231,7 @@ struct rusage {
 #define DT_SOCK   12
 
 /* all the *at functions */
-#ifndef AT_FDWCD
+#ifndef AT_FDCWD
 #define AT_FDCWD             -100
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 5be7d80deb80ceef50a6bd86d83c8fd62264778a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 15 Oct 2020 11:42:17 -0700
Subject: torture: Make kvm-check-branches.sh use --allcpus

Currently the kvm-check-branches.sh script calculates the number of CPUs
and passes this to the kvm.sh --cpus command-line argument.  This works,
but this commit saves a line by instead using the new kvm.sh --allcpus
command-line argument.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
index 6e65c134e5f1..370406bbfeed 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -52,8 +52,7 @@ echo Results directory: $resdir/$ds
 KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
 PATH=${KVM}/bin:$PATH; export PATH
 . functions.sh
-cpus="`identify_qemu_vcpus`"
-echo Using up to $cpus CPUs.
+echo Using all `identify_qemu_vcpus` CPUs.
 
 # Each pass through this loop does one command-line argument.
 for gitbr in $@
@@ -74,7 +73,7 @@ do
 		# Test the specified commit.
 		git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1
 		echo git checkout return code: $? "(Commit $ntry: $i)"
-		kvm.sh --cpus $cpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
+		kvm.sh --allcpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1
 		ret=$?
 		echo kvm.sh return code $ret for commit $i from branch $gitbr
 
-- 
cgit v1.2.3-70-g09d2


From 06dc8d4591b8d8ce0ece94474718b53f0a5c5de3 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Tue, 20 Oct 2020 21:22:56 +0200
Subject: tools/nolibc:  Fix a spelling error in a comment

Fix a spelling in the comment line.

s/memry/memory/p

This is on linux-next.

Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index d6d2623c99ad..e61d36cd4e50 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -107,7 +107,7 @@ static int errno;
 #endif
 
 /* errno codes all ensure that they will not conflict with a valid pointer
- * because they all correspond to the highest addressable memry page.
+ * because they all correspond to the highest addressable memory page.
  */
 #define MAX_ERRNO 4095
 
-- 
cgit v1.2.3-70-g09d2


From 01f9e708d9eae6335ae9ff25ab09893c20727a55 Mon Sep 17 00:00:00 2001
From: Anna-Maria Behnsen <anna-maria@linutronix.de>
Date: Mon, 2 Nov 2020 18:12:20 +0100
Subject: tools/rcutorture: Fix BUG parsing of console.log

For the rcutorture test summary log file console.log of virtual machines is
parsed. When a console.log contains "DEBUG", BUG counter is incremented
because regular expression does not handle to ignore DEBUG.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Reviewed-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/testing/selftests/rcutorture/bin/parse-console.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index e03338091a06..263b1be50008 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -133,7 +133,7 @@ then
 	then
 		summary="$summary  Warnings: $n_warn"
 	fi
-	n_bugs=`egrep -c 'BUG|Oops:' $file`
+	n_bugs=`egrep -c '\bBUG|Oops:' $file`
 	if test "$n_bugs" -ne 0
 	then
 		summary="$summary  Bugs: $n_bugs"
-- 
cgit v1.2.3-70-g09d2


From ebb477cb2fb7a44ff600e0a7393bad906a0ecd80 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 11 Aug 2020 11:27:33 -0700
Subject: tools/memory-model: Document categories of ordering primitives

The Linux kernel has a number of categories of ordering primitives, which
are recorded in the LKMM implementation and hinted at by cheatsheet.txt.
But there is no overview of these categories, and such an overview
is needed in order to understand multithreaded LKMM litmus tests.
This commit therefore adds an ordering.txt as well as extracting a
control-dependencies.txt from memory-barriers.txt.  It also updates the
README file.

[ paulmck:  Apply Akira Yokosawa file-placement feedback. ]
[ paulmck:  Apply Alan Stern feedback. ]
[ paulmck:  Apply self-review feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/README            |  17 +
 .../Documentation/control-dependencies.txt         | 258 ++++++++++
 tools/memory-model/Documentation/ordering.txt      | 556 +++++++++++++++++++++
 3 files changed, 831 insertions(+)
 create mode 100644 tools/memory-model/Documentation/control-dependencies.txt
 create mode 100644 tools/memory-model/Documentation/ordering.txt

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README
index 2d9539f19912..db90a26dbdf4 100644
--- a/tools/memory-model/Documentation/README
+++ b/tools/memory-model/Documentation/README
@@ -11,6 +11,12 @@ the material provided by documents earlier in this list.
 
 o	You are new to Linux-kernel concurrency: simple.txt
 
+o	You have some background in Linux-kernel concurrency, and would
+	like an overview of the types of low-level concurrency primitives
+	that the Linux kernel provides:  ordering.txt
+
+	Here, "low level" means atomic operations to single variables.
+
 o	You are familiar with the Linux-kernel concurrency primitives
 	that you need, and just want to get started with LKMM litmus
 	tests:  litmus-tests.txt
@@ -19,6 +25,9 @@ o	You are familiar with Linux-kernel concurrency, and would
 	like a detailed intuitive understanding of LKMM, including
 	situations involving more than two threads:  recipes.txt
 
+o	You would like a detailed understanding of what your compiler can
+	and cannot do to control dependencies:  control-dependencies.txt
+
 o	You are familiar with Linux-kernel concurrency and the use of
 	LKMM, and would like a quick reference:  cheatsheet.txt
 
@@ -41,6 +50,10 @@ README
 cheatsheet.txt
 	Quick-reference guide to the Linux-kernel memory model.
 
+control-dependencies.txt
+	Guide to preventing compiler optimizations from destroying
+	your control dependencies.
+
 explanation.txt
 	Detailed description of the memory model.
 
@@ -48,6 +61,10 @@ litmus-tests.txt
 	The format, features, capabilities, and limitations of the litmus
 	tests that LKMM can evaluate.
 
+ordering.txt
+	Overview of the Linux kernel's low-level memory-ordering
+	primitives by category.
+
 recipes.txt
 	Common memory-ordering patterns.
 
diff --git a/tools/memory-model/Documentation/control-dependencies.txt b/tools/memory-model/Documentation/control-dependencies.txt
new file mode 100644
index 000000000000..8b743d20fe27
--- /dev/null
+++ b/tools/memory-model/Documentation/control-dependencies.txt
@@ -0,0 +1,258 @@
+CONTROL DEPENDENCIES
+====================
+
+A major difficulty with control dependencies is that current compilers
+do not support them.  One purpose of this document is therefore to
+help you prevent your compiler from breaking your code.  However,
+control dependencies also pose other challenges, which leads to the
+second purpose of this document, namely to help you to avoid breaking
+your own code, even in the absence of help from your compiler.
+
+One such challenge is that control dependencies order only later stores.
+Therefore, a load-load control dependency will not preserve ordering
+unless a read memory barrier is provided.  Consider the following code:
+
+	q = READ_ONCE(a);
+	if (q)
+		p = READ_ONCE(b);
+
+This is not guaranteed to provide any ordering because some types of CPUs
+are permitted to predict the result of the load from "b".  This prediction
+can cause other CPUs to see this load as having happened before the load
+from "a".  This means that an explicit read barrier is required, for example
+as follows:
+
+	q = READ_ONCE(a);
+	if (q) {
+		smp_rmb();
+		p = READ_ONCE(b);
+	}
+
+However, stores are not speculated.  This means that ordering is
+(usually) guaranteed for load-store control dependencies, as in the
+following example:
+
+	q = READ_ONCE(a);
+	if (q)
+		WRITE_ONCE(b, 1);
+
+Control dependencies can pair with each other and with other types
+of ordering.  But please note that neither the READ_ONCE() nor the
+WRITE_ONCE() are optional.  Without the READ_ONCE(), the compiler might
+fuse the load from "a" with other loads.  Without the WRITE_ONCE(),
+the compiler might fuse the store to "b" with other stores.  Worse yet,
+the compiler might convert the store into a load and a check followed
+by a store, and this compiler-generated load would not be ordered by
+the control dependency.
+
+Furthermore, if the compiler is able to prove that the value of variable
+"a" is always non-zero, it would be well within its rights to optimize
+the original example by eliminating the "if" statement as follows:
+
+	q = a;
+	b = 1;  /* BUG: Compiler and CPU can both reorder!!! */
+
+So don't leave out either the READ_ONCE() or the WRITE_ONCE().
+In particular, although READ_ONCE() does force the compiler to emit a
+load, it does *not* force the compiler to actually use the loaded value.
+
+It is tempting to try use control dependencies to enforce ordering on
+identical stores on both branches of the "if" statement as follows:
+
+	q = READ_ONCE(a);
+	if (q) {
+		barrier();
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		barrier();
+		WRITE_ONCE(b, 1);
+		do_something_else();
+	}
+
+Unfortunately, current compilers will transform this as follows at high
+optimization levels:
+
+	q = READ_ONCE(a);
+	barrier();
+	WRITE_ONCE(b, 1);  /* BUG: No ordering vs. load from a!!! */
+	if (q) {
+		/* WRITE_ONCE(b, 1); -- moved up, BUG!!! */
+		do_something();
+	} else {
+		/* WRITE_ONCE(b, 1); -- moved up, BUG!!! */
+		do_something_else();
+	}
+
+Now there is no conditional between the load from "a" and the store to
+"b", which means that the CPU is within its rights to reorder them:  The
+conditional is absolutely required, and must be present in the final
+assembly code, after all of the compiler and link-time optimizations
+have been applied.  Therefore, if you need ordering in this example,
+you must use explicit memory ordering, for example, smp_store_release():
+
+	q = READ_ONCE(a);
+	if (q) {
+		smp_store_release(&b, 1);
+		do_something();
+	} else {
+		smp_store_release(&b, 1);
+		do_something_else();
+	}
+
+Without explicit memory ordering, control-dependency-based ordering is
+guaranteed only when the stores differ, for example:
+
+	q = READ_ONCE(a);
+	if (q) {
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		WRITE_ONCE(b, 2);
+		do_something_else();
+	}
+
+The initial READ_ONCE() is still required to prevent the compiler from
+knowing too much about the value of "a".
+
+But please note that you need to be careful what you do with the local
+variable "q", otherwise the compiler might be able to guess the value
+and again remove the conditional branch that is absolutely required to
+preserve ordering.  For example:
+
+	q = READ_ONCE(a);
+	if (q % MAX) {
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		WRITE_ONCE(b, 2);
+		do_something_else();
+	}
+
+If MAX is compile-time defined to be 1, then the compiler knows that
+(q % MAX) must be equal to zero, regardless of the value of "q".
+The compiler is therefore within its rights to transform the above code
+into the following:
+
+	q = READ_ONCE(a);
+	WRITE_ONCE(b, 2);
+	do_something_else();
+
+Given this transformation, the CPU is not required to respect the ordering
+between the load from variable "a" and the store to variable "b".  It is
+tempting to add a barrier(), but this does not help.  The conditional
+is gone, and the barrier won't bring it back.  Therefore, if you need
+to relying on control dependencies to produce this ordering, you should
+make sure that MAX is greater than one, perhaps as follows:
+
+	q = READ_ONCE(a);
+	BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
+	if (q % MAX) {
+		WRITE_ONCE(b, 1);
+		do_something();
+	} else {
+		WRITE_ONCE(b, 2);
+		do_something_else();
+	}
+
+Please note once again that each leg of the "if" statement absolutely
+must store different values to "b".  As in previous examples, if the two
+values were identical, the compiler could pull this store outside of the
+"if" statement, destroying the control dependency's ordering properties.
+
+You must also be careful avoid relying too much on boolean short-circuit
+evaluation.  Consider this example:
+
+	q = READ_ONCE(a);
+	if (q || 1 > 0)
+		WRITE_ONCE(b, 1);
+
+Because the first condition cannot fault and the second condition is
+always true, the compiler can transform this example as follows, again
+destroying the control dependency's ordering:
+
+	q = READ_ONCE(a);
+	WRITE_ONCE(b, 1);
+
+This is yet another example showing the importance of preventing the
+compiler from out-guessing your code.  Again, although READ_ONCE() really
+does force the compiler to emit code for a given load, the compiler is
+within its rights to discard the loaded value.
+
+In addition, control dependencies apply only to the then-clause and
+else-clause of the "if" statement in question.  In particular, they do
+not necessarily order the code following the entire "if" statement:
+
+	q = READ_ONCE(a);
+	if (q) {
+		WRITE_ONCE(b, 1);
+	} else {
+		WRITE_ONCE(b, 2);
+	}
+	WRITE_ONCE(c, 1);  /* BUG: No ordering against the read from "a". */
+
+It is tempting to argue that there in fact is ordering because the
+compiler cannot reorder volatile accesses and also cannot reorder
+the writes to "b" with the condition.  Unfortunately for this line
+of reasoning, the compiler might compile the two writes to "b" as
+conditional-move instructions, as in this fanciful pseudo-assembly
+language:
+
+	ld r1,a
+	cmp r1,$0
+	cmov,ne r4,$1
+	cmov,eq r4,$2
+	st r4,b
+	st $1,c
+
+The control dependencies would then extend only to the pair of cmov
+instructions and the store depending on them.  This means that a weakly
+ordered CPU would have no dependency of any sort between the load from
+"a" and the store to "c".  In short, control dependencies provide ordering
+only to the stores in the then-clause and else-clause of the "if" statement
+in question (including functions invoked by those two clauses), and not
+to code following that "if" statement.
+
+
+In summary:
+
+  (*) Control dependencies can order prior loads against later stores.
+      However, they do *not* guarantee any other sort of ordering:
+      Not prior loads against later loads, nor prior stores against
+      later anything.  If you need these other forms of ordering, use
+      smp_load_acquire(), smp_store_release(), or, in the case of prior
+      stores and later loads, smp_mb().
+
+  (*) If both legs of the "if" statement contain identical stores to
+      the same variable, then you must explicitly order those stores,
+      either by preceding both of them with smp_mb() or by using
+      smp_store_release().  Please note that it is *not* sufficient to use
+      barrier() at beginning and end of each leg of the "if" statement
+      because, as shown by the example above, optimizing compilers can
+      destroy the control dependency while respecting the letter of the
+      barrier() law.
+
+  (*) Control dependencies require at least one run-time conditional
+      between the prior load and the subsequent store, and this
+      conditional must involve the prior load.  If the compiler is able
+      to optimize the conditional away, it will have also optimized
+      away the ordering.  Careful use of READ_ONCE() and WRITE_ONCE()
+      can help to preserve the needed conditional.
+
+  (*) Control dependencies require that the compiler avoid reordering the
+      dependency into nonexistence.  Careful use of READ_ONCE() or
+      atomic{,64}_read() can help to preserve your control dependency.
+
+  (*) Control dependencies apply only to the then-clause and else-clause
+      of the "if" statement containing the control dependency, including
+      any functions that these two clauses call.  Control dependencies
+      do *not* apply to code beyond the end of that "if" statement.
+
+  (*) Control dependencies pair normally with other types of barriers.
+
+  (*) Control dependencies do *not* provide multicopy atomicity.  If you
+      need all the CPUs to agree on the ordering of a given store against
+      all other accesses, use smp_mb().
+
+  (*) Compilers do not understand control dependencies.  It is therefore
+      your job to ensure that they do not break your code.
diff --git a/tools/memory-model/Documentation/ordering.txt b/tools/memory-model/Documentation/ordering.txt
new file mode 100644
index 000000000000..9b0949d3f5ec
--- /dev/null
+++ b/tools/memory-model/Documentation/ordering.txt
@@ -0,0 +1,556 @@
+This document gives an overview of the categories of memory-ordering
+operations provided by the Linux-kernel memory model (LKMM).
+
+
+Categories of Ordering
+======================
+
+This section lists LKMM's three top-level categories of memory-ordering
+operations in decreasing order of strength:
+
+1.	Barriers (also known as "fences").  A barrier orders some or
+	all of the CPU's prior operations against some or all of its
+	subsequent operations.
+
+2.	Ordered memory accesses.  These operations order themselves
+	against some or all of the CPU's prior accesses or some or all
+	of the CPU's subsequent accesses, depending on the subcategory
+	of the operation.
+
+3.	Unordered accesses, as the name indicates, have no ordering
+	properties except to the extent that they interact with an
+	operation in the previous categories.  This being the real world,
+	some of these "unordered" operations provide limited ordering
+	in some special situations.
+
+Each of the above categories is described in more detail by one of the
+following sections.
+
+
+Barriers
+========
+
+Each of the following categories of barriers is described in its own
+subsection below:
+
+a.	Full memory barriers.
+
+b.	Read-modify-write (RMW) ordering augmentation barriers.
+
+c.	Write memory barrier.
+
+d.	Read memory barrier.
+
+e.	Compiler barrier.
+
+Note well that many of these primitives generate absolutely no code
+in kernels built with CONFIG_SMP=n.  Therefore, if you are writing
+a device driver, which must correctly order accesses to a physical
+device even in kernels built with CONFIG_SMP=n, please use the
+ordering primitives provided for that purpose.  For example, instead of
+smp_mb(), use mb().  See the "Linux Kernel Device Drivers" book or the
+https://lwn.net/Articles/698014/ article for more information.
+
+
+Full Memory Barriers
+--------------------
+
+The Linux-kernel primitives that provide full ordering include:
+
+o	The smp_mb() full memory barrier.
+
+o	Value-returning RMW atomic operations whose names do not end in
+	_acquire, _release, or _relaxed.
+
+o	RCU's grace-period primitives.
+
+First, the smp_mb() full memory barrier orders all of the CPU's prior
+accesses against all subsequent accesses from the viewpoint of all CPUs.
+In other words, all CPUs will agree that any earlier action taken
+by that CPU happened before any later action taken by that same CPU.
+For example, consider the following:
+
+	WRITE_ONCE(x, 1);
+	smp_mb(); // Order store to x before load from y.
+	r1 = READ_ONCE(y);
+
+All CPUs will agree that the store to "x" happened before the load
+from "y", as indicated by the comment.  And yes, please comment your
+memory-ordering primitives.  It is surprisingly hard to remember their
+purpose after even a few months.
+
+Second, some RMW atomic operations provide full ordering.  These
+operations include value-returning RMW atomic operations (that is, those
+with non-void return types) whose names do not end in _acquire, _release,
+or _relaxed.  Examples include atomic_add_return(), atomic_dec_and_test(),
+cmpxchg(), and xchg().  Note that conditional RMW atomic operations such
+as cmpxchg() are only guaranteed to provide ordering when they succeed.
+When RMW atomic operations provide full ordering, they partition the
+CPU's accesses into three groups:
+
+1.	All code that executed prior to the RMW atomic operation.
+
+2.	The RMW atomic operation itself.
+
+3.	All code that executed after the RMW atomic operation.
+
+All CPUs will agree that any operation in a given partition happened
+before any operation in a higher-numbered partition.
+
+In contrast, non-value-returning RMW atomic operations (that is, those
+with void return types) do not guarantee any ordering whatsoever.  Nor do
+value-returning RMW atomic operations whose names end in _relaxed.
+Examples of the former include atomic_inc() and atomic_dec(),
+while examples of the latter include atomic_cmpxchg_relaxed() and
+atomic_xchg_relaxed().  Similarly, value-returning non-RMW atomic
+operations such as atomic_read() do not guarantee full ordering, and
+are covered in the later section on unordered operations.
+
+Value-returning RMW atomic operations whose names end in _acquire or
+_release provide limited ordering, and will be described later in this
+document.
+
+Finally, RCU's grace-period primitives provide full ordering.  These
+primitives include synchronize_rcu(), synchronize_rcu_expedited(),
+synchronize_srcu() and so on.  However, these primitives have orders
+of magnitude greater overhead than smp_mb(), atomic_xchg(), and so on.
+Furthermore, RCU's grace-period primitives can only be invoked in
+sleepable contexts.  Therefore, RCU's grace-period primitives are
+typically instead used to provide ordering against RCU read-side critical
+sections, as documented in their comment headers.  But of course if you
+need a synchronize_rcu() to interact with readers, it costs you nothing
+to also rely on its additional full-memory-barrier semantics.  Just please
+carefully comment this, otherwise your future self will hate you.
+
+
+RMW Ordering Augmentation Barriers
+----------------------------------
+
+As noted in the previous section, non-value-returning RMW operations
+such as atomic_inc() and atomic_dec() guarantee no ordering whatsoever.
+Nevertheless, a number of popular CPU families, including x86, provide
+full ordering for these primitives.  One way to obtain full ordering on
+all architectures is to add a call to smp_mb():
+
+	WRITE_ONCE(x, 1);
+	atomic_inc(&my_counter);
+	smp_mb(); // Inefficient on x86!!!
+	r1 = READ_ONCE(y);
+
+This works, but the added smp_mb() adds needless overhead for
+x86, on which atomic_inc() provides full ordering all by itself.
+The smp_mb__after_atomic() primitive can be used instead:
+
+	WRITE_ONCE(x, 1);
+	atomic_inc(&my_counter);
+	smp_mb__after_atomic(); // Order store to x before load from y.
+	r1 = READ_ONCE(y);
+
+The smp_mb__after_atomic() primitive emits code only on CPUs whose
+atomic_inc() implementations do not guarantee full ordering, thus
+incurring no unnecessary overhead on x86.  There are a number of
+variations on the smp_mb__*() theme:
+
+o	smp_mb__before_atomic(), which provides full ordering prior
+	to an unordered RMW atomic operation.
+
+o	smp_mb__after_atomic(), which, as shown above, provides full
+	ordering subsequent to an unordered RMW atomic operation.
+
+o	smp_mb__after_spinlock(), which provides full ordering subsequent
+	to a successful spinlock acquisition.  Note that spin_lock() is
+	always successful but spin_trylock() might not be.
+
+o	smp_mb__after_srcu_read_unlock(), which provides full ordering
+	subsequent to an srcu_read_unlock().
+
+It is bad practice to place code between the smp__*() primitive and the
+operation whose ordering that it is augmenting.  The reason is that the
+ordering of this intervening code will differ from one CPU architecture
+to another.
+
+
+Write Memory Barrier
+--------------------
+
+The Linux kernel's write memory barrier is smp_wmb().  If a CPU executes
+the following code:
+
+	WRITE_ONCE(x, 1);
+	smp_wmb();
+	WRITE_ONCE(y, 1);
+
+Then any given CPU will see the write to "x" has having happened before
+the write to "y".  However, you are usually better off using a release
+store, as described in the "Release Operations" section below.
+
+Note that smp_wmb() might fail to provide ordering for unmarked C-language
+stores because profile-driven optimization could determine that the
+value being overwritten is almost always equal to the new value.  Such a
+compiler might then reasonably decide to transform "x = 1" and "y = 1"
+as follows:
+
+	if (x != 1)
+		x = 1;
+	smp_wmb(); // BUG: does not order the reads!!!
+	if (y != 1)
+		y = 1;
+
+Therefore, if you need to use smp_wmb() with unmarked C-language writes,
+you will need to make sure that none of the compilers used to build
+the Linux kernel carry out this sort of transformation, both now and in
+the future.
+
+
+Read Memory Barrier
+-------------------
+
+The Linux kernel's read memory barrier is smp_rmb().  If a CPU executes
+the following code:
+
+	r0 = READ_ONCE(y);
+	smp_rmb();
+	r1 = READ_ONCE(x);
+
+Then any given CPU will see the read from "y" as having preceded the read from
+"x".  However, you are usually better off using an acquire load, as described
+in the "Acquire Operations" section below.
+
+Compiler Barrier
+----------------
+
+The Linux kernel's compiler barrier is barrier().  This primitive
+prohibits compiler code-motion optimizations that might move memory
+references across the point in the code containing the barrier(), but
+does not constrain hardware memory ordering.  For example, this can be
+used to prevent to compiler from moving code across an infinite loop:
+
+	WRITE_ONCE(x, 1);
+	while (dontstop)
+		barrier();
+	r1 = READ_ONCE(y);
+
+Without the barrier(), the compiler would be within its rights to move the
+WRITE_ONCE() to follow the loop.  This code motion could be problematic
+in the case where an interrupt handler terminates the loop.  Another way
+to handle this is to use READ_ONCE() for the load of "dontstop".
+
+Note that the barriers discussed previously use barrier() or its low-level
+equivalent in their implementations.
+
+
+Ordered Memory Accesses
+=======================
+
+The Linux kernel provides a wide variety of ordered memory accesses:
+
+a.	Release operations.
+
+b.	Acquire operations.
+
+c.	RCU read-side ordering.
+
+d.	Control dependencies.
+
+Each of the above categories has its own section below.
+
+
+Release Operations
+------------------
+
+Release operations include smp_store_release(), atomic_set_release(),
+rcu_assign_pointer(), and value-returning RMW operations whose names
+end in _release.  These operations order their own store against all
+of the CPU's prior memory accesses.  Release operations often provide
+improved readability and performance compared to explicit barriers.
+For example, use of smp_store_release() saves a line compared to the
+smp_wmb() example above:
+
+	WRITE_ONCE(x, 1);
+	smp_store_release(&y, 1);
+
+More important, smp_store_release() makes it easier to connect up the
+different pieces of the concurrent algorithm.  The variable stored to
+by the smp_store_release(), in this case "y", will normally be used in
+an acquire operation in other parts of the concurrent algorithm.
+
+To see the performance advantages, suppose that the above example read
+from "x" instead of writing to it.  Then an smp_wmb() could not guarantee
+ordering, and an smp_mb() would be needed instead:
+
+	r1 = READ_ONCE(x);
+	smp_mb();
+	WRITE_ONCE(y, 1);
+
+But smp_mb() often incurs much higher overhead than does
+smp_store_release(), which still provides the needed ordering of "x"
+against "y".  On x86, the version using smp_store_release() might compile
+to a simple load instruction followed by a simple store instruction.
+In contrast, the smp_mb() compiles to an expensive instruction that
+provides the needed ordering.
+
+There is a wide variety of release operations:
+
+o	Store operations, including not only the aforementioned
+	smp_store_release(), but also atomic_set_release(), and
+	atomic_long_set_release().
+
+o	RCU's rcu_assign_pointer() operation.  This is the same as
+	smp_store_release() except that: (1) It takes the pointer to
+	be assigned to instead of a pointer to that pointer, (2) It
+	is intended to be used in conjunction with rcu_dereference()
+	and similar rather than smp_load_acquire(), and (3) It checks
+	for an RCU-protected pointer in "sparse" runs.
+
+o	Value-returning RMW operations whose names end in _release,
+	such as atomic_fetch_add_release() and cmpxchg_release().
+	Note that release ordering is guaranteed only against the
+	memory-store portion of the RMW operation, and not against the
+	memory-load portion.  Note also that conditional operations such
+	as cmpxchg_release() are only guaranteed to provide ordering
+	when they succeed.
+
+As mentioned earlier, release operations are often paired with acquire
+operations, which are the subject of the next section.
+
+
+Acquire Operations
+------------------
+
+Acquire operations include smp_load_acquire(), atomic_read_acquire(),
+and value-returning RMW operations whose names end in _acquire.   These
+operations order their own load against all of the CPU's subsequent
+memory accesses.  Acquire operations often provide improved performance
+and readability compared to explicit barriers.  For example, use of
+smp_load_acquire() saves a line compared to the smp_rmb() example above:
+
+	r0 = smp_load_acquire(&y);
+	r1 = READ_ONCE(x);
+
+As with smp_store_release(), this also makes it easier to connect
+the different pieces of the concurrent algorithm by looking for the
+smp_store_release() that stores to "y".  In addition, smp_load_acquire()
+improves upon smp_rmb() by ordering against subsequent stores as well
+as against subsequent loads.
+
+There are a couple of categories of acquire operations:
+
+o	Load operations, including not only the aforementioned
+	smp_load_acquire(), but also atomic_read_acquire(), and
+	atomic64_read_acquire().
+
+o	Value-returning RMW operations whose names end in _acquire,
+	such as atomic_xchg_acquire() and atomic_cmpxchg_acquire().
+	Note that acquire ordering is guaranteed only against the
+	memory-load portion of the RMW operation, and not against the
+	memory-store portion.  Note also that conditional operations
+	such as atomic_cmpxchg_acquire() are only guaranteed to provide
+	ordering when they succeed.
+
+Symmetry being what it is, acquire operations are often paired with the
+release operations covered earlier.  For example, consider the following
+example, where task0() and task1() execute concurrently:
+
+	void task0(void)
+	{
+		WRITE_ONCE(x, 1);
+		smp_store_release(&y, 1);
+	}
+
+	void task1(void)
+	{
+		r0 = smp_load_acquire(&y);
+		r1 = READ_ONCE(x);
+	}
+
+If "x" and "y" are both initially zero, then either r0's final value
+will be zero or r1's final value will be one, thus providing the required
+ordering.
+
+
+RCU Read-Side Ordering
+----------------------
+
+This category includes read-side markers such as rcu_read_lock()
+and rcu_read_unlock() as well as pointer-traversal primitives such as
+rcu_dereference() and srcu_dereference().
+
+Compared to locking primitives and RMW atomic operations, markers
+for RCU read-side critical sections incur very low overhead because
+they interact only with the corresponding grace-period primitives.
+For example, the rcu_read_lock() and rcu_read_unlock() markers interact
+with synchronize_rcu(), synchronize_rcu_expedited(), and call_rcu().
+The way this works is that if a given call to synchronize_rcu() cannot
+prove that it started before a given call to rcu_read_lock(), then
+that synchronize_rcu() must block until the matching rcu_read_unlock()
+is reached.  For more information, please see the synchronize_rcu()
+docbook header comment and the material in Documentation/RCU.
+
+RCU's pointer-traversal primitives, including rcu_dereference() and
+srcu_dereference(), order their load (which must be a pointer) against any
+of the CPU's subsequent memory accesses whose address has been calculated
+from the value loaded.  There is said to be an *address dependency*
+from the value returned by the rcu_dereference() or srcu_dereference()
+to that subsequent memory access.
+
+A call to rcu_dereference() for a given RCU-protected pointer is
+usually paired with a call to a call to rcu_assign_pointer() for that
+same pointer in much the same way that a call to smp_load_acquire() is
+paired with a call to smp_store_release().  Calls to rcu_dereference()
+and rcu_assign_pointer are often buried in other APIs, for example,
+the RCU list API members defined in include/linux/rculist.h.  For more
+information, please see the docbook headers in that file, the most
+recent LWN article on the RCU API (https://lwn.net/Articles/777036/),
+and of course the material in Documentation/RCU.
+
+If the pointer value is manipulated between the rcu_dereference()
+that returned it and a later dereference(), please read
+Documentation/RCU/rcu_dereference.rst.  It can also be quite helpful to
+review uses in the Linux kernel.
+
+
+Control Dependencies
+--------------------
+
+A control dependency extends from a marked load (READ_ONCE() or stronger)
+through an "if" condition to a marked store (WRITE_ONCE() or stronger)
+that is executed only by one of the legs of that "if" statement.
+Control dependencies are so named because they are mediated by
+control-flow instructions such as comparisons and conditional branches.
+
+In short, you can use a control dependency to enforce ordering between
+an READ_ONCE() and a WRITE_ONCE() when there is an "if" condition
+between them.  The canonical example is as follows:
+
+	q = READ_ONCE(a);
+	if (q)
+		WRITE_ONCE(b, 1);
+
+In this case, all CPUs would see the read from "a" as happening before
+the write to "b".
+
+However, control dependencies are easily destroyed by compiler
+optimizations, so any use of control dependencies must take into account
+all of the compilers used to build the Linux kernel.  Please see the
+"control-dependencies.txt" file for more information.
+
+
+Unordered Accesses
+==================
+
+Each of these two categories of unordered accesses has a section below:
+
+a.	Unordered marked operations.
+
+b.	Unmarked C-language accesses.
+
+
+Unordered Marked Operations
+---------------------------
+
+Unordered operations to different variables are just that, unordered.
+However, if a group of CPUs apply these operations to a single variable,
+all the CPUs will agree on the operation order.  Of course, the ordering
+of unordered marked accesses can also be constrained using the mechanisms
+described earlier in this document.
+
+These operations come in three categories:
+
+o	Marked writes, such as WRITE_ONCE() and atomic_set().  These
+	primitives required the compiler to emit the corresponding store
+	instructions in the expected execution order, thus suppressing
+	a number of destructive optimizations.	However, they provide no
+	hardware ordering guarantees, and in fact many CPUs will happily
+	reorder marked writes with each other or with other unordered
+	operations, unless these operations are to the same variable.
+
+o	Marked reads, such as READ_ONCE() and atomic_read().  These
+	primitives required the compiler to emit the corresponding load
+	instructions in the expected execution order, thus suppressing
+	a number of destructive optimizations.	However, they provide no
+	hardware ordering guarantees, and in fact many CPUs will happily
+	reorder marked reads with each other or with other unordered
+	operations, unless these operations are to the same variable.
+
+o	Unordered RMW atomic operations.  These are non-value-returning
+	RMW atomic operations whose names do not end in _acquire or
+	_release, and also value-returning RMW operations whose names
+	end in _relaxed.  Examples include atomic_add(), atomic_or(),
+	and atomic64_fetch_xor_relaxed().  These operations do carry
+	out the specified RMW operation atomically, for example, five
+	concurrent atomic_inc() operations applied to a given variable
+	will reliably increase the value of that variable by five.
+	However, many CPUs will happily reorder these operations with
+	each other or with other unordered operations.
+
+	This category of operations can be efficiently ordered using
+	smp_mb__before_atomic() and smp_mb__after_atomic(), as was
+	discussed in the "RMW Ordering Augmentation Barriers" section.
+
+In short, these operations can be freely reordered unless they are all
+operating on a single variable or unless they are constrained by one of
+the operations called out earlier in this document.
+
+
+Unmarked C-Language Accesses
+----------------------------
+
+Unmarked C-language accesses are normal variable accesses to normal
+variables, that is, to variables that are not "volatile" and are not
+C11 atomic variables.  These operations provide no ordering guarantees,
+and further do not guarantee "atomic" access.  For example, the compiler
+might (and sometimes does) split a plain C-language store into multiple
+smaller stores.  A load from that same variable running on some other
+CPU while such a store is executing might see a value that is a mashup
+of the old value and the new value.
+
+Unmarked C-language accesses are unordered, and are also subject to
+any number of compiler optimizations, many of which can break your
+concurrent code.  It is possible to used unmarked C-language accesses for
+shared variables that are subject to concurrent access, but great care
+is required on an ongoing basis.  The compiler-constraining barrier()
+primitive can be helpful, as can the various ordering primitives discussed
+in this document.  It nevertheless bears repeating that use of unmarked
+C-language accesses requires careful attention to not just your code,
+but to all the compilers that might be used to build it.  Such compilers
+might replace a series of loads with a single load, and might replace
+a series of stores with a single store.  Some compilers will even split
+a single store into multiple smaller stores.
+
+But there are some ways of using unmarked C-language accesses for shared
+variables without such worries:
+
+o	Guard all accesses to a given variable by a particular lock,
+	so that there are never concurrent conflicting accesses to
+	that variable.	(There are "conflicting accesses" when
+	(1) at least one of the concurrent accesses to a variable is an
+	unmarked C-language access and (2) when at least one of those
+	accesses is a write, whether marked or not.)
+
+o	As above, but using other synchronization primitives such
+	as reader-writer locks or sequence locks.
+
+o	Use locking or other means to ensure that all concurrent accesses
+	to a given variable are reads.
+
+o	Restrict use of a given variable to statistics or heuristics
+	where the occasional bogus value can be tolerated.
+
+o	Declare the accessed variables as C11 atomics.
+	https://lwn.net/Articles/691128/
+
+o	Declare the accessed variables as "volatile".
+
+If you need to live more dangerously, please do take the time to
+understand the compilers.  One place to start is these two LWN
+articles:
+
+Who's afraid of a big bad optimizing compiler?
+	https://lwn.net/Articles/793253
+Calibrating your fear of big bad optimizing compilers
+	https://lwn.net/Articles/799218
+
+Used properly, unmarked C-language accesses can reduce overhead on
+fastpaths.  However, the price is great care and continual attention
+to your compiler as new versions come out and as new optimizations
+are enabled.
-- 
cgit v1.2.3-70-g09d2


From 0a27ce6b6968866fa8e3bd70371d67752db7718f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 22 Oct 2020 15:16:08 -0700
Subject: tools/memory-model: Add a glossary of LKMM terms

[ paulmck: Apply Alan Stern feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/Documentation/glossary.txt | 172 ++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)
 create mode 100644 tools/memory-model/Documentation/glossary.txt

(limited to 'tools')

diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt
new file mode 100644
index 000000000000..79acb75d56ea
--- /dev/null
+++ b/tools/memory-model/Documentation/glossary.txt
@@ -0,0 +1,172 @@
+This document contains brief definitions of LKMM-related terms.  Like most
+glossaries, it is not intended to be read front to back (except perhaps
+as a way of confirming a diagnosis of OCD), but rather to be searched
+for specific terms.
+
+
+Address Dependency:  When the address of a later memory access is computed
+	based on the value returned by an earlier load, an "address
+	dependency" extends from that load extending to the later access.
+	Address dependencies are quite common in RCU read-side critical
+	sections:
+
+	 1 rcu_read_lock();
+	 2 p = rcu_dereference(gp);
+	 3 do_something(p->a);
+	 4 rcu_read_unlock();
+
+	 In this case, because the address of "p->a" on line 3 is computed
+	 from the value returned by the rcu_dereference() on line 2, the
+	 address dependency extends from that rcu_dereference() to that
+	 "p->a".  In rare cases, optimizing compilers can destroy address
+	 dependencies.	Please see Documentation/RCU/rcu_dereference.txt
+	 for more information.
+
+	 See also "Control Dependency" and "Data Dependency".
+
+Acquire:  With respect to a lock, acquiring that lock, for example,
+	using spin_lock().  With respect to a non-lock shared variable,
+	a special operation that includes a load and which orders that
+	load before later memory references running on that same CPU.
+	An example special acquire operation is smp_load_acquire(),
+	but atomic_read_acquire() and atomic_xchg_acquire() also include
+	acquire loads.
+
+	When an acquire load returns the value stored by a release store
+	to that same variable, then all operations preceding that store
+	happen before any operations following that load acquire.
+
+	See also "Relaxed" and "Release".
+
+Coherence (co):  When one CPU's store to a given variable overwrites
+	either the value from another CPU's store or some later value,
+	there is said to be a coherence link from the second CPU to
+	the first.
+
+	It is also possible to have a coherence link within a CPU, which
+	is a "coherence internal" (coi) link.  The term "coherence
+	external" (coe) link is used when it is necessary to exclude
+	the coi case.
+
+	See also "From-reads" and "Reads-from".
+
+Control Dependency:  When a later store's execution depends on a test
+	of a value computed from a value returned by an earlier load,
+	a "control dependency" extends from that load to that store.
+	For example:
+
+	 1 if (READ_ONCE(x))
+	 2   WRITE_ONCE(y, 1);
+
+	 Here, the control dependency extends from the READ_ONCE() on
+	 line 1 to the WRITE_ONCE() on line 2.	Control dependencies are
+	 fragile, and can be easily destroyed by optimizing compilers.
+	 Please see control-dependencies.txt for more information.
+
+	 See also "Address Dependency" and "Data Dependency".
+
+Cycle:	Memory-barrier pairing is restricted to a pair of CPUs, as the
+	name suggests.	And in a great many cases, a pair of CPUs is all
+	that is required.  In other cases, the notion of pairing must be
+	extended to additional CPUs, and the result is called a "cycle".
+	In a cycle, each CPU's ordering interacts with that of the next:
+
+	CPU 0                CPU 1                CPU 2
+	WRITE_ONCE(x, 1);    WRITE_ONCE(y, 1);    WRITE_ONCE(z, 1);
+	smp_mb();            smp_mb();            smp_mb();
+	r0 = READ_ONCE(y);   r1 = READ_ONCE(z);   r2 = READ_ONCE(x);
+
+	CPU 0's smp_mb() interacts with that of CPU 1, which interacts
+	with that of CPU 2, which in turn interacts with that of CPU 0
+	to complete the cycle.	Because of the smp_mb() calls between
+	each pair of memory accesses, the outcome where r0, r1, and r2
+	are all equal to zero is forbidden by LKMM.
+
+	See also "Pairing".
+
+Data Dependency:  When the data written by a later store is computed based
+	on the value returned by an earlier load, a "data dependency"
+	extends from that load to that later store.  For example:
+
+	 1 r1 = READ_ONCE(x);
+	 2 WRITE_ONCE(y, r1 + 1);
+
+	In this case, the data dependency extends from the READ_ONCE()
+	on line 1 to the WRITE_ONCE() on line 2.  Data dependencies are
+	fragile and can be easily destroyed by optimizing compilers.
+	Because optimizing compilers put a great deal of effort into
+	working out what values integer variables might have, this is
+	especially true in cases where the dependency is carried through
+	an integer.
+
+	See also "Address Dependency" and "Control Dependency".
+
+From-Reads (fr):  When one CPU's store to a given variable happened
+	too late to affect the value returned by another CPU's
+	load from that same variable, there is said to be a from-reads
+	link from the load to the store.
+
+	It is also possible to have a from-reads link within a CPU, which
+	is a "from-reads internal" (fri) link.  The term "from-reads
+	external" (fre) link is used when it is necessary to exclude
+	the fri case.
+
+	See also "Coherence" and "Reads-from".
+
+Fully Ordered:  An operation such as smp_mb() that orders all of
+	its CPU's prior accesses with all of that CPU's subsequent
+	accesses, or a marked access such as atomic_add_return()
+	that orders all of its CPU's prior accesses, itself, and
+	all of its CPU's subsequent accesses.
+
+Marked Access:  An access to a variable that uses an special function or
+	macro such as "r1 = READ_ONCE(x)" or "smp_store_release(&a, 1)".
+
+	See also "Unmarked Access".
+
+Pairing: "Memory-barrier pairing" reflects the fact that synchronizing
+	data between two CPUs requires that both CPUs their accesses.
+	Memory barriers thus tend to come in pairs, one executed by
+	one of the CPUs and the other by the other CPU.  Of course,
+	pairing also occurs with other types of operations, so that a
+	smp_store_release() pairs with an smp_load_acquire() that reads
+	the value stored.
+
+	See also "Cycle".
+
+Reads-From (rf):  When one CPU's load returns the value stored by some other
+	CPU, there is said to be a reads-from link from the second
+	CPU's store to the first CPU's load.  Reads-from links have the
+	nice property that time must advance from the store to the load,
+	which means that algorithms using reads-from links can use lighter
+	weight ordering and synchronization compared to algorithms using
+	coherence and from-reads links.
+
+	It is also possible to have a reads-from link within a CPU, which
+	is a "reads-from internal" (rfi) link.	The term "reads-from
+	external" (rfe) link is used when it is necessary to exclude
+	the rfi case.
+
+	See also Coherence" and "From-reads".
+
+Relaxed:  A marked access that does not imply ordering, for example, a
+	READ_ONCE(), WRITE_ONCE(), a non-value-returning read-modify-write
+	operation, or a value-returning read-modify-write operation whose
+	name ends in "_relaxed".
+
+	See also "Acquire" and "Release".
+
+Release:  With respect to a lock, releasing that lock, for example,
+	using spin_unlock().  With respect to a non-lock shared variable,
+	a special operation that includes a store and which orders that
+	store after earlier memory references that ran on that same CPU.
+	An example special release store is smp_store_release(), but
+	atomic_set_release() and atomic_cmpxchg_release() also include
+	release stores.
+
+	See also "Acquire" and "Relaxed".
+
+Unmarked Access:  An access to a variable that uses normal C-language
+	syntax, for example, "a = b[2]";
+
+	See also "Marked Access".
-- 
cgit v1.2.3-70-g09d2


From 1947bfcf81a905e84a58b423063e81034a90efed Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 5 Nov 2020 13:20:56 -0800
Subject: tools/memory-model: Add types to litmus tests

This commit adds type information for global variables in the litmus
tests in order to allow easier use with klitmus7.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus        | 4 +++-
 tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus        | 4 +++-
 tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus        | 4 +++-
 tools/memory-model/litmus-tests/CoWW+poonceonce.litmus             | 4 +++-
 .../litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus             | 5 ++++-
 tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus   | 5 ++++-
 .../litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus        | 7 ++++++-
 tools/memory-model/litmus-tests/ISA2+poonceonces.litmus            | 6 +++++-
 .../ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus       | 6 +++++-
 .../litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus            | 5 ++++-
 .../litmus-tests/LB+poacquireonce+pooncerelease.litmus             | 5 ++++-
 tools/memory-model/litmus-tests/LB+poonceonces.litmus              | 5 ++++-
 .../litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus       | 5 ++++-
 tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus     | 5 +++--
 .../litmus-tests/MP+polockmbonce+poacquiresilsil.litmus            | 2 ++
 .../memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus | 2 ++
 tools/memory-model/litmus-tests/MP+polocks.litmus                  | 6 +++++-
 tools/memory-model/litmus-tests/MP+poonceonces.litmus              | 5 ++++-
 .../litmus-tests/MP+pooncerelease+poacquireonce.litmus             | 5 ++++-
 tools/memory-model/litmus-tests/MP+porevlocks.litmus               | 6 +++++-
 tools/memory-model/litmus-tests/R+fencembonceonces.litmus          | 5 ++++-
 tools/memory-model/litmus-tests/R+poonceonces.litmus               | 5 ++++-
 .../litmus-tests/S+fencewmbonceonce+poacquireonce.litmus           | 5 ++++-
 tools/memory-model/litmus-tests/S+poonceonces.litmus               | 5 ++++-
 tools/memory-model/litmus-tests/SB+fencembonceonces.litmus         | 5 ++++-
 tools/memory-model/litmus-tests/SB+poonceonces.litmus              | 5 ++++-
 tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus  | 5 ++++-
 tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus        | 5 ++++-
 .../litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus    | 5 ++++-
 .../litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus        | 7 ++++++-
 .../litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus        | 7 ++++++-
 .../Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus     | 6 +++++-
 32 files changed, 130 insertions(+), 31 deletions(-)

(limited to 'tools')

diff --git a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
index 967f9f2a6226..772544f03fb5 100644
--- a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
@@ -7,7 +7,9 @@ C CoRR+poonceonce+Once
  * reads from the same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
index 4635739f3974..5faae98f7ffb 100644
--- a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
@@ -7,7 +7,9 @@ C CoRW+poonceonce+Once
  * a given variable and a later write to that same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
index bb068c92d8da..77c9cc9f8dc6 100644
--- a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
@@ -7,7 +7,9 @@ C CoWR+poonceonce+Once
  * given variable and a later read from that same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
index 0d9f0a958799..85ef746f511a 100644
--- a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
+++ b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
@@ -7,7 +7,9 @@ C CoWW+poonceonce
  * writes to the same variable are ordered.
  *)
 
-{}
+{
+	int x;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
index e729d2776e89..87aa900125ab 100644
--- a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
@@ -10,7 +10,10 @@ C IRIW+fencembonceonces+OnceOnce
  * process?  This litmus test exercises LKMM's "propagation" rule.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
index 4b54dd6a6cd9..f84022dca555 100644
--- a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
@@ -10,7 +10,10 @@ C IRIW+poonceonces+OnceOnce
  * different process?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
index 094d58df7789..398f624daa77 100644
--- a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
@@ -7,7 +7,12 @@ C ISA2+pooncelock+pooncelock+pombonce
  * (in P0() and P1()) is visible to external process P2().
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
index b321aa6f4ea5..212a432ba16b 100644
--- a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
@@ -9,7 +9,11 @@ C ISA2+poonceonces
  * of the smp_load_acquire() invocations are replaced by READ_ONCE()?
  *)
 
-{}
+{
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
index 025b0462ec9b..7afd85672ccd 100644
--- a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
@@ -11,7 +11,11 @@ C ISA2+pooncerelease+poacquirerelease+poacquireonce
  * (AKA non-rf) link, so release-acquire is all that is needed.
  *)
 
-{}
+{
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
index 4727f5aaf03b..c8a93c7ee556 100644
--- a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
+++ b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
@@ -11,7 +11,10 @@ C LB+fencembonceonce+ctrlonceonce
  * another control dependency and order would still be maintained.)
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
index 07b9904b0e49..2fa029568fa1 100644
--- a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
+++ b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
@@ -8,7 +8,10 @@ C LB+poacquireonce+pooncerelease
  * to the other?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/LB+poonceonces.litmus b/tools/memory-model/litmus-tests/LB+poonceonces.litmus
index 74c49cb3c37b..2107306e8625 100644
--- a/tools/memory-model/litmus-tests/LB+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/LB+poonceonces.litmus
@@ -7,7 +7,10 @@ C LB+poonceonces
  * be prevented even with no explicit ordering?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index a273da9faa6d..e04b71b0ce2b 100644
--- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -8,7 +8,10 @@ C MP+fencewmbonceonce+fencermbonceonce
  * is usually better to use smp_store_release() and smp_load_acquire().
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
index 97731b4bbdd8..18df682b08b2 100644
--- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
@@ -10,8 +10,9 @@ C MP+onceassign+derefonce
  *)
 
 {
-y=z;
-z=0;
+	int x;
+	int *y=z;
+	int z=0;
 }
 
 P0(int *x, int **y)
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
index 50f4d62bbf0e..b1b1266fb49a 100644
--- a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -11,6 +11,8 @@ C MP+polockmbonce+poacquiresilsil
  *)
 
 {
+	spinlock_t lo;
+	int x;
 }
 
 P0(spinlock_t *lo, int *x)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
index abf81e7a0895..867c75d8b960 100644
--- a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -11,6 +11,8 @@ C MP+polockonce+poacquiresilsil
  *)
 
 {
+	spinlock_t lo;
+	int x;
 }
 
 P0(spinlock_t *lo, int *x)
diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus
index 712a4fcdf6ce..63e0f67c9b9d 100644
--- a/tools/memory-model/litmus-tests/MP+polocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+polocks.litmus
@@ -11,7 +11,11 @@ C MP+polocks
  * to see all prior accesses by those other CPUs.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index 172f0145301c..68180a403e5c 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -7,7 +7,10 @@ C MP+poonceonces
  * no ordering at all?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
index d52c68429722..19f3e6874b50 100644
--- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
@@ -8,7 +8,10 @@ C MP+pooncerelease+poacquireonce
  * pattern.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
index 72c9276b363e..4ac189adf41e 100644
--- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
@@ -11,7 +11,11 @@ C MP+porevlocks
  * see all prior accesses by those other CPUs.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
index 222a0b850b4a..af9463b39b4a 100644
--- a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
+++ b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
@@ -9,7 +9,10 @@ C R+fencembonceonces
  * cause the resulting test to be allowed.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/R+poonceonces.litmus b/tools/memory-model/litmus-tests/R+poonceonces.litmus
index 5386f128a131..bcd5574e304a 100644
--- a/tools/memory-model/litmus-tests/R+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/R+poonceonces.litmus
@@ -8,7 +8,10 @@ C R+poonceonces
  * store propagation delays.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
index 18479823cd6c..c36341d1aed6 100644
--- a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
@@ -7,7 +7,10 @@ C S+fencewmbonceonce+poacquireonce
  * store against a subsequent store?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/S+poonceonces.litmus b/tools/memory-model/litmus-tests/S+poonceonces.litmus
index 8c9c2f81a580..7775c23143a0 100644
--- a/tools/memory-model/litmus-tests/S+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/S+poonceonces.litmus
@@ -9,7 +9,10 @@ C S+poonceonces
  * READ_ONCE(), is ordering preserved?
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
index ed5fff18d223..833cdfeb7c09 100644
--- a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
@@ -9,7 +9,10 @@ C SB+fencembonceonces
  * suffice, but not much else.)
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/SB+poonceonces.litmus b/tools/memory-model/litmus-tests/SB+poonceonces.litmus
index 10d550730b25..c92211ecbfdf 100644
--- a/tools/memory-model/litmus-tests/SB+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+poonceonces.litmus
@@ -8,7 +8,10 @@ C SB+poonceonces
  * variable that the preceding process reads.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
index 04a16603660b..84344b455eb7 100644
--- a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
@@ -6,7 +6,10 @@ C SB+rfionceonce-poonceonces
  * This litmus test demonstrates that LKMM is not fully multicopy atomic.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x, int *y)
 {
diff --git a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
index 6a2bc12a1af1..431494708611 100644
--- a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
@@ -8,7 +8,10 @@ C WRC+poonceonces+Once
  * test has no ordering at all.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
index e9947250d7de..554999c64db5 100644
--- a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
@@ -10,7 +10,10 @@ C WRC+pooncerelease+fencermbonceonce+Once
  * is A-cumulative in LKMM.
  *)
 
-{}
+{
+	int x;
+	int y;
+}
 
 P0(int *x)
 {
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
index 415248fb6699..265a95ffef13 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
@@ -9,7 +9,12 @@ C Z6.0+pooncelock+poonceLock+pombonce
  * by CPUs not holding that lock.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
index 10a2aa04cd07..0c9aea8e80df 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
@@ -8,7 +8,12 @@ C Z6.0+pooncelock+pooncelock+pombonce
  * seen as ordered by a third process not holding that lock.
  *)
 
-{}
+{
+	spinlock_t mylock;
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y, spinlock_t *mylock)
 {
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
index 88e70b87a683..661f9aaa5791 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
@@ -14,7 +14,11 @@ C Z6.0+pooncerelease+poacquirerelease+fencembonceonce
  * involving locking.)
  *)
 
-{}
+{
+	int x;
+	int y;
+	int z;
+}
 
 P0(int *x, int *y)
 {
-- 
cgit v1.2.3-70-g09d2


From acc4bdc55dcb7d7fe0be736999572a55e121873f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 5 Nov 2020 13:30:11 -0800
Subject: tools/memory-model: Use "buf" and "flag" for message-passing tests

The use of "x" and "y" for message-passing tests is fine for people
familiar with memory models and litmus-test nomenclature, but is a bit
obtuse for others.  This commit therefore substitutes "buf" for "x" and
"flag" for "y" for the MP tests.  There are a few special-case MP tests
that use locks and these are unchanged.  There is another MP test that
uses pointers, and this is changed to name the pointer "p".

Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../MP+fencewmbonceonce+fencermbonceonce.litmus          | 16 ++++++++--------
 .../litmus-tests/MP+onceassign+derefonce.litmus          | 12 ++++++------
 tools/memory-model/litmus-tests/MP+polocks.litmus        | 16 ++++++++--------
 tools/memory-model/litmus-tests/MP+poonceonces.litmus    | 16 ++++++++--------
 .../litmus-tests/MP+pooncerelease+poacquireonce.litmus   | 16 ++++++++--------
 tools/memory-model/litmus-tests/MP+porevlocks.litmus     | 16 ++++++++--------
 6 files changed, 46 insertions(+), 46 deletions(-)

(limited to 'tools')

diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index e04b71b0ce2b..f15e501849dd 100644
--- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -9,25 +9,25 @@ C MP+fencewmbonceonce+fencermbonceonce
  *)
 
 {
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y)
+P0(int *buf, int *flag)
 {
-	WRITE_ONCE(*x, 1);
+	WRITE_ONCE(*buf, 1);
 	smp_wmb();
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *x, int *y)
+P1(int *buf, int *flag)
 {
 	int r0;
 	int r1;
 
-	r0 = READ_ONCE(*y);
+	r0 = READ_ONCE(*flag);
 	smp_rmb();
-	r1 = READ_ONCE(*x);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
index 18df682b08b2..ed8ee9bde0c9 100644
--- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
@@ -10,24 +10,24 @@ C MP+onceassign+derefonce
  *)
 
 {
+	int *p=y;
 	int x;
-	int *y=z;
-	int z=0;
+	int y=0;
 }
 
-P0(int *x, int **y)
+P0(int *x, int **p)
 {
 	WRITE_ONCE(*x, 1);
-	rcu_assign_pointer(*y, x);
+	rcu_assign_pointer(*p, x);
 }
 
-P1(int *x, int **y)
+P1(int *x, int **p)
 {
 	int *r0;
 	int r1;
 
 	rcu_read_lock();
-	r0 = rcu_dereference(*y);
+	r0 = rcu_dereference(*p);
 	r1 = READ_ONCE(*r0);
 	rcu_read_unlock();
 }
diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus
index 63e0f67c9b9d..4b0c2edcc029 100644
--- a/tools/memory-model/litmus-tests/MP+polocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+polocks.litmus
@@ -13,27 +13,27 @@ C MP+polocks
 
 {
 	spinlock_t mylock;
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock)
 {
-	WRITE_ONCE(*x, 1);
+	WRITE_ONCE(*buf, 1);
 	spin_lock(mylock);
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*flag, 1);
 	spin_unlock(mylock);
 }
 
-P1(int *x, int *y, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock)
 {
 	int r0;
 	int r1;
 
 	spin_lock(mylock);
-	r0 = READ_ONCE(*y);
+	r0 = READ_ONCE(*flag);
 	spin_unlock(mylock);
-	r1 = READ_ONCE(*x);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index 68180a403e5c..3010bbaec46c 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -8,23 +8,23 @@ C MP+poonceonces
  *)
 
 {
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y)
+P0(int *buf, int *flag)
 {
-	WRITE_ONCE(*x, 1);
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*buf, 1);
+	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *x, int *y)
+P1(int *buf, int *flag)
 {
 	int r0;
 	int r1;
 
-	r0 = READ_ONCE(*y);
-	r1 = READ_ONCE(*x);
+	r0 = READ_ONCE(*flag);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
index 19f3e6874b50..21e825d5dea6 100644
--- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
@@ -9,23 +9,23 @@ C MP+pooncerelease+poacquireonce
  *)
 
 {
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y)
+P0(int *buf, int *flag)
 {
-	WRITE_ONCE(*x, 1);
-	smp_store_release(y, 1);
+	WRITE_ONCE(*buf, 1);
+	smp_store_release(flag, 1);
 }
 
-P1(int *x, int *y)
+P1(int *buf, int *flag)
 {
 	int r0;
 	int r1;
 
-	r0 = smp_load_acquire(y);
-	r1 = READ_ONCE(*x);
+	r0 = smp_load_acquire(flag);
+	r1 = READ_ONCE(*buf);
 }
 
 exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
index 4ac189adf41e..9691d55b4e21 100644
--- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
@@ -13,27 +13,27 @@ C MP+porevlocks
 
 {
 	spinlock_t mylock;
-	int x;
-	int y;
+	int buf;
+	int flag;
 }
 
-P0(int *x, int *y, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock)
 {
 	int r0;
 	int r1;
 
-	r0 = READ_ONCE(*y);
+	r0 = READ_ONCE(*flag);
 	spin_lock(mylock);
-	r1 = READ_ONCE(*x);
+	r1 = READ_ONCE(*buf);
 	spin_unlock(mylock);
 }
 
-P1(int *x, int *y, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock)
 {
 	spin_lock(mylock);
-	WRITE_ONCE(*x, 1);
+	WRITE_ONCE(*buf, 1);
 	spin_unlock(mylock);
-	WRITE_ONCE(*y, 1);
+	WRITE_ONCE(*flag, 1);
 }
 
 exists (0:r0=1 /\ 0:r1=0)
-- 
cgit v1.2.3-70-g09d2


From b6ff30849ca723b78306514246b98ca5645d92f5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 5 Nov 2020 13:39:28 -0800
Subject: tools/memory-model: Label MP tests' producers and consumers

This commit adds comments that label the MP tests' producer and consumer
processes, and also that label the "exists" clause as the bad outcome.

Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 .../litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus        | 6 +++---
 tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus      | 6 +++---
 .../litmus-tests/MP+polockmbonce+poacquiresilsil.litmus             | 6 +++---
 .../memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus  | 6 +++---
 tools/memory-model/litmus-tests/MP+polocks.litmus                   | 6 +++---
 tools/memory-model/litmus-tests/MP+poonceonces.litmus               | 6 +++---
 .../memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus | 6 +++---
 tools/memory-model/litmus-tests/MP+porevlocks.litmus                | 6 +++---
 8 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
index f15e501849dd..c5c168d92973 100644
--- a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -13,14 +13,14 @@ C MP+fencewmbonceonce+fencermbonceonce
 	int flag;
 }
 
-P0(int *buf, int *flag)
+P0(int *buf, int *flag) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	smp_wmb();
 	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *buf, int *flag)
+P1(int *buf, int *flag) // Consumer
 {
 	int r0;
 	int r1;
@@ -30,4 +30,4 @@ P1(int *buf, int *flag)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
index ed8ee9bde0c9..20ff62649f1e 100644
--- a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
@@ -15,13 +15,13 @@ C MP+onceassign+derefonce
 	int y=0;
 }
 
-P0(int *x, int **p)
+P0(int *x, int **p) // Producer
 {
 	WRITE_ONCE(*x, 1);
 	rcu_assign_pointer(*p, x);
 }
 
-P1(int *x, int **p)
+P1(int *x, int **p) // Consumer
 {
 	int *r0;
 	int r1;
@@ -32,4 +32,4 @@ P1(int *x, int **p)
 	rcu_read_unlock();
 }
 
-exists (1:r0=x /\ 1:r1=0)
+exists (1:r0=x /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
index b1b1266fb49a..153917ad5dc9 100644
--- a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -15,7 +15,7 @@ C MP+polockmbonce+poacquiresilsil
 	int x;
 }
 
-P0(spinlock_t *lo, int *x)
+P0(spinlock_t *lo, int *x) // Producer
 {
 	spin_lock(lo);
 	smp_mb__after_spinlock();
@@ -23,7 +23,7 @@ P0(spinlock_t *lo, int *x)
 	spin_unlock(lo);
 }
 
-P1(spinlock_t *lo, int *x)
+P1(spinlock_t *lo, int *x) // Consumer
 {
 	int r1;
 	int r2;
@@ -34,4 +34,4 @@ P1(spinlock_t *lo, int *x)
 	r3 = spin_is_locked(lo);
 }
 
-exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
index 867c75d8b960..aad64397bb8c 100644
--- a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -15,14 +15,14 @@ C MP+polockonce+poacquiresilsil
 	int x;
 }
 
-P0(spinlock_t *lo, int *x)
+P0(spinlock_t *lo, int *x) // Producer
 {
 	spin_lock(lo);
 	WRITE_ONCE(*x, 1);
 	spin_unlock(lo);
 }
 
-P1(spinlock_t *lo, int *x)
+P1(spinlock_t *lo, int *x) // Consumer
 {
 	int r1;
 	int r2;
@@ -33,4 +33,4 @@ P1(spinlock_t *lo, int *x)
 	r3 = spin_is_locked(lo);
 }
 
-exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus
index 4b0c2edcc029..21cbca6f3be4 100644
--- a/tools/memory-model/litmus-tests/MP+polocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+polocks.litmus
@@ -17,7 +17,7 @@ C MP+polocks
 	int flag;
 }
 
-P0(int *buf, int *flag, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	spin_lock(mylock);
@@ -25,7 +25,7 @@ P0(int *buf, int *flag, spinlock_t *mylock)
 	spin_unlock(mylock);
 }
 
-P1(int *buf, int *flag, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock) // Consumer
 {
 	int r0;
 	int r1;
@@ -36,4 +36,4 @@ P1(int *buf, int *flag, spinlock_t *mylock)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
index 3010bbaec46c..9f9769d647c7 100644
--- a/tools/memory-model/litmus-tests/MP+poonceonces.litmus
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -12,13 +12,13 @@ C MP+poonceonces
 	int flag;
 }
 
-P0(int *buf, int *flag)
+P0(int *buf, int *flag) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	WRITE_ONCE(*flag, 1);
 }
 
-P1(int *buf, int *flag)
+P1(int *buf, int *flag) // Consumer
 {
 	int r0;
 	int r1;
@@ -27,4 +27,4 @@ P1(int *buf, int *flag)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
index 21e825d5dea6..cbe28e733443 100644
--- a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
@@ -13,13 +13,13 @@ C MP+pooncerelease+poacquireonce
 	int flag;
 }
 
-P0(int *buf, int *flag)
+P0(int *buf, int *flag) // Producer
 {
 	WRITE_ONCE(*buf, 1);
 	smp_store_release(flag, 1);
 }
 
-P1(int *buf, int *flag)
+P1(int *buf, int *flag) // Consumer
 {
 	int r0;
 	int r1;
@@ -28,4 +28,4 @@ P1(int *buf, int *flag)
 	r1 = READ_ONCE(*buf);
 }
 
-exists (1:r0=1 /\ 1:r1=0)
+exists (1:r0=1 /\ 1:r1=0) (* Bad outcome. *)
diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
index 9691d55b4e21..012041bd4feb 100644
--- a/tools/memory-model/litmus-tests/MP+porevlocks.litmus
+++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
@@ -17,7 +17,7 @@ C MP+porevlocks
 	int flag;
 }
 
-P0(int *buf, int *flag, spinlock_t *mylock)
+P0(int *buf, int *flag, spinlock_t *mylock) // Consumer
 {
 	int r0;
 	int r1;
@@ -28,7 +28,7 @@ P0(int *buf, int *flag, spinlock_t *mylock)
 	spin_unlock(mylock);
 }
 
-P1(int *buf, int *flag, spinlock_t *mylock)
+P1(int *buf, int *flag, spinlock_t *mylock) // Producer
 {
 	spin_lock(mylock);
 	WRITE_ONCE(*buf, 1);
@@ -36,4 +36,4 @@ P1(int *buf, int *flag, spinlock_t *mylock)
 	WRITE_ONCE(*flag, 1);
 }
 
-exists (0:r0=1 /\ 0:r1=0)
+exists (0:r0=1 /\ 0:r1=0) (* Bad outcome. *)
-- 
cgit v1.2.3-70-g09d2


From df11f7dd5834146defa448acba097e8d7703cc42 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Mon, 12 Oct 2020 12:47:13 -0700
Subject: selftests: kvm: Fix the segment descriptor layout to match the actual
 layout

Fix the layout of 'struct desc64' to match the layout described in the
SDM Vol 3, Chapter 3 "Protected-Mode Memory Management", section 3.4.5
"Segment Descriptors", Figure 3-8 "Segment Descriptor".  The test added
later in this series relies on this and crashes if this layout is not
correct.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
Message-Id: <20201012194716.3950330-2-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/processor.h | 2 +-
 tools/testing/selftests/kvm/lib/x86_64/processor.c     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 82b7fe16a824..0a65e7bb5249 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -59,7 +59,7 @@ struct gpr64_regs {
 struct desc64 {
 	uint16_t limit0;
 	uint16_t base0;
-	unsigned base1:8, s:1, type:4, dpl:2, p:1;
+	unsigned base1:8, type:4, s:1, dpl:2, p:1;
 	unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
 	uint32_t base3;
 	uint32_t zero1;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f6eb34eaa0d2..1ccf6c9b3476 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -392,11 +392,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
 	desc->limit0 = segp->limit & 0xFFFF;
 	desc->base0 = segp->base & 0xFFFF;
 	desc->base1 = segp->base >> 16;
-	desc->s = segp->s;
 	desc->type = segp->type;
+	desc->s = segp->s;
 	desc->dpl = segp->dpl;
 	desc->p = segp->present;
 	desc->limit1 = segp->limit >> 16;
+	desc->avl = segp->avl;
 	desc->l = segp->l;
 	desc->db = segp->db;
 	desc->g = segp->g;
-- 
cgit v1.2.3-70-g09d2


From 85f2a4320ef27ce74b9da0631460561028c48756 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Mon, 12 Oct 2020 12:47:14 -0700
Subject: selftests: kvm: Clear uc so UCALL_NONE is being properly reported

Ensure the out value 'uc' in get_ucall() is properly reporting
UCALL_NONE if the call fails.  The return value will be correctly
reported, however, the out parameter 'uc' will not be.  Clear the struct
to ensure the correct value is being reported in the out parameter.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
Message-Id: <20201012194716.3950330-3-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/aarch64/ucall.c | 3 +++
 tools/testing/selftests/kvm/lib/s390x/ucall.c   | 3 +++
 tools/testing/selftests/kvm/lib/x86_64/ucall.c  | 3 +++
 3 files changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index c8e0ec20d3bf..2f37b90ee1a9 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
 	struct kvm_run *run = vcpu_state(vm, vcpu_id);
 	struct ucall ucall = {};
 
+	if (uc)
+		memset(uc, 0, sizeof(*uc));
+
 	if (run->exit_reason == KVM_EXIT_MMIO &&
 	    run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
 		vm_vaddr_t gva;
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index fd589dc9bfab..9d3b0f15249a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
 	struct kvm_run *run = vcpu_state(vm, vcpu_id);
 	struct ucall ucall = {};
 
+	if (uc)
+		memset(uc, 0, sizeof(*uc));
+
 	if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
 	    run->s390_sieic.icptcode == 4 &&
 	    (run->s390_sieic.ipa >> 8) == 0x83 &&    /* 0x83 means DIAGNOSE */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index da4d89ad5419..a3489973e290 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
 	struct kvm_run *run = vcpu_state(vm, vcpu_id);
 	struct ucall ucall = {};
 
+	if (uc)
+		memset(uc, 0, sizeof(*uc));
+
 	if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
 		struct kvm_regs regs;
 
-- 
cgit v1.2.3-70-g09d2


From 29faeb9632012d6c3fa4aa33c3d589b9ff18b206 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Mon, 12 Oct 2020 12:47:15 -0700
Subject: selftests: kvm: Add exception handling to selftests

Add the infrastructure needed to enable exception handling in selftests.
This allows any of the exception and interrupt vectors to be overridden
in the guest.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
Message-Id: <20201012194716.3950330-4-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile               |  19 ++--
 tools/testing/selftests/kvm/include/kvm_util.h     |   2 +
 .../selftests/kvm/include/x86_64/processor.h       |  24 +++++
 .../testing/selftests/kvm/lib/aarch64/processor.c  |   4 +
 tools/testing/selftests/kvm/lib/kvm_util.c         |   3 +
 .../testing/selftests/kvm/lib/kvm_util_internal.h  |   2 +
 tools/testing/selftests/kvm/lib/s390x/processor.c  |   4 +
 tools/testing/selftests/kvm/lib/x86_64/handlers.S  |  81 +++++++++++++++
 tools/testing/selftests/kvm/lib/x86_64/processor.c | 114 ++++++++++++++++++++-
 9 files changed, 244 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/lib/x86_64/handlers.S

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 30afbad36cd5..b0f1fcab79f5 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -34,7 +34,7 @@ ifeq ($(ARCH),s390)
 endif
 
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
 
@@ -111,14 +111,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
 include ../lib.mk
 
 STATIC_LIBS := $(OUTPUT)/libkvm.a
-LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
-$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
 	$(AR) crs $@ $^
 
 x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 919e161dd289..356930690bea 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -294,6 +294,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
 	memcpy(&(g), _p, sizeof(g));				\
 })
 
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+
 /* Common ucalls */
 enum {
 	UCALL_NONE,
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0a65e7bb5249..02530dc6339b 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -36,6 +36,8 @@
 #define X86_CR4_SMAP		(1ul << 21)
 #define X86_CR4_PKE		(1ul << 22)
 
+#define UNEXPECTED_VECTOR_PORT 0xfff0u
+
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
 	u64 rax;
@@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void)
 	return idt;
 }
 
+static inline void outl(uint16_t port, uint32_t value)
+{
+	__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
 #define SET_XMM(__var, __xmm) \
 	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
 
@@ -338,6 +345,23 @@ uint32_t kvm_get_cpuid_max_basic(void);
 uint32_t kvm_get_cpuid_max_extended(void);
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
 
+struct ex_regs {
+	uint64_t rax, rcx, rdx, rbx;
+	uint64_t rbp, rsi, rdi;
+	uint64_t r8, r9, r10, r11;
+	uint64_t r12, r13, r14, r15;
+	uint64_t vector;
+	uint64_t error_code;
+	uint64_t rip;
+	uint64_t cs;
+	uint64_t rflags;
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+			void (*handler)(struct ex_regs *));
+
 /*
  * Basic CPU control in CR0
  */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 2afa6618b396..d6c32c328e9a 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 
 	va_end(ap);
 }
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 3327cebc1095..be230f3728d5 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1204,6 +1204,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
 	do {
 		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
 	} while (rc == -1 && errno == EINTR);
+
+	assert_on_unhandled_exception(vm, vcpuid);
+
 	return rc;
 }
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 2ef446520748..f07d383d03a1 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -50,6 +50,8 @@ struct kvm_vm {
 	vm_paddr_t pgd;
 	vm_vaddr_t gdt;
 	vm_vaddr_t tss;
+	vm_vaddr_t idt;
+	vm_vaddr_t handlers;
 };
 
 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index a88c5d665725..7349bb2e1a24 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
 	fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
 		indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
 }
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
new file mode 100644
index 000000000000..aaf7bc7d2ce1
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -0,0 +1,81 @@
+handle_exception:
+	push %r15
+	push %r14
+	push %r13
+	push %r12
+	push %r11
+	push %r10
+	push %r9
+	push %r8
+
+	push %rdi
+	push %rsi
+	push %rbp
+	push %rbx
+	push %rdx
+	push %rcx
+	push %rax
+	mov %rsp, %rdi
+
+	call route_exception
+
+	pop %rax
+	pop %rcx
+	pop %rdx
+	pop %rbx
+	pop %rbp
+	pop %rsi
+	pop %rdi
+	pop %r8
+	pop %r9
+	pop %r10
+	pop %r11
+	pop %r12
+	pop %r13
+	pop %r14
+	pop %r15
+
+	/* Discard vector and error code. */
+	add $16, %rsp
+	iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+	vector = \from
+	.rept \to - \from + 1
+	.align 8
+
+	/* Fetch current address and append it to idt_handlers. */
+	current_handler = .
+.pushsection .rodata
+.quad current_handler
+.popsection
+
+	.if ! \has_error
+	pushq $0
+	.endif
+	pushq $vector
+	jmp handle_exception
+	vector = vector + 1
+	.endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+	HANDLERS has_error=0 from=0  to=7
+	HANDLERS has_error=1 from=8  to=8
+	HANDLERS has_error=0 from=9  to=9
+	HANDLERS has_error=1 from=10 to=14
+	HANDLERS has_error=0 from=15 to=16
+	HANDLERS has_error=1 from=17 to=17
+	HANDLERS has_error=0 from=18 to=255
+
+.section        .note.GNU-stack, "", %progbits
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 1ccf6c9b3476..66228297ac03 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -12,9 +12,18 @@
 #include "../kvm_util_internal.h"
 #include "processor.h"
 
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define DEFAULT_CODE_SELECTOR 0x8
+#define DEFAULT_DATA_SELECTOR 0x10
+
 /* Minimum physical address used for virtual translation tables. */
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
 
+vm_vaddr_t exception_handlers;
+
 /* Virtual translation table structure declarations */
 struct pageMapL4Entry {
 	uint64_t present:1;
@@ -557,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
 		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
 
 		kvm_seg_set_unusable(&sregs.ldt);
-		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
-		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
-		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
+		kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
+		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
+		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
 		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
 		break;
 
@@ -1119,3 +1128,102 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
 		*va_bits = (entry->eax >> 8) & 0xff;
 	}
 }
+
+struct idt_entry {
+	uint16_t offset0;
+	uint16_t selector;
+	uint16_t ist : 3;
+	uint16_t : 5;
+	uint16_t type : 4;
+	uint16_t : 1;
+	uint16_t dpl : 2;
+	uint16_t p : 1;
+	uint16_t offset1;
+	uint32_t offset2; uint32_t reserved;
+};
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+			  int dpl, unsigned short selector)
+{
+	struct idt_entry *base =
+		(struct idt_entry *)addr_gva2hva(vm, vm->idt);
+	struct idt_entry *e = &base[vector];
+
+	memset(e, 0, sizeof(*e));
+	e->offset0 = addr;
+	e->selector = selector;
+	e->ist = 0;
+	e->type = 14;
+	e->dpl = dpl;
+	e->p = 1;
+	e->offset1 = addr >> 16;
+	e->offset2 = addr >> 32;
+}
+
+void kvm_exit_unexpected_vector(uint32_t value)
+{
+	outl(UNEXPECTED_VECTOR_PORT, value);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+	typedef void(*handler)(struct ex_regs *);
+	handler *handlers = (handler *)exception_handlers;
+
+	if (handlers && handlers[regs->vector]) {
+		handlers[regs->vector](regs);
+		return;
+	}
+
+	kvm_exit_unexpected_vector(regs->vector);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+	extern void *idt_handlers;
+	int i;
+
+	vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
+	vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+	/* Handlers have the same address in both address spaces.*/
+	for (i = 0; i < NUM_INTERRUPTS; i++)
+		set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
+			DEFAULT_CODE_SELECTOR);
+}
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_sregs sregs;
+
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs.idt.base = vm->idt;
+	sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+	sregs.gdt.base = vm->gdt;
+	sregs.gdt.limit = getpagesize() - 1;
+	kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+			 void (*handler)(struct ex_regs *))
+{
+	vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+	handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
+		&& vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
+		&& vcpu_state(vm, vcpuid)->io.size == 4) {
+		/* Grab pointer to io data */
+		uint32_t *data = (void *)vcpu_state(vm, vcpuid)
+			+ vcpu_state(vm, vcpuid)->io.data_offset;
+
+		TEST_ASSERT(false,
+			    "Unexpected vectored event in guest (vector:0x%x)",
+			    *data);
+	}
+}
-- 
cgit v1.2.3-70-g09d2


From ac4a4d6de22e674cd6e3fe57199a15383496aad2 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oupton@google.com>
Date: Tue, 27 Oct 2020 16:10:44 -0700
Subject: selftests: kvm: test enforcement of paravirtual cpuid features

Add a set of tests that ensure the guest cannot access paravirtual msrs
and hypercalls that have been disabled in the KVM_CPUID_FEATURES leaf.
Expect a #GP in the case of msr accesses and -KVM_ENOSYS from
hypercalls.

Cc: Jim Mattson <jmattson@google.com>
Signed-off-by: Oliver Upton <oupton@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
Reviewed-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20201027231044.655110-7-oupton@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 tools/testing/selftests/kvm/include/kvm_util.h     |   3 +
 .../selftests/kvm/include/x86_64/processor.h       |  12 ++
 tools/testing/selftests/kvm/lib/kvm_util.c         |  28 +++
 tools/testing/selftests/kvm/lib/x86_64/processor.c |  29 +++
 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c   | 234 +++++++++++++++++++++
 7 files changed, 308 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index d2c2d6205008..22973aa75eda 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -5,6 +5,7 @@
 /x86_64/cr4_cpuid_sync_test
 /x86_64/debug_regs
 /x86_64/evmcs_test
+/x86_64/kvm_pv_test
 /x86_64/hyperv_cpuid
 /x86_64/mmio_warning_test
 /x86_64/platform_info_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index b0f1fcab79f5..44ca6badb544 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -41,6 +41,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 356930690bea..77f53dbc34ff 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -63,6 +63,9 @@ enum vm_mem_backing_src_type {
 
 int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+		    struct kvm_enable_cap *cap);
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 02530dc6339b..8e61340b3911 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -362,6 +362,18 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
 void vm_handle_exception(struct kvm_vm *vm, int vector,
 			void (*handler)(struct ex_regs *));
 
+/*
+ * set_cpuid() - overwrites a matching cpuid entry with the provided value.
+ *		 matches based on ent->function && ent->index. returns true
+ *		 if a match was found and successfully overwritten.
+ * @cpuid: the kvm cpuid list to modify.
+ * @ent: cpuid entry to insert
+ */
+bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+		       uint64_t a3);
+
 /*
  * Basic CPU control in CR0
  */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index be230f3728d5..91e547031d8c 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -86,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
 	return ret;
 }
 
+/* VCPU Enable Capability
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpu_id - VCPU
+ *   cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VCPU.
+ */
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+		    struct kvm_enable_cap *cap)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
+	int r;
+
+	TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
+
+	r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
+	TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
+			"  rc: %i, errno: %i", r, errno);
+
+	return r;
+}
+
 static void vm_open(struct kvm_vm *vm, int perm)
 {
 	vm->kvm_fd = open(KVM_DEV_PATH, perm);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 66228297ac03..d10c5c05bdf0 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1227,3 +1227,32 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
 			    *data);
 	}
 }
+
+bool set_cpuid(struct kvm_cpuid2 *cpuid,
+	       struct kvm_cpuid_entry2 *ent)
+{
+	int i;
+
+	for (i = 0; i < cpuid->nent; i++) {
+		struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
+
+		if (cur->function != ent->function || cur->index != ent->index)
+			continue;
+
+		memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
+		return true;
+	}
+
+	return false;
+}
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+		       uint64_t a3)
+{
+	uint64_t r;
+
+	asm volatile("vmcall"
+		     : "=a"(r)
+		     : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+	return r;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
new file mode 100644
index 000000000000..b10a27485bad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+static u64 do_rdmsr(u32 idx)
+{
+	u32 lo, hi;
+
+	asm volatile("rdmsr_start: rdmsr;"
+		     "rdmsr_end:"
+		     : "=a"(lo), "=c"(hi)
+		     : "c"(idx));
+
+	return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+static void do_wrmsr(u32 idx, u64 val)
+{
+	u32 lo, hi;
+
+	lo = val;
+	hi = val >> 32;
+
+	asm volatile("wrmsr_start: wrmsr;"
+		     "wrmsr_end:"
+		     : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+static int nr_gp;
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	unsigned char *rip = (unsigned char *)regs->rip;
+	bool r, w;
+
+	r = rip == &rdmsr_start;
+	w = rip == &wrmsr_start;
+	GUEST_ASSERT(r || w);
+
+	nr_gp++;
+
+	if (r)
+		regs->rip = (uint64_t)&rdmsr_end;
+	else
+		regs->rip = (uint64_t)&wrmsr_end;
+}
+
+struct msr_data {
+	uint32_t idx;
+	const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+	TEST_MSR(MSR_KVM_SYSTEM_TIME),
+	TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+	TEST_MSR(MSR_KVM_WALL_CLOCK),
+	TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+	TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+	TEST_MSR(MSR_KVM_STEAL_TIME),
+	TEST_MSR(MSR_KVM_PV_EOI_EN),
+	TEST_MSR(MSR_KVM_POLL_CONTROL),
+	TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+	TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+	PR_MSR(msr);
+	do_rdmsr(msr->idx);
+	GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+
+	nr_gp = 0;
+	do_wrmsr(msr->idx, 0);
+	GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+	nr_gp = 0;
+}
+
+struct hcall_data {
+	uint64_t nr;
+	const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+	TEST_HCALL(KVM_HC_KICK_CPU),
+	TEST_HCALL(KVM_HC_SEND_IPI),
+	TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+	uint64_t r;
+
+	PR_HCALL(hc);
+	r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+	GUEST_ASSERT(r == -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+		test_msr(&msrs_to_test[i]);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+		test_hcall(&hcalls_to_test[i]);
+	}
+
+	GUEST_DONE();
+}
+
+static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
+{
+	struct kvm_cpuid_entry2 ent = {0};
+
+	ent.function = KVM_CPUID_FEATURES;
+	TEST_ASSERT(set_cpuid(cpuid, &ent),
+		    "failed to clear KVM_CPUID_FEATURES leaf");
+}
+
+static void pr_msr(struct ucall *uc)
+{
+	struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+	pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+	struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+	pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+	TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
+		  __FILE__, uc->args[1]);
+}
+
+#define VCPU_ID 0
+
+static void enter_guest(struct kvm_vm *vm)
+{
+	struct kvm_run *run;
+	struct ucall uc;
+	int r;
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	while (true) {
+		r = _vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_PR_MSR:
+			pr_msr(&uc);
+			break;
+		case UCALL_PR_HCALL:
+			pr_hcall(&uc);
+			break;
+		case UCALL_ABORT:
+			handle_abort(&uc);
+			return;
+		case UCALL_DONE:
+			return;
+		}
+	}
+}
+
+int main(void)
+{
+	struct kvm_enable_cap cap = {0};
+	struct kvm_cpuid2 *best;
+	struct kvm_vm *vm;
+
+	if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
+		pr_info("will skip kvm paravirt restriction tests.\n");
+		return 0;
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, guest_main);
+
+	cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
+	cap.args[0] = 1;
+	vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+	best = kvm_get_supported_cpuid();
+	clear_kvm_cpuid_features(best);
+	vcpu_set_cpuid(vm, VCPU_ID, best);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+	enter_guest(vm);
+	kvm_vm_free(vm);
+}
-- 
cgit v1.2.3-70-g09d2


From fd02029a9e019e941835e110651486e2d77d3f84 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Thu, 29 Oct 2020 21:17:01 +0100
Subject: KVM: selftests: Add aarch64 get-reg-list test

Check for KVM_GET_REG_LIST regressions. The blessed list was
created by running on v4.15 with the --core-reg-fixup option.
The following script was also used in order to annotate system
registers with their names when possible. When new system
registers are added the names can just be added manually using
the same grep.

while read reg; do
	if [[ ! $reg =~ ARM64_SYS_REG ]]; then
		printf "\t$reg\n"
		continue
	fi
	encoding=$(echo "$reg" | sed "s/ARM64_SYS_REG(//;s/),//")
	if ! name=$(grep "$encoding" ../../../../arch/arm64/include/asm/sysreg.h); then
		printf "\t$reg\n"
		continue
	fi
	name=$(echo "$name" | sed "s/.*SYS_//;s/[\t ]*sys_reg($encoding)$//")
	printf "\t$reg\t/* $name */\n"
done < <(aarch64/get-reg-list --core-reg-fixup --list)

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201029201703.102716-3-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 tools/testing/selftests/kvm/aarch64/get-reg-list.c | 671 +++++++++++++++++++++
 tools/testing/selftests/kvm/include/kvm_util.h     |   1 +
 tools/testing/selftests/kvm/lib/kvm_util.c         |  29 +
 5 files changed, 703 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 22973aa75eda..2034765862a2 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+/aarch64/get-reg-list
 /s390x/memop
 /s390x/resets
 /s390x/sync_regs_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 44ca6badb544..771455ae4a1b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -66,6 +66,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
new file mode 100644
index 000000000000..3ff097f6886e
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -0,0 +1,671 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * Note, the blessed list should be created from the oldest possible
+ * kernel. We can't go older than v4.15, though, because that's the first
+ * release to expose the ID system registers in KVM_GET_REG_LIST, see
+ * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
+ * from guests"). Also, one must use the --core-reg-fixup command line
+ * option when running on an older kernel that doesn't include df205b5c6328
+ * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "test_util.h"
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
+#define for_each_reg(i)								\
+	for ((i) = 0; (i) < reg_list->n; ++(i))
+
+#define for_each_missing_reg(i)							\
+	for ((i) = 0; (i) < blessed_n; ++(i))					\
+		if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
+
+#define for_each_new_reg(i)							\
+	for ((i) = 0; (i) < reg_list->n; ++(i))					\
+		if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+
+static struct kvm_reg_list *reg_list;
+
+static __u64 blessed_reg[];
+static __u64 blessed_n;
+
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+	int i;
+
+	for (i = 0; i < nr_regs; ++i)
+		if (reg == regs[i])
+			return true;
+	return false;
+}
+
+static const char *str_with_index(const char *template, __u64 index)
+{
+	char *str, *p;
+	int n;
+
+	str = strdup(template);
+	p = strstr(str, "##");
+	n = sprintf(p, "%lld", index);
+	strcat(p + n, strstr(template, "##") + 2);
+
+	return (const char *)str;
+}
+
+#define CORE_REGS_XX_NR_WORDS	2
+#define CORE_SPSR_XX_NR_WORDS	2
+#define CORE_FPREGS_XX_NR_WORDS	4
+
+static const char *core_id_to_str(__u64 id)
+{
+	__u64 core_off = id & ~REG_MASK, idx;
+
+	/*
+	 * core_off is the offset into struct kvm_regs
+	 */
+	switch (core_off) {
+	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
+		idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+		TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+		return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+	case KVM_REG_ARM_CORE_REG(regs.sp):
+		return "KVM_REG_ARM_CORE_REG(regs.sp)";
+	case KVM_REG_ARM_CORE_REG(regs.pc):
+		return "KVM_REG_ARM_CORE_REG(regs.pc)";
+	case KVM_REG_ARM_CORE_REG(regs.pstate):
+		return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+	case KVM_REG_ARM_CORE_REG(sp_el1):
+		return "KVM_REG_ARM_CORE_REG(sp_el1)";
+	case KVM_REG_ARM_CORE_REG(elr_el1):
+		return "KVM_REG_ARM_CORE_REG(elr_el1)";
+	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+		idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+		TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+		return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+		idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+		TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+		return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+		return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+	case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+		return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+	}
+
+	TEST_FAIL("Unknown core reg id: 0x%llx", id);
+	return NULL;
+}
+
+static void print_reg(__u64 id)
+{
+	unsigned op0, op1, crn, crm, op2;
+	const char *reg_size = NULL;
+
+	TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+		    "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+
+	switch (id & KVM_REG_SIZE_MASK) {
+	case KVM_REG_SIZE_U8:
+		reg_size = "KVM_REG_SIZE_U8";
+		break;
+	case KVM_REG_SIZE_U16:
+		reg_size = "KVM_REG_SIZE_U16";
+		break;
+	case KVM_REG_SIZE_U32:
+		reg_size = "KVM_REG_SIZE_U32";
+		break;
+	case KVM_REG_SIZE_U64:
+		reg_size = "KVM_REG_SIZE_U64";
+		break;
+	case KVM_REG_SIZE_U128:
+		reg_size = "KVM_REG_SIZE_U128";
+		break;
+	case KVM_REG_SIZE_U256:
+		reg_size = "KVM_REG_SIZE_U256";
+		break;
+	case KVM_REG_SIZE_U512:
+		reg_size = "KVM_REG_SIZE_U512";
+		break;
+	case KVM_REG_SIZE_U1024:
+		reg_size = "KVM_REG_SIZE_U1024";
+		break;
+	case KVM_REG_SIZE_U2048:
+		reg_size = "KVM_REG_SIZE_U2048";
+		break;
+	default:
+		TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
+			  (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+	}
+
+	switch (id & KVM_REG_ARM_COPROC_MASK) {
+	case KVM_REG_ARM_CORE:
+		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+		break;
+	case KVM_REG_ARM_DEMUX:
+		TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+			    "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+		       reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+		break;
+	case KVM_REG_ARM64_SYSREG:
+		op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+		op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+		crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+		crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+		op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+		TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+			    "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+		printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+		break;
+	case KVM_REG_ARM_FW:
+		TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+			    "Unexpected bits set in FW reg id: 0x%llx", id);
+		printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+		break;
+	case KVM_REG_ARM64_SVE:
+		TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+		break;
+	default:
+		TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+			  (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+	}
+}
+
+/*
+ * Older kernels listed each 32-bit word of CORE registers separately.
+ * For 64 and 128-bit registers we need to ignore the extra words. We
+ * also need to fixup the sizes, because the older kernels stated all
+ * registers were 64-bit, even when they weren't.
+ */
+static void core_reg_fixup(void)
+{
+	struct kvm_reg_list *tmp;
+	__u64 id, core_off;
+	int i;
+
+	tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
+
+	for (i = 0; i < reg_list->n; ++i) {
+		id = reg_list->reg[i];
+
+		if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
+			tmp->reg[tmp->n++] = id;
+			continue;
+		}
+
+		core_off = id & ~REG_MASK;
+
+		switch (core_off) {
+		case 0x52: case 0xd2: case 0xd6:
+			/*
+			 * These offsets are pointing at padding.
+			 * We need to ignore them too.
+			 */
+			continue;
+		case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+		     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+			if (core_off & 3)
+				continue;
+			id &= ~KVM_REG_SIZE_MASK;
+			id |= KVM_REG_SIZE_U128;
+			tmp->reg[tmp->n++] = id;
+			continue;
+		case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+		case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+			id &= ~KVM_REG_SIZE_MASK;
+			id |= KVM_REG_SIZE_U32;
+			tmp->reg[tmp->n++] = id;
+			continue;
+		default:
+			if (core_off & 1)
+				continue;
+			tmp->reg[tmp->n++] = id;
+			break;
+		}
+	}
+
+	free(reg_list);
+	reg_list = tmp;
+}
+
+int main(int ac, char **av)
+{
+	int new_regs = 0, missing_regs = 0, i;
+	int failed_get = 0, failed_set = 0;
+	bool print_list = false, fixup_core_regs = false;
+	struct kvm_vm *vm;
+
+	for (i = 1; i < ac; ++i) {
+		if (strcmp(av[i], "--core-reg-fixup") == 0)
+			fixup_core_regs = true;
+		else if (strcmp(av[i], "--list") == 0)
+			print_list = true;
+		else
+			fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
+	}
+
+	vm = vm_create_default(0, 0, NULL);
+	reg_list = vcpu_get_reg_list(vm, 0);
+
+	if (fixup_core_regs)
+		core_reg_fixup();
+
+	if (print_list) {
+		putchar('\n');
+		for_each_reg(i)
+			print_reg(reg_list->reg[i]);
+		putchar('\n');
+		return 0;
+	}
+
+	/*
+	 * We only test that we can get the register and then write back the
+	 * same value. Some registers may allow other values to be written
+	 * back, but others only allow some bits to be changed, and at least
+	 * for ID registers set will fail if the value does not exactly match
+	 * what was returned by get. If registers that allow other values to
+	 * be written need to have the other values tested, then we should
+	 * create a new set of tests for those in a new independent test
+	 * executable.
+	 */
+	for_each_reg(i) {
+		uint8_t addr[2048 / 8];
+		struct kvm_one_reg reg = {
+			.id = reg_list->reg[i],
+			.addr = (__u64)&addr,
+		};
+		int ret;
+
+		ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
+		if (ret) {
+			puts("Failed to get ");
+			print_reg(reg.id);
+			putchar('\n');
+			++failed_get;
+		}
+
+		ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+		if (ret) {
+			puts("Failed to set ");
+			print_reg(reg.id);
+			putchar('\n');
+			++failed_set;
+		}
+	}
+
+	for_each_new_reg(i)
+		++new_regs;
+
+	for_each_missing_reg(i)
+		++missing_regs;
+
+	if (new_regs || missing_regs) {
+		printf("Number blessed registers: %5lld\n", blessed_n);
+		printf("Number registers:         %5lld\n", reg_list->n);
+	}
+
+	if (new_regs) {
+		printf("\nThere are %d new registers.\n"
+		       "Consider adding them to the blessed reg "
+		       "list with the following lines:\n\n", new_regs);
+		for_each_new_reg(i)
+			print_reg(reg_list->reg[i]);
+		putchar('\n');
+	}
+
+	if (missing_regs) {
+		printf("\nThere are %d missing registers.\n"
+		       "The following lines are missing registers:\n\n", missing_regs);
+		for_each_missing_reg(i)
+			print_reg(blessed_reg[i]);
+		putchar('\n');
+	}
+
+	TEST_ASSERT(!missing_regs && !failed_get && !failed_set,
+		    "There are %d missing registers; %d registers failed get; %d registers failed set",
+		    missing_regs, failed_get, failed_set);
+
+	return 0;
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ */
+static __u64 blessed_reg[] = {
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+	KVM_REG_ARM_FW_REG(0),
+	KVM_REG_ARM_FW_REG(1),
+	KVM_REG_ARM_FW_REG(2),
+	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
+	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
+	ARM64_SYS_REG(3, 3, 14, 0, 2),
+	ARM64_SYS_REG(3, 0, 0, 0, 0),	/* MIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 0, 6),	/* REVIDR_EL1 */
+	ARM64_SYS_REG(3, 1, 0, 0, 1),	/* CLIDR_EL1 */
+	ARM64_SYS_REG(3, 1, 0, 0, 7),	/* AIDR_EL1 */
+	ARM64_SYS_REG(3, 3, 0, 0, 1),	/* CTR_EL0 */
+	ARM64_SYS_REG(2, 0, 0, 0, 4),
+	ARM64_SYS_REG(2, 0, 0, 0, 5),
+	ARM64_SYS_REG(2, 0, 0, 0, 6),
+	ARM64_SYS_REG(2, 0, 0, 0, 7),
+	ARM64_SYS_REG(2, 0, 0, 1, 4),
+	ARM64_SYS_REG(2, 0, 0, 1, 5),
+	ARM64_SYS_REG(2, 0, 0, 1, 6),
+	ARM64_SYS_REG(2, 0, 0, 1, 7),
+	ARM64_SYS_REG(2, 0, 0, 2, 0),	/* MDCCINT_EL1 */
+	ARM64_SYS_REG(2, 0, 0, 2, 2),	/* MDSCR_EL1 */
+	ARM64_SYS_REG(2, 0, 0, 2, 4),
+	ARM64_SYS_REG(2, 0, 0, 2, 5),
+	ARM64_SYS_REG(2, 0, 0, 2, 6),
+	ARM64_SYS_REG(2, 0, 0, 2, 7),
+	ARM64_SYS_REG(2, 0, 0, 3, 4),
+	ARM64_SYS_REG(2, 0, 0, 3, 5),
+	ARM64_SYS_REG(2, 0, 0, 3, 6),
+	ARM64_SYS_REG(2, 0, 0, 3, 7),
+	ARM64_SYS_REG(2, 0, 0, 4, 4),
+	ARM64_SYS_REG(2, 0, 0, 4, 5),
+	ARM64_SYS_REG(2, 0, 0, 4, 6),
+	ARM64_SYS_REG(2, 0, 0, 4, 7),
+	ARM64_SYS_REG(2, 0, 0, 5, 4),
+	ARM64_SYS_REG(2, 0, 0, 5, 5),
+	ARM64_SYS_REG(2, 0, 0, 5, 6),
+	ARM64_SYS_REG(2, 0, 0, 5, 7),
+	ARM64_SYS_REG(2, 0, 0, 6, 4),
+	ARM64_SYS_REG(2, 0, 0, 6, 5),
+	ARM64_SYS_REG(2, 0, 0, 6, 6),
+	ARM64_SYS_REG(2, 0, 0, 6, 7),
+	ARM64_SYS_REG(2, 0, 0, 7, 4),
+	ARM64_SYS_REG(2, 0, 0, 7, 5),
+	ARM64_SYS_REG(2, 0, 0, 7, 6),
+	ARM64_SYS_REG(2, 0, 0, 7, 7),
+	ARM64_SYS_REG(2, 0, 0, 8, 4),
+	ARM64_SYS_REG(2, 0, 0, 8, 5),
+	ARM64_SYS_REG(2, 0, 0, 8, 6),
+	ARM64_SYS_REG(2, 0, 0, 8, 7),
+	ARM64_SYS_REG(2, 0, 0, 9, 4),
+	ARM64_SYS_REG(2, 0, 0, 9, 5),
+	ARM64_SYS_REG(2, 0, 0, 9, 6),
+	ARM64_SYS_REG(2, 0, 0, 9, 7),
+	ARM64_SYS_REG(2, 0, 0, 10, 4),
+	ARM64_SYS_REG(2, 0, 0, 10, 5),
+	ARM64_SYS_REG(2, 0, 0, 10, 6),
+	ARM64_SYS_REG(2, 0, 0, 10, 7),
+	ARM64_SYS_REG(2, 0, 0, 11, 4),
+	ARM64_SYS_REG(2, 0, 0, 11, 5),
+	ARM64_SYS_REG(2, 0, 0, 11, 6),
+	ARM64_SYS_REG(2, 0, 0, 11, 7),
+	ARM64_SYS_REG(2, 0, 0, 12, 4),
+	ARM64_SYS_REG(2, 0, 0, 12, 5),
+	ARM64_SYS_REG(2, 0, 0, 12, 6),
+	ARM64_SYS_REG(2, 0, 0, 12, 7),
+	ARM64_SYS_REG(2, 0, 0, 13, 4),
+	ARM64_SYS_REG(2, 0, 0, 13, 5),
+	ARM64_SYS_REG(2, 0, 0, 13, 6),
+	ARM64_SYS_REG(2, 0, 0, 13, 7),
+	ARM64_SYS_REG(2, 0, 0, 14, 4),
+	ARM64_SYS_REG(2, 0, 0, 14, 5),
+	ARM64_SYS_REG(2, 0, 0, 14, 6),
+	ARM64_SYS_REG(2, 0, 0, 14, 7),
+	ARM64_SYS_REG(2, 0, 0, 15, 4),
+	ARM64_SYS_REG(2, 0, 0, 15, 5),
+	ARM64_SYS_REG(2, 0, 0, 15, 6),
+	ARM64_SYS_REG(2, 0, 0, 15, 7),
+	ARM64_SYS_REG(2, 4, 0, 7, 0),	/* DBGVCR32_EL2 */
+	ARM64_SYS_REG(3, 0, 0, 0, 5),	/* MPIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 0),	/* ID_PFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 1),	/* ID_PFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 2),	/* ID_DFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 3),	/* ID_AFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 4),	/* ID_MMFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 5),	/* ID_MMFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 6),	/* ID_MMFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 1, 7),	/* ID_MMFR3_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 0),	/* ID_ISAR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 1),	/* ID_ISAR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 2),	/* ID_ISAR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 3),	/* ID_ISAR3_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 4),	/* ID_ISAR4_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 5),	/* ID_ISAR5_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 6),	/* ID_MMFR4_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 2, 7),	/* ID_ISAR6_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 0),	/* MVFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 1),	/* MVFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 2),	/* MVFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 3),
+	ARM64_SYS_REG(3, 0, 0, 3, 4),	/* ID_PFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 5),	/* ID_DFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 6),	/* ID_MMFR5_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 3, 7),
+	ARM64_SYS_REG(3, 0, 0, 4, 0),	/* ID_AA64PFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 4, 1),	/* ID_AA64PFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 4, 2),
+	ARM64_SYS_REG(3, 0, 0, 4, 3),
+	ARM64_SYS_REG(3, 0, 0, 4, 4),	/* ID_AA64ZFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 4, 5),
+	ARM64_SYS_REG(3, 0, 0, 4, 6),
+	ARM64_SYS_REG(3, 0, 0, 4, 7),
+	ARM64_SYS_REG(3, 0, 0, 5, 0),	/* ID_AA64DFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 1),	/* ID_AA64DFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 2),
+	ARM64_SYS_REG(3, 0, 0, 5, 3),
+	ARM64_SYS_REG(3, 0, 0, 5, 4),	/* ID_AA64AFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 5),	/* ID_AA64AFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 5, 6),
+	ARM64_SYS_REG(3, 0, 0, 5, 7),
+	ARM64_SYS_REG(3, 0, 0, 6, 0),	/* ID_AA64ISAR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 6, 1),	/* ID_AA64ISAR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 6, 2),
+	ARM64_SYS_REG(3, 0, 0, 6, 3),
+	ARM64_SYS_REG(3, 0, 0, 6, 4),
+	ARM64_SYS_REG(3, 0, 0, 6, 5),
+	ARM64_SYS_REG(3, 0, 0, 6, 6),
+	ARM64_SYS_REG(3, 0, 0, 6, 7),
+	ARM64_SYS_REG(3, 0, 0, 7, 0),	/* ID_AA64MMFR0_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 7, 1),	/* ID_AA64MMFR1_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 7, 2),	/* ID_AA64MMFR2_EL1 */
+	ARM64_SYS_REG(3, 0, 0, 7, 3),
+	ARM64_SYS_REG(3, 0, 0, 7, 4),
+	ARM64_SYS_REG(3, 0, 0, 7, 5),
+	ARM64_SYS_REG(3, 0, 0, 7, 6),
+	ARM64_SYS_REG(3, 0, 0, 7, 7),
+	ARM64_SYS_REG(3, 0, 1, 0, 0),	/* SCTLR_EL1 */
+	ARM64_SYS_REG(3, 0, 1, 0, 1),	/* ACTLR_EL1 */
+	ARM64_SYS_REG(3, 0, 1, 0, 2),	/* CPACR_EL1 */
+	ARM64_SYS_REG(3, 0, 2, 0, 0),	/* TTBR0_EL1 */
+	ARM64_SYS_REG(3, 0, 2, 0, 1),	/* TTBR1_EL1 */
+	ARM64_SYS_REG(3, 0, 2, 0, 2),	/* TCR_EL1 */
+	ARM64_SYS_REG(3, 0, 5, 1, 0),	/* AFSR0_EL1 */
+	ARM64_SYS_REG(3, 0, 5, 1, 1),	/* AFSR1_EL1 */
+	ARM64_SYS_REG(3, 0, 5, 2, 0),	/* ESR_EL1 */
+	ARM64_SYS_REG(3, 0, 6, 0, 0),	/* FAR_EL1 */
+	ARM64_SYS_REG(3, 0, 7, 4, 0),	/* PAR_EL1 */
+	ARM64_SYS_REG(3, 0, 9, 14, 1),	/* PMINTENSET_EL1 */
+	ARM64_SYS_REG(3, 0, 9, 14, 2),	/* PMINTENCLR_EL1 */
+	ARM64_SYS_REG(3, 0, 10, 2, 0),	/* MAIR_EL1 */
+	ARM64_SYS_REG(3, 0, 10, 3, 0),	/* AMAIR_EL1 */
+	ARM64_SYS_REG(3, 0, 12, 0, 0),	/* VBAR_EL1 */
+	ARM64_SYS_REG(3, 0, 12, 1, 1),	/* DISR_EL1 */
+	ARM64_SYS_REG(3, 0, 13, 0, 1),	/* CONTEXTIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 13, 0, 4),	/* TPIDR_EL1 */
+	ARM64_SYS_REG(3, 0, 14, 1, 0),	/* CNTKCTL_EL1 */
+	ARM64_SYS_REG(3, 2, 0, 0, 0),	/* CSSELR_EL1 */
+	ARM64_SYS_REG(3, 3, 9, 12, 0),	/* PMCR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 1),	/* PMCNTENSET_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 2),	/* PMCNTENCLR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 3),	/* PMOVSCLR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 4),	/* PMSWINC_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 12, 5),	/* PMSELR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 13, 0),	/* PMCCNTR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 14, 0),	/* PMUSERENR_EL0 */
+	ARM64_SYS_REG(3, 3, 9, 14, 3),	/* PMOVSSET_EL0 */
+	ARM64_SYS_REG(3, 3, 13, 0, 2),	/* TPIDR_EL0 */
+	ARM64_SYS_REG(3, 3, 13, 0, 3),	/* TPIDRRO_EL0 */
+	ARM64_SYS_REG(3, 3, 14, 8, 0),
+	ARM64_SYS_REG(3, 3, 14, 8, 1),
+	ARM64_SYS_REG(3, 3, 14, 8, 2),
+	ARM64_SYS_REG(3, 3, 14, 8, 3),
+	ARM64_SYS_REG(3, 3, 14, 8, 4),
+	ARM64_SYS_REG(3, 3, 14, 8, 5),
+	ARM64_SYS_REG(3, 3, 14, 8, 6),
+	ARM64_SYS_REG(3, 3, 14, 8, 7),
+	ARM64_SYS_REG(3, 3, 14, 9, 0),
+	ARM64_SYS_REG(3, 3, 14, 9, 1),
+	ARM64_SYS_REG(3, 3, 14, 9, 2),
+	ARM64_SYS_REG(3, 3, 14, 9, 3),
+	ARM64_SYS_REG(3, 3, 14, 9, 4),
+	ARM64_SYS_REG(3, 3, 14, 9, 5),
+	ARM64_SYS_REG(3, 3, 14, 9, 6),
+	ARM64_SYS_REG(3, 3, 14, 9, 7),
+	ARM64_SYS_REG(3, 3, 14, 10, 0),
+	ARM64_SYS_REG(3, 3, 14, 10, 1),
+	ARM64_SYS_REG(3, 3, 14, 10, 2),
+	ARM64_SYS_REG(3, 3, 14, 10, 3),
+	ARM64_SYS_REG(3, 3, 14, 10, 4),
+	ARM64_SYS_REG(3, 3, 14, 10, 5),
+	ARM64_SYS_REG(3, 3, 14, 10, 6),
+	ARM64_SYS_REG(3, 3, 14, 10, 7),
+	ARM64_SYS_REG(3, 3, 14, 11, 0),
+	ARM64_SYS_REG(3, 3, 14, 11, 1),
+	ARM64_SYS_REG(3, 3, 14, 11, 2),
+	ARM64_SYS_REG(3, 3, 14, 11, 3),
+	ARM64_SYS_REG(3, 3, 14, 11, 4),
+	ARM64_SYS_REG(3, 3, 14, 11, 5),
+	ARM64_SYS_REG(3, 3, 14, 11, 6),
+	ARM64_SYS_REG(3, 3, 14, 12, 0),
+	ARM64_SYS_REG(3, 3, 14, 12, 1),
+	ARM64_SYS_REG(3, 3, 14, 12, 2),
+	ARM64_SYS_REG(3, 3, 14, 12, 3),
+	ARM64_SYS_REG(3, 3, 14, 12, 4),
+	ARM64_SYS_REG(3, 3, 14, 12, 5),
+	ARM64_SYS_REG(3, 3, 14, 12, 6),
+	ARM64_SYS_REG(3, 3, 14, 12, 7),
+	ARM64_SYS_REG(3, 3, 14, 13, 0),
+	ARM64_SYS_REG(3, 3, 14, 13, 1),
+	ARM64_SYS_REG(3, 3, 14, 13, 2),
+	ARM64_SYS_REG(3, 3, 14, 13, 3),
+	ARM64_SYS_REG(3, 3, 14, 13, 4),
+	ARM64_SYS_REG(3, 3, 14, 13, 5),
+	ARM64_SYS_REG(3, 3, 14, 13, 6),
+	ARM64_SYS_REG(3, 3, 14, 13, 7),
+	ARM64_SYS_REG(3, 3, 14, 14, 0),
+	ARM64_SYS_REG(3, 3, 14, 14, 1),
+	ARM64_SYS_REG(3, 3, 14, 14, 2),
+	ARM64_SYS_REG(3, 3, 14, 14, 3),
+	ARM64_SYS_REG(3, 3, 14, 14, 4),
+	ARM64_SYS_REG(3, 3, 14, 14, 5),
+	ARM64_SYS_REG(3, 3, 14, 14, 6),
+	ARM64_SYS_REG(3, 3, 14, 14, 7),
+	ARM64_SYS_REG(3, 3, 14, 15, 0),
+	ARM64_SYS_REG(3, 3, 14, 15, 1),
+	ARM64_SYS_REG(3, 3, 14, 15, 2),
+	ARM64_SYS_REG(3, 3, 14, 15, 3),
+	ARM64_SYS_REG(3, 3, 14, 15, 4),
+	ARM64_SYS_REG(3, 3, 14, 15, 5),
+	ARM64_SYS_REG(3, 3, 14, 15, 6),
+	ARM64_SYS_REG(3, 3, 14, 15, 7),	/* PMCCFILTR_EL0 */
+	ARM64_SYS_REG(3, 4, 3, 0, 0),	/* DACR32_EL2 */
+	ARM64_SYS_REG(3, 4, 5, 0, 1),	/* IFSR32_EL2 */
+	ARM64_SYS_REG(3, 4, 5, 3, 0),	/* FPEXC32_EL2 */
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
+	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
+};
+static __u64 blessed_n = ARRAY_SIZE(blessed_reg);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 77f53dbc34ff..feb949a92055 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -152,6 +152,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
 			  struct kvm_guest_debug *debug);
 void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
 		       struct kvm_mp_state *mp_state);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 91e547031d8c..7669cb7c86e3 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1291,6 +1291,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
 		"rc: %i errno: %i", ret, errno);
 }
 
+/*
+ * VM VCPU Get Reg List
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   None
+ *
+ * Return:
+ *   A pointer to an allocated struct kvm_reg_list
+ *
+ * Get the list of guest registers which are supported for
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
+ */
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
+	int ret;
+
+	ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
+	TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
+	reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
+	reg_list->n = reg_list_n.n;
+	vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
+	return reg_list;
+}
+
 /*
  * VM VCPU Regs Get
  *
-- 
cgit v1.2.3-70-g09d2


From 31d212959179015bc07f3af4e890cadd26e01ee0 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Thu, 29 Oct 2020 21:17:03 +0100
Subject: KVM: selftests: Add blessed SVE registers to get-reg-list

Add support for the SVE registers to get-reg-list and create a
new test, get-reg-list-sve, which tests them when running on a
machine with SVE support.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201029201703.102716-5-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../selftests/kvm/aarch64/get-reg-list-sve.c       |   3 +
 tools/testing/selftests/kvm/aarch64/get-reg-list.c | 254 +++++++++++++++++----
 4 files changed, 217 insertions(+), 42 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 2034765862a2..ea1692d43411 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 /aarch64/get-reg-list
+/aarch64/get-reg-list-sve
 /s390x/memop
 /s390x/resets
 /s390x/sync_regs_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 771455ae4a1b..2b0d7d325e2a 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
new file mode 100644
index 000000000000..efba76682b4b
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define REG_LIST_SVE
+#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 3ff097f6886e..33218a395d9f 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -29,6 +29,13 @@
 #include <string.h>
 #include "kvm_util.h"
 #include "test_util.h"
+#include "processor.h"
+
+#ifdef REG_LIST_SVE
+#define reg_list_sve() (true)
+#else
+#define reg_list_sve() (false)
+#endif
 
 #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
 
@@ -46,8 +53,9 @@
 
 static struct kvm_reg_list *reg_list;
 
-static __u64 blessed_reg[];
-static __u64 blessed_n;
+static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
+static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
+static __u64 *blessed_reg, blessed_n;
 
 static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
 {
@@ -119,6 +127,40 @@ static const char *core_id_to_str(__u64 id)
 	return NULL;
 }
 
+static const char *sve_id_to_str(__u64 id)
+{
+	__u64 sve_off, n, i;
+
+	if (id == KVM_REG_ARM64_SVE_VLS)
+		return "KVM_REG_ARM64_SVE_VLS";
+
+	sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+	i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+	TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+
+	switch (sve_off) {
+	case KVM_REG_ARM64_SVE_ZREG_BASE ...
+	     KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+		n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+		TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+			    "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+		return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+	case KVM_REG_ARM64_SVE_PREG_BASE ...
+	     KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+		n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+		TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+			    "Unexpected bits set in SVE PREG id: 0x%llx", id);
+		return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+	case KVM_REG_ARM64_SVE_FFR_BASE:
+		TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+			    "Unexpected bits set in SVE FFR id: 0x%llx", id);
+		return "KVM_REG_ARM64_SVE_FFR(0)";
+	}
+
+	return NULL;
+}
+
 static void print_reg(__u64 id)
 {
 	unsigned op0, op1, crn, crm, op2;
@@ -186,7 +228,10 @@ static void print_reg(__u64 id)
 		printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
 		break;
 	case KVM_REG_ARM64_SVE:
-		TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+		if (reg_list_sve())
+			printf("\t%s,\n", sve_id_to_str(id));
+		else
+			TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
 		break;
 	default:
 		TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
@@ -251,12 +296,40 @@ static void core_reg_fixup(void)
 	reg_list = tmp;
 }
 
+static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+{
+	if (reg_list_sve())
+		init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+}
+
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	int feature;
+
+	if (reg_list_sve()) {
+		feature = KVM_ARM_VCPU_SVE;
+		vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+	}
+}
+
+static void check_supported(void)
+{
+	if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
+		fprintf(stderr, "SVE not available, skipping tests\n");
+		exit(KSFT_SKIP);
+	}
+}
+
 int main(int ac, char **av)
 {
+	struct kvm_vcpu_init init = { .target = -1, };
 	int new_regs = 0, missing_regs = 0, i;
-	int failed_get = 0, failed_set = 0;
+	int failed_get = 0, failed_set = 0, failed_reject = 0;
 	bool print_list = false, fixup_core_regs = false;
 	struct kvm_vm *vm;
+	__u64 *vec_regs;
+
+	check_supported();
 
 	for (i = 1; i < ac; ++i) {
 		if (strcmp(av[i], "--core-reg-fixup") == 0)
@@ -267,7 +340,11 @@ int main(int ac, char **av)
 			fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
 	}
 
-	vm = vm_create_default(0, 0, NULL);
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	prepare_vcpu_init(&init);
+	aarch64_vcpu_add_default(vm, 0, &init, NULL);
+	finalize_vcpu(vm, 0);
+
 	reg_list = vcpu_get_reg_list(vm, 0);
 
 	if (fixup_core_regs)
@@ -307,6 +384,18 @@ int main(int ac, char **av)
 			++failed_get;
 		}
 
+		/* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
+		if (find_reg(rejects_set, rejects_set_n, reg.id)) {
+			ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+			if (ret != -1 || errno != EPERM) {
+				printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
+				print_reg(reg.id);
+				putchar('\n');
+				++failed_reject;
+			}
+			continue;
+		}
+
 		ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
 		if (ret) {
 			puts("Failed to set ");
@@ -316,6 +405,20 @@ int main(int ac, char **av)
 		}
 	}
 
+	if (reg_list_sve()) {
+		blessed_n = base_regs_n + sve_regs_n;
+		vec_regs = sve_regs;
+	} else {
+		blessed_n = base_regs_n + vregs_n;
+		vec_regs = vregs;
+	}
+
+	blessed_reg = calloc(blessed_n, sizeof(__u64));
+	for (i = 0; i < base_regs_n; ++i)
+		blessed_reg[i] = base_regs[i];
+	for (i = 0; i < blessed_n - base_regs_n; ++i)
+		blessed_reg[base_regs_n + i] = vec_regs[i];
+
 	for_each_new_reg(i)
 		++new_regs;
 
@@ -344,9 +447,10 @@ int main(int ac, char **av)
 		putchar('\n');
 	}
 
-	TEST_ASSERT(!missing_regs && !failed_get && !failed_set,
-		    "There are %d missing registers; %d registers failed get; %d registers failed set",
-		    missing_regs, failed_get, failed_set);
+	TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+		    "There are %d missing registers; "
+		    "%d registers failed get; %d registers failed set; %d registers failed reject",
+		    missing_regs, failed_get, failed_set, failed_reject);
 
 	return 0;
 }
@@ -355,7 +459,7 @@ int main(int ac, char **av)
  * The current blessed list was primed with the output of kernel version
  * v4.15 with --core-reg-fixup and then later updated with new registers.
  */
-static __u64 blessed_reg[] = {
+static __u64 base_regs[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
@@ -397,38 +501,6 @@ static __u64 blessed_reg[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
-	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
 	KVM_REG_ARM_FW_REG(0),
@@ -668,4 +740,102 @@ static __u64 blessed_reg[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
 	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
 };
-static __u64 blessed_n = ARRAY_SIZE(blessed_reg);
+static __u64 base_regs_n = ARRAY_SIZE(base_regs);
+
+static __u64 vregs[] = {
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+static __u64 vregs_n = ARRAY_SIZE(vregs);
+
+static __u64 sve_regs[] = {
+	KVM_REG_ARM64_SVE_VLS,
+	KVM_REG_ARM64_SVE_ZREG(0, 0),
+	KVM_REG_ARM64_SVE_ZREG(1, 0),
+	KVM_REG_ARM64_SVE_ZREG(2, 0),
+	KVM_REG_ARM64_SVE_ZREG(3, 0),
+	KVM_REG_ARM64_SVE_ZREG(4, 0),
+	KVM_REG_ARM64_SVE_ZREG(5, 0),
+	KVM_REG_ARM64_SVE_ZREG(6, 0),
+	KVM_REG_ARM64_SVE_ZREG(7, 0),
+	KVM_REG_ARM64_SVE_ZREG(8, 0),
+	KVM_REG_ARM64_SVE_ZREG(9, 0),
+	KVM_REG_ARM64_SVE_ZREG(10, 0),
+	KVM_REG_ARM64_SVE_ZREG(11, 0),
+	KVM_REG_ARM64_SVE_ZREG(12, 0),
+	KVM_REG_ARM64_SVE_ZREG(13, 0),
+	KVM_REG_ARM64_SVE_ZREG(14, 0),
+	KVM_REG_ARM64_SVE_ZREG(15, 0),
+	KVM_REG_ARM64_SVE_ZREG(16, 0),
+	KVM_REG_ARM64_SVE_ZREG(17, 0),
+	KVM_REG_ARM64_SVE_ZREG(18, 0),
+	KVM_REG_ARM64_SVE_ZREG(19, 0),
+	KVM_REG_ARM64_SVE_ZREG(20, 0),
+	KVM_REG_ARM64_SVE_ZREG(21, 0),
+	KVM_REG_ARM64_SVE_ZREG(22, 0),
+	KVM_REG_ARM64_SVE_ZREG(23, 0),
+	KVM_REG_ARM64_SVE_ZREG(24, 0),
+	KVM_REG_ARM64_SVE_ZREG(25, 0),
+	KVM_REG_ARM64_SVE_ZREG(26, 0),
+	KVM_REG_ARM64_SVE_ZREG(27, 0),
+	KVM_REG_ARM64_SVE_ZREG(28, 0),
+	KVM_REG_ARM64_SVE_ZREG(29, 0),
+	KVM_REG_ARM64_SVE_ZREG(30, 0),
+	KVM_REG_ARM64_SVE_ZREG(31, 0),
+	KVM_REG_ARM64_SVE_PREG(0, 0),
+	KVM_REG_ARM64_SVE_PREG(1, 0),
+	KVM_REG_ARM64_SVE_PREG(2, 0),
+	KVM_REG_ARM64_SVE_PREG(3, 0),
+	KVM_REG_ARM64_SVE_PREG(4, 0),
+	KVM_REG_ARM64_SVE_PREG(5, 0),
+	KVM_REG_ARM64_SVE_PREG(6, 0),
+	KVM_REG_ARM64_SVE_PREG(7, 0),
+	KVM_REG_ARM64_SVE_PREG(8, 0),
+	KVM_REG_ARM64_SVE_PREG(9, 0),
+	KVM_REG_ARM64_SVE_PREG(10, 0),
+	KVM_REG_ARM64_SVE_PREG(11, 0),
+	KVM_REG_ARM64_SVE_PREG(12, 0),
+	KVM_REG_ARM64_SVE_PREG(13, 0),
+	KVM_REG_ARM64_SVE_PREG(14, 0),
+	KVM_REG_ARM64_SVE_PREG(15, 0),
+	KVM_REG_ARM64_SVE_FFR(0),
+	ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
+};
+static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
+
+static __u64 rejects_set[] = {
+#ifdef REG_LIST_SVE
+	KVM_REG_ARM64_SVE_VLS,
+#endif
+};
+static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
-- 
cgit v1.2.3-70-g09d2


From 3031e0288e60f09533339e61117b83099a6e126e Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 30 Sep 2020 21:22:28 -0400
Subject: KVM: selftests: Always clear dirty bitmap after iteration

We used not to clear the dirty bitmap before because KVM_GET_DIRTY_LOG
would overwrite it the next time it copies the dirty log onto it.
In the upcoming dirty ring tests we'll start to fetch dirty pages from
a ring buffer, so no one is going to clear the dirty bitmap for us.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20201001012228.5916-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 752ec158ac59..6a8275a22861 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -195,7 +195,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 				    page);
 		}
 
-		if (test_bit_le(page, bmap)) {
+		if (test_and_clear_bit_le(page, bmap)) {
 			host_dirty_count++;
 			/*
 			 * If the bit is set, the value written onto
-- 
cgit v1.2.3-70-g09d2


From afdb1960071935cfd5c1908691a34cc6e36931f7 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 30 Sep 2020 21:22:33 -0400
Subject: KVM: selftests: Use a single binary for dirty/clear log test

Remove the clear_dirty_log test, instead merge it into the existing
dirty_log_test.  It should be cleaner to use this single binary to do
both tests, also it's a preparation for the upcoming dirty ring test.

The default behavior will run all the modes in sequence.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20201001012233.6013-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile               |   2 -
 tools/testing/selftests/kvm/clear_dirty_log_test.c |   6 -
 tools/testing/selftests/kvm/dirty_log_test.c       | 187 +++++++++++++++++----
 3 files changed, 156 insertions(+), 39 deletions(-)
 delete mode 100644 tools/testing/selftests/kvm/clear_dirty_log_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 2b0d7d325e2a..c8af04dccf7f 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -59,7 +59,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
-TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
@@ -68,7 +67,6 @@ TEST_GEN_PROGS_x86_64 += steal_time
 
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
-TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c
deleted file mode 100644
index 11672ec6f74e..000000000000
--- a/tools/testing/selftests/kvm/clear_dirty_log_test.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#define USE_CLEAR_DIRTY_LOG
-#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
-#define KVM_DIRTY_LOG_INITIALLY_SET         (1 << 1)
-#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
-		KVM_DIRTY_LOG_INITIALLY_SET)
-#include "dirty_log_test.c"
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 6a8275a22861..139ccb550618 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -128,6 +128,78 @@ static uint64_t host_dirty_count;
 static uint64_t host_clear_count;
 static uint64_t host_track_next_count;
 
+enum log_mode_t {
+	/* Only use KVM_GET_DIRTY_LOG for logging */
+	LOG_MODE_DIRTY_LOG = 0,
+
+	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
+	LOG_MODE_CLEAR_LOG = 1,
+
+	LOG_MODE_NUM,
+
+	/* Run all supported modes */
+	LOG_MODE_ALL = LOG_MODE_NUM,
+};
+
+/* Mode of logging to test.  Default is to run all supported modes */
+static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
+/* Logging mode for current run */
+static enum log_mode_t host_log_mode;
+
+static bool clear_log_supported(void)
+{
+	return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+}
+
+static void clear_log_create_vm_done(struct kvm_vm *vm)
+{
+	struct kvm_enable_cap cap = {};
+	u64 manual_caps;
+
+	manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+	TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
+	manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+			KVM_DIRTY_LOG_INITIALLY_SET);
+	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+	cap.args[0] = manual_caps;
+	vm_enable_cap(vm, &cap);
+}
+
+static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					  void *bitmap, uint32_t num_pages)
+{
+	kvm_vm_get_dirty_log(vm, slot, bitmap);
+}
+
+static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					  void *bitmap, uint32_t num_pages)
+{
+	kvm_vm_get_dirty_log(vm, slot, bitmap);
+	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
+}
+
+struct log_mode {
+	const char *name;
+	/* Return true if this mode is supported, otherwise false */
+	bool (*supported)(void);
+	/* Hook when the vm creation is done (before vcpu creation) */
+	void (*create_vm_done)(struct kvm_vm *vm);
+	/* Hook to collect the dirty pages into the bitmap provided */
+	void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
+				     void *bitmap, uint32_t num_pages);
+} log_modes[LOG_MODE_NUM] = {
+	{
+		.name = "dirty-log",
+		.collect_dirty_pages = dirty_log_collect_dirty_pages,
+	},
+	{
+		.name = "clear-log",
+		.supported = clear_log_supported,
+		.create_vm_done = clear_log_create_vm_done,
+		.collect_dirty_pages = clear_log_collect_dirty_pages,
+	},
+};
+
 /*
  * We use this bitmap to track some pages that should have its dirty
  * bit set in the _next_ iteration.  For example, if we detected the
@@ -137,6 +209,44 @@ static uint64_t host_track_next_count;
  */
 static unsigned long *host_bmap_track;
 
+static void log_modes_dump(void)
+{
+	int i;
+
+	printf("all");
+	for (i = 0; i < LOG_MODE_NUM; i++)
+		printf(", %s", log_modes[i].name);
+	printf("\n");
+}
+
+static bool log_mode_supported(void)
+{
+	struct log_mode *mode = &log_modes[host_log_mode];
+
+	if (mode->supported)
+		return mode->supported();
+
+	return true;
+}
+
+static void log_mode_create_vm_done(struct kvm_vm *vm)
+{
+	struct log_mode *mode = &log_modes[host_log_mode];
+
+	if (mode->create_vm_done)
+		mode->create_vm_done(vm);
+}
+
+static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					 void *bitmap, uint32_t num_pages)
+{
+	struct log_mode *mode = &log_modes[host_log_mode];
+
+	TEST_ASSERT(mode->collect_dirty_pages != NULL,
+		    "collect_dirty_pages() is required for any log mode!");
+	mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
+}
+
 static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
 	uint64_t i;
@@ -257,6 +367,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
 #endif
+	log_mode_create_vm_done(vm);
 	vm_vcpu_add_default(vm, vcpuid, guest_code);
 	return vm;
 }
@@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 #define DIRTY_MEM_BITS 30 /* 1G */
 #define PAGE_SHIFT_4K  12
 
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
-
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		     unsigned long interval, uint64_t phys_offset)
 {
@@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	struct kvm_vm *vm;
 	unsigned long *bmap;
 
+	if (!log_mode_supported()) {
+		print_skip("Log mode '%s' not supported",
+			   log_modes[host_log_mode].name);
+		return;
+	}
+
 	/*
 	 * We reserve page table for 2 times of extra dirty mem which
 	 * will definitely cover the original (1G+) test range.  Here
@@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	bmap = bitmap_alloc(host_num_pages);
 	host_bmap_track = bitmap_alloc(host_num_pages);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-	struct kvm_enable_cap cap = {};
-
-	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
-	cap.args[0] = dirty_log_manual_caps;
-	vm_enable_cap(vm, &cap);
-#endif
-
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 				    guest_test_phys_mem,
@@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	while (iteration < iterations) {
 		/* Give the vcpu thread some time to dirty some pages */
 		usleep(interval * 1000);
-		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-#ifdef USE_CLEAR_DIRTY_LOG
-		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
-				       host_num_pages);
-#endif
+		log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
+					     bmap, host_num_pages);
 		vm_dirty_log_verify(mode, bmap);
 		iteration++;
 		sync_global_to_guest(vm, iteration);
@@ -410,6 +512,9 @@ static void help(char *name)
 	       TEST_HOST_LOOP_INTERVAL);
 	printf(" -p: specify guest physical test memory offset\n"
 	       "     Warning: a low offset can conflict with the loaded test code.\n");
+	printf(" -M: specify the host logging mode "
+	       "(default: run all log modes).  Supported modes: \n\t");
+	log_modes_dump();
 	printf(" -m: specify the guest mode ID to test "
 	       "(default: test all supported modes)\n"
 	       "     This option may be used multiple times.\n"
@@ -429,18 +534,7 @@ int main(int argc, char *argv[])
 	bool mode_selected = false;
 	uint64_t phys_offset = 0;
 	unsigned int mode;
-	int opt, i;
-
-#ifdef USE_CLEAR_DIRTY_LOG
-	dirty_log_manual_caps =
-		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-	if (!dirty_log_manual_caps) {
-		print_skip("KVM_CLEAR_DIRTY_LOG not available");
-		exit(KSFT_SKIP);
-	}
-	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
-				  KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
+	int opt, i, j;
 
 #ifdef __x86_64__
 	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
@@ -464,7 +558,7 @@ int main(int argc, char *argv[])
 	guest_mode_init(VM_MODE_P40V48_4K, true, true);
 #endif
 
-	while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
+	while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
 		switch (opt) {
 		case 'i':
 			iterations = strtol(optarg, NULL, 10);
@@ -486,6 +580,26 @@ int main(int argc, char *argv[])
 				    "Guest mode ID %d too big", mode);
 			guest_modes[mode].enabled = true;
 			break;
+		case 'M':
+			if (!strcmp(optarg, "all")) {
+				host_log_mode_option = LOG_MODE_ALL;
+				break;
+			}
+			for (i = 0; i < LOG_MODE_NUM; i++) {
+				if (!strcmp(optarg, log_modes[i].name)) {
+					pr_info("Setting log mode to: '%s'\n",
+						optarg);
+					host_log_mode_option = i;
+					break;
+				}
+			}
+			if (i == LOG_MODE_NUM) {
+				printf("Log mode '%s' invalid. Please choose "
+				       "from: ", optarg);
+				log_modes_dump();
+				exit(1);
+			}
+			break;
 		case 'h':
 		default:
 			help(argv[0]);
@@ -507,7 +621,18 @@ int main(int argc, char *argv[])
 		TEST_ASSERT(guest_modes[i].supported,
 			    "Guest mode ID %d (%s) not supported.",
 			    i, vm_guest_mode_string(i));
-		run_test(i, iterations, interval, phys_offset);
+		if (host_log_mode_option == LOG_MODE_ALL) {
+			/* Run each log mode */
+			for (j = 0; j < LOG_MODE_NUM; j++) {
+				pr_info("Testing Log Mode '%s'\n",
+					log_modes[j].name);
+				host_log_mode = j;
+				run_test(i, iterations, interval, phys_offset);
+			}
+		} else {
+			host_log_mode = host_log_mode_option;
+			run_test(i, iterations, interval, phys_offset);
+		}
 	}
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 4b5d12b0e21cc9f9f00201819844fcafb020ffad Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 27 Oct 2020 16:37:29 -0700
Subject: KVM: selftests: Factor code out of demand_paging_test

Much of the code in demand_paging_test can be reused by other, similar
multi-vCPU-memory-touching-perfromance-tests. Factor that common code
out for reuse.

No functional change expected.

This series was tested by running the following invocations on an Intel
Skylake machine:
dirty_log_perf_test -b 20m -i 100 -v 64
dirty_log_perf_test -b 20g -i 5 -v 4
dirty_log_perf_test -b 4g -i 5 -v 32
demand_paging_test -b 20m -v 64
demand_paging_test -b 20g -v 4
demand_paging_test -b 4g -v 32
All behaved as expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20201027233733.1484855-2-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c   | 204 +++------------------
 .../testing/selftests/kvm/include/perf_test_util.h | 187 +++++++++++++++++++
 2 files changed, 210 insertions(+), 181 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/perf_test_util.h

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 360cd3ea4cd6..4251e98ceb69 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -21,18 +21,12 @@
 #include <linux/bitops.h>
 #include <linux/userfaultfd.h>
 
-#include "test_util.h"
-#include "kvm_util.h"
+#include "perf_test_util.h"
 #include "processor.h"
+#include "test_util.h"
 
 #ifdef __NR_userfaultfd
 
-/* The memory slot index demand page */
-#define TEST_MEM_SLOT_INDEX		1
-
-/* Default guest test virtual memory offset */
-#define DEFAULT_GUEST_TEST_MEM		0xc0000000
-
 #define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */
 
 #ifdef PRINT_PER_PAGE_UPDATES
@@ -47,75 +41,14 @@
 #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
 #endif
 
-#define MAX_VCPUS 512
-
-/*
- * Guest/Host shared variables. Ensure addr_gva2hva() and/or
- * sync_global_to/from_guest() are used when accessing from
- * the host. READ/WRITE_ONCE() should also be used with anything
- * that may change.
- */
-static uint64_t host_page_size;
-static uint64_t guest_page_size;
-
 static char *guest_data_prototype;
 
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-static uint64_t guest_test_phys_mem;
-
-/*
- * Guest virtual memory offset of the testing memory slot.
- * Must not conflict with identity mapped test code.
- */
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-
-struct vcpu_args {
-	uint64_t gva;
-	uint64_t pages;
-
-	/* Only used by the host userspace part of the vCPU thread */
-	int vcpu_id;
-	struct kvm_vm *vm;
-};
-
-static struct vcpu_args vcpu_args[MAX_VCPUS];
-
-/*
- * Continuously write to the first 8 bytes of each page in the demand paging
- * memory region.
- */
-static void guest_code(uint32_t vcpu_id)
-{
-	uint64_t gva;
-	uint64_t pages;
-	int i;
-
-	/* Make sure vCPU args data structure is not corrupt. */
-	GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id);
-
-	gva = vcpu_args[vcpu_id].gva;
-	pages = vcpu_args[vcpu_id].pages;
-
-	for (i = 0; i < pages; i++) {
-		uint64_t addr = gva + (i * guest_page_size);
-
-		addr &= ~(host_page_size - 1);
-		*(uint64_t *)addr = 0x0123456789ABCDEF;
-	}
-
-	GUEST_SYNC(1);
-}
-
 static void *vcpu_worker(void *data)
 {
 	int ret;
-	struct vcpu_args *args = (struct vcpu_args *)data;
-	struct kvm_vm *vm = args->vm;
-	int vcpu_id = args->vcpu_id;
+	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+	int vcpu_id = vcpu_args->vcpu_id;
+	struct kvm_vm *vm = perf_test_args.vm;
 	struct kvm_run *run;
 	struct timespec start, end, ts_diff;
 
@@ -141,39 +74,6 @@ static void *vcpu_worker(void *data)
 	return NULL;
 }
 
-#define PAGE_SHIFT_4K  12
-#define PTES_PER_4K_PT 512
-
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
-				uint64_t vcpu_memory_bytes)
-{
-	struct kvm_vm *vm;
-	uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
-
-	/* Account for a few pages per-vCPU for stacks */
-	pages += DEFAULT_STACK_PGS * vcpus;
-
-	/*
-	 * Reserve twice the ammount of memory needed to map the test region and
-	 * the page table / stacks region, at 4k, for page tables. Do the
-	 * calculation with 4K page size: the smallest of all archs. (e.g., 64K
-	 * page size guest will need even less memory for page tables).
-	 */
-	pages += (2 * pages) / PTES_PER_4K_PT;
-	pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
-		 PTES_PER_4K_PT;
-	pages = vm_adjust_num_guest_pages(mode, pages);
-
-	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
-	vm = _vm_create(mode, pages, O_RDWR);
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
-	vm_create_irqchip(vm);
-#endif
-	return vm;
-}
-
 static int handle_uffd_page_request(int uffd, uint64_t addr)
 {
 	pid_t tid;
@@ -186,7 +86,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 
 	copy.src = (uint64_t)guest_data_prototype;
 	copy.dst = addr;
-	copy.len = host_page_size;
+	copy.len = perf_test_args.host_page_size;
 	copy.mode = 0;
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
@@ -203,7 +103,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
 		       timespec_to_ns(timespec_sub(end, start)));
 	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
-		       host_page_size, addr, tid);
+		       perf_test_args.host_page_size, addr, tid);
 
 	return 0;
 }
@@ -360,64 +260,21 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	struct timespec start, end, ts_diff;
 	int *pipefds = NULL;
 	struct kvm_vm *vm;
-	uint64_t guest_num_pages;
 	int vcpu_id;
 	int r;
 
 	vm = create_vm(mode, vcpus, vcpu_memory_bytes);
 
-	guest_page_size = vm_get_page_size(vm);
-
-	TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0,
-		    "Guest memory size is not guest page size aligned.");
-
-	guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size;
-	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
-
-	/*
-	 * If there should be more memory in the guest test region than there
-	 * can be pages in the guest, it will definitely cause problems.
-	 */
-	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
-		    "Requested more guest memory than address space allows.\n"
-		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
-		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
-		    vcpu_memory_bytes);
-
-	host_page_size = getpagesize();
-	TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0,
-		    "Guest memory size is not host page size aligned.");
-
-	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
-			      guest_page_size;
-	guest_test_phys_mem &= ~(host_page_size - 1);
-
-#ifdef __s390x__
-	/* Align to 1M (segment size) */
-	guest_test_phys_mem &= ~((1 << 20) - 1);
-#endif
-
-	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
-
-	/* Add an extra memory slot for testing demand paging */
-	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-				    guest_test_phys_mem,
-				    TEST_MEM_SLOT_INDEX,
-				    guest_num_pages, 0);
-
-	/* Do mapping for the demand paging memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
-
-	ucall_init(vm, NULL);
-
-	guest_data_prototype = malloc(host_page_size);
+	guest_data_prototype = malloc(perf_test_args.host_page_size);
 	TEST_ASSERT(guest_data_prototype,
 		    "Failed to allocate buffer for guest data pattern");
-	memset(guest_data_prototype, 0xAB, host_page_size);
+	memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
 
 	vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
+	add_vcpus(vm, vcpus, vcpu_memory_bytes);
+
 	if (use_uffd) {
 		uffd_handler_threads =
 			malloc(vcpus * sizeof(*uffd_handler_threads));
@@ -428,22 +285,18 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
 		pipefds = malloc(sizeof(int) * vcpus * 2);
 		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
-	}
 
-	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
-		vm_paddr_t vcpu_gpa;
-		void *vcpu_hva;
-
-		vm_vcpu_add_default(vm, vcpu_id, guest_code);
+		for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+			vm_paddr_t vcpu_gpa;
+			void *vcpu_hva;
 
-		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
-		PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-			       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+			vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+			PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+				       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
 
-		/* Cache the HVA pointer of the region */
-		vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+			/* Cache the HVA pointer of the region */
+			vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
 
-		if (use_uffd) {
 			/*
 			 * Set up user fault fd to handle demand paging
 			 * requests.
@@ -460,22 +313,10 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 			if (r < 0)
 				exit(-r);
 		}
-
-#ifdef __x86_64__
-		vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
-#endif
-
-		vcpu_args[vcpu_id].vm = vm;
-		vcpu_args[vcpu_id].vcpu_id = vcpu_id;
-		vcpu_args[vcpu_id].gva = guest_test_virt_mem +
-					 (vcpu_id * vcpu_memory_bytes);
-		vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size;
 	}
 
 	/* Export the shared variables to the guest */
-	sync_global_to_guest(vm, host_page_size);
-	sync_global_to_guest(vm, guest_page_size);
-	sync_global_to_guest(vm, vcpu_args);
+	sync_global_to_guest(vm, perf_test_args);
 
 	pr_info("Finished creating vCPUs and starting uffd threads\n");
 
@@ -483,7 +324,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
 	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
 		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-			       &vcpu_args[vcpu_id]);
+			       &perf_test_args.vcpu_args[vcpu_id]);
 	}
 
 	pr_info("Started all vCPUs\n");
@@ -514,7 +355,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	pr_info("Total guest execution time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 	pr_info("Overall demand paging rate: %f pgs/sec\n",
-		guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+		perf_test_args.vcpu_args[0].pages * vcpus /
+		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
 
 	ucall_uninit(vm);
 	kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
new file mode 100644
index 000000000000..f71f0858a1f2
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tools/testing/selftests/kvm/include/perf_test_util.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
+#define SELFTEST_KVM_PERF_TEST_UTIL_H
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MAX_VCPUS 512
+
+#define PAGE_SHIFT_4K  12
+#define PTES_PER_4K_PT 512
+
+#define TEST_MEM_SLOT_INDEX		1
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM		0xc0000000
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+struct vcpu_args {
+	uint64_t gva;
+	uint64_t pages;
+
+	/* Only used by the host userspace part of the vCPU thread */
+	int vcpu_id;
+};
+
+struct perf_test_args {
+	struct kvm_vm *vm;
+	uint64_t host_page_size;
+	uint64_t guest_page_size;
+
+	struct vcpu_args vcpu_args[MAX_VCPUS];
+};
+
+static struct perf_test_args perf_test_args;
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+	struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+	uint64_t gva;
+	uint64_t pages;
+	int i;
+
+	/* Make sure vCPU args data structure is not corrupt. */
+	GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
+
+	gva = vcpu_args->gva;
+	pages = vcpu_args->pages;
+
+	for (i = 0; i < pages; i++) {
+		uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+
+		addr &= ~(perf_test_args.host_page_size - 1);
+		*(uint64_t *)addr = 0x0123456789ABCDEF;
+	}
+
+	GUEST_SYNC(1);
+}
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
+				uint64_t vcpu_memory_bytes)
+{
+	struct kvm_vm *vm;
+	uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
+	uint64_t guest_num_pages;
+
+	/* Account for a few pages per-vCPU for stacks */
+	pages += DEFAULT_STACK_PGS * vcpus;
+
+	/*
+	 * Reserve twice the ammount of memory needed to map the test region and
+	 * the page table / stacks region, at 4k, for page tables. Do the
+	 * calculation with 4K page size: the smallest of all archs. (e.g., 64K
+	 * page size guest will need even less memory for page tables).
+	 */
+	pages += (2 * pages) / PTES_PER_4K_PT;
+	pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
+		 PTES_PER_4K_PT;
+	pages = vm_adjust_num_guest_pages(mode, pages);
+
+	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+	vm = _vm_create(mode, pages, O_RDWR);
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+
+	perf_test_args.vm = vm;
+	perf_test_args.guest_page_size = vm_get_page_size(vm);
+	perf_test_args.host_page_size = getpagesize();
+
+	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+		    "Guest memory size is not guest page size aligned.");
+
+	guest_num_pages = (vcpus * vcpu_memory_bytes) /
+			  perf_test_args.guest_page_size;
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
+	/*
+	 * If there should be more memory in the guest test region than there
+	 * can be pages in the guest, it will definitely cause problems.
+	 */
+	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+		    "Requested more guest memory than address space allows.\n"
+		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
+		    vcpu_memory_bytes);
+
+	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+		    "Guest memory size is not host page size aligned.");
+
+	guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+			      perf_test_args.guest_page_size;
+	guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+
+#ifdef __s390x__
+	/* Align to 1M (segment size) */
+	guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
+	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+	/* Add an extra memory slot for testing */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    guest_test_phys_mem,
+				    TEST_MEM_SLOT_INDEX,
+				    guest_num_pages, 0);
+
+	/* Do mapping for the demand paging memory slot */
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+	ucall_init(vm, NULL);
+
+	return vm;
+}
+
+static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+{
+	vm_paddr_t vcpu_gpa;
+	struct vcpu_args *vcpu_args;
+	int vcpu_id;
+
+	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+
+		vm_vcpu_add_default(vm, vcpu_id, guest_code);
+
+#ifdef __x86_64__
+		vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
+#endif
+
+		vcpu_args->vcpu_id = vcpu_id;
+		vcpu_args->gva = guest_test_virt_mem +
+				 (vcpu_id * vcpu_memory_bytes);
+		vcpu_args->pages = vcpu_memory_bytes /
+				   perf_test_args.guest_page_size;
+
+		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+			 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+	}
+}
+
+#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
-- 
cgit v1.2.3-70-g09d2


From 2fe5149bdfbf3c2cdfafd2b5b496252d45ca1f78 Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 27 Oct 2020 16:37:30 -0700
Subject: KVM: selftests: Remove address rounding in guest code

Rounding the address the guest writes to a host page boundary
will only have an effect if the host page size is larger than the guest
page size, but in that case the guest write would still go to the same
host page. There's no reason to round the address down, so remove the
rounding to simplify the demand paging test.

This series was tested by running the following invocations on an Intel
Skylake machine:
dirty_log_perf_test -b 20m -i 100 -v 64
dirty_log_perf_test -b 20g -i 5 -v 4
dirty_log_perf_test -b 4g -i 5 -v 32
demand_paging_test -b 20m -v 64
demand_paging_test -b 20g -v 4
demand_paging_test -b 4g -v 32
All behaved as expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20201027233733.1484855-3-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/perf_test_util.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index f71f0858a1f2..838f946700f0 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -72,7 +72,6 @@ static void guest_code(uint32_t vcpu_id)
 	for (i = 0; i < pages; i++) {
 		uint64_t addr = gva + (i * perf_test_args.guest_page_size);
 
-		addr &= ~(perf_test_args.host_page_size - 1);
 		*(uint64_t *)addr = 0x0123456789ABCDEF;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 1eafbd27edb5098ed6b6bc404c35d56c78beb0fd Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 27 Oct 2020 16:37:31 -0700
Subject: KVM: selftests: Simplify demand_paging_test with timespec_diff_now

Add a helper function to get the current time and return the time since
a given start time. Use that function to simplify the timekeeping in the
demand paging test.

This series was tested by running the following invocations on an Intel
Skylake machine:
dirty_log_perf_test -b 20m -i 100 -v 64
dirty_log_perf_test -b 20g -i 5 -v 4
dirty_log_perf_test -b 4g -i 5 -v 32
demand_paging_test -b 20m -v 64
demand_paging_test -b 20g -v 4
demand_paging_test -b 4g -v 32
All behaved as expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20201027233733.1484855-4-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c | 26 ++++++++++++------------
 tools/testing/selftests/kvm/include/test_util.h  |  1 +
 tools/testing/selftests/kvm/lib/test_util.c      | 15 ++++++++++++--
 3 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 4251e98ceb69..7de6feb00076 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -50,7 +50,8 @@ static void *vcpu_worker(void *data)
 	int vcpu_id = vcpu_args->vcpu_id;
 	struct kvm_vm *vm = perf_test_args.vm;
 	struct kvm_run *run;
-	struct timespec start, end, ts_diff;
+	struct timespec start;
+	struct timespec ts_diff;
 
 	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
 	run = vcpu_state(vm, vcpu_id);
@@ -66,8 +67,7 @@ static void *vcpu_worker(void *data)
 			    exit_reason_str(run->exit_reason));
 	}
 
-	clock_gettime(CLOCK_MONOTONIC, &end);
-	ts_diff = timespec_sub(end, start);
+	ts_diff = timespec_diff_now(start);
 	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
 		       ts_diff.tv_sec, ts_diff.tv_nsec);
 
@@ -78,7 +78,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 {
 	pid_t tid;
 	struct timespec start;
-	struct timespec end;
+	struct timespec ts_diff;
 	struct uffdio_copy copy;
 	int r;
 
@@ -98,10 +98,10 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 		return r;
 	}
 
-	clock_gettime(CLOCK_MONOTONIC, &end);
+	ts_diff = timespec_diff_now(start);
 
 	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
-		       timespec_to_ns(timespec_sub(end, start)));
+		       timespec_to_ns(ts_diff));
 	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
 		       perf_test_args.host_page_size, addr, tid);
 
@@ -123,7 +123,8 @@ static void *uffd_handler_thread_fn(void *arg)
 	int pipefd = uffd_args->pipefd;
 	useconds_t delay = uffd_args->delay;
 	int64_t pages = 0;
-	struct timespec start, end, ts_diff;
+	struct timespec start;
+	struct timespec ts_diff;
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	while (!quit_uffd_thread) {
@@ -192,8 +193,7 @@ static void *uffd_handler_thread_fn(void *arg)
 		pages++;
 	}
 
-	clock_gettime(CLOCK_MONOTONIC, &end);
-	ts_diff = timespec_sub(end, start);
+	ts_diff = timespec_diff_now(start);
 	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
 		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
 		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
@@ -257,7 +257,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	pthread_t *vcpu_threads;
 	pthread_t *uffd_handler_threads = NULL;
 	struct uffd_handler_args *uffd_args = NULL;
-	struct timespec start, end, ts_diff;
+	struct timespec start;
+	struct timespec ts_diff;
 	int *pipefds = NULL;
 	struct kvm_vm *vm;
 	int vcpu_id;
@@ -335,9 +336,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 		PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
 	}
 
-	pr_info("All vCPU threads joined\n");
+	ts_diff = timespec_diff_now(start);
 
-	clock_gettime(CLOCK_MONOTONIC, &end);
+	pr_info("All vCPU threads joined\n");
 
 	if (use_uffd) {
 		char c;
@@ -351,7 +352,6 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 		}
 	}
 
-	ts_diff = timespec_sub(end, start);
 	pr_info("Total guest execution time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 	pr_info("Overall demand paging rate: %f pgs/sec\n",
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 5eb01bf51b86..1cc036ddb0c5 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -64,5 +64,6 @@ int64_t timespec_to_ns(struct timespec ts);
 struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
 struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
 struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_diff_now(struct timespec start);
 
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 689e97c27ee2..1a46c2c48c7c 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -4,10 +4,13 @@
  *
  * Copyright (C) 2020, Google LLC.
  */
-#include <stdlib.h>
+
+#include <assert.h>
 #include <ctype.h>
 #include <limits.h>
-#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
 #include "test_util.h"
 
 /*
@@ -81,6 +84,14 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
 	return timespec_add_ns((struct timespec){0}, ns1 - ns2);
 }
 
+struct timespec timespec_diff_now(struct timespec start)
+{
+	struct timespec end;
+
+	clock_gettime(CLOCK_MONOTONIC, &end);
+	return timespec_sub(end, start);
+}
+
 void print_skip(const char *fmt, ...)
 {
 	va_list ap;
-- 
cgit v1.2.3-70-g09d2


From 92ab4b9a22cfea9b0d353e86024208040c10e807 Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 27 Oct 2020 16:37:32 -0700
Subject: KVM: selftests: Add wrfract to common guest code

Wrfract will be used by the dirty logging perf test introduced later in
this series to dirty memory sparsely.

This series was tested by running the following invocations on an Intel
Skylake machine:
dirty_log_perf_test -b 20m -i 100 -v 64
dirty_log_perf_test -b 20g -i 5 -v 4
dirty_log_perf_test -b 4g -i 5 -v 32
demand_paging_test -b 20m -v 64
demand_paging_test -b 20g -v 4
demand_paging_test -b 4g -v 32
All behaved as expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20201027233733.1484855-5-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c     | 2 ++
 tools/testing/selftests/kvm/include/perf_test_util.h | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 7de6feb00076..47defc65aeda 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -266,6 +266,8 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
 	vm = create_vm(mode, vcpus, vcpu_memory_bytes);
 
+	perf_test_args.wr_fract = 1;
+
 	guest_data_prototype = malloc(perf_test_args.host_page_size);
 	TEST_ASSERT(guest_data_prototype,
 		    "Failed to allocate buffer for guest data pattern");
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 838f946700f0..1716300469c0 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -46,6 +46,7 @@ struct perf_test_args {
 	struct kvm_vm *vm;
 	uint64_t host_page_size;
 	uint64_t guest_page_size;
+	int wr_fract;
 
 	struct vcpu_args vcpu_args[MAX_VCPUS];
 };
@@ -72,7 +73,10 @@ static void guest_code(uint32_t vcpu_id)
 	for (i = 0; i < pages; i++) {
 		uint64_t addr = gva + (i * perf_test_args.guest_page_size);
 
-		*(uint64_t *)addr = 0x0123456789ABCDEF;
+		if (i % perf_test_args.wr_fract == 0)
+			*(uint64_t *)addr = 0x0123456789ABCDEF;
+		else
+			READ_ONCE(*(uint64_t *)addr);
 	}
 
 	GUEST_SYNC(1);
-- 
cgit v1.2.3-70-g09d2


From f663132d1e09166db419afb9832d463e0a79f3d5 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 4 Nov 2020 22:23:48 +0100
Subject: KVM: selftests: Drop pointless vm_create wrapper

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201104212357.171559-3-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c         | 2 +-
 tools/testing/selftests/kvm/include/kvm_util.h       | 1 -
 tools/testing/selftests/kvm/include/perf_test_util.h | 2 +-
 tools/testing/selftests/kvm/lib/kvm_util.c           | 7 +------
 4 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 139ccb550618..54da9cc20db4 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -362,7 +362,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-	vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index feb949a92055..7d29aa786959 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -68,7 +68,6 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 1716300469c0..8e053e9dc7ea 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -105,7 +105,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
 
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-	vm = _vm_create(mode, pages, O_RDWR);
+	vm = vm_create(mode, pages, O_RDWR);
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 7669cb7c86e3..126c6727a6b0 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -180,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params)
  * descriptor to control the created VM is created with the permissions
  * given by perm (e.g. O_RDWR).
  */
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 {
 	struct kvm_vm *vm;
 
@@ -271,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
-{
-	return _vm_create(mode, phy_pages, perm);
-}
-
 /*
  * VM Restart
  *
-- 
cgit v1.2.3-70-g09d2


From 6769155fece2100506e22161945712afae61769f Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 4 Nov 2020 22:23:52 +0100
Subject: KVM: selftests: Make the per vcpu memory size global

Rename vcpu_memory_bytes to something with "percpu" in it
in order to be less ambiguous. Also make it global to
simplify things.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201104212357.171559-7-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c     | 20 ++++++++------------
 tools/testing/selftests/kvm/include/perf_test_util.h |  3 +++
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 47defc65aeda..33acd954a298 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -27,8 +27,6 @@
 
 #ifdef __NR_userfaultfd
 
-#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */
-
 #ifdef PRINT_PER_PAGE_UPDATES
 #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
 #else
@@ -251,8 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm,
 }
 
 static void run_test(enum vm_guest_mode mode, bool use_uffd,
-		     useconds_t uffd_delay, int vcpus,
-		     uint64_t vcpu_memory_bytes)
+		     useconds_t uffd_delay, int vcpus)
 {
 	pthread_t *vcpu_threads;
 	pthread_t *uffd_handler_threads = NULL;
@@ -264,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	int vcpu_id;
 	int r;
 
-	vm = create_vm(mode, vcpus, vcpu_memory_bytes);
+	vm = create_vm(mode, vcpus, guest_percpu_mem_size);
 
 	perf_test_args.wr_fract = 1;
 
@@ -276,7 +273,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-	add_vcpus(vm, vcpus, vcpu_memory_bytes);
+	add_vcpus(vm, vcpus, guest_percpu_mem_size);
 
 	if (use_uffd) {
 		uffd_handler_threads =
@@ -293,9 +290,9 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 			vm_paddr_t vcpu_gpa;
 			void *vcpu_hva;
 
-			vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+			vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size);
 			PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-				       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+				       vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size);
 
 			/* Cache the HVA pointer of the region */
 			vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
@@ -312,7 +309,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 						&uffd_handler_threads[vcpu_id],
 						pipefds[vcpu_id * 2],
 						uffd_delay, &uffd_args[vcpu_id],
-						vcpu_hva, vcpu_memory_bytes);
+						vcpu_hva, guest_percpu_mem_size);
 			if (r < 0)
 				exit(-r);
 		}
@@ -413,7 +410,6 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
 	bool mode_selected = false;
-	uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE;
 	int vcpus = 1;
 	unsigned int mode;
 	int opt, i;
@@ -463,7 +459,7 @@ int main(int argc, char *argv[])
 				    "A negative UFFD delay is not supported.");
 			break;
 		case 'b':
-			vcpu_memory_bytes = parse_size(optarg);
+			guest_percpu_mem_size = parse_size(optarg);
 			break;
 		case 'v':
 			vcpus = atoi(optarg);
@@ -486,7 +482,7 @@ int main(int argc, char *argv[])
 		TEST_ASSERT(guest_modes[i].supported,
 			    "Guest mode ID %d (%s) not supported.",
 			    i, vm_guest_mode_string(i));
-		run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes);
+		run_test(i, use_uffd, uffd_delay, vcpus);
 	}
 
 	return 0;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 8e053e9dc7ea..840dc2a19ce1 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -21,6 +21,8 @@
 /* Default guest test virtual memory offset */
 #define DEFAULT_GUEST_TEST_MEM		0xc0000000
 
+#define DEFAULT_PER_VCPU_MEM_SIZE	(1 << 30) /* 1G */
+
 /*
  * Guest physical memory offset of the testing memory slot.
  * This will be set to the topmost valid physical address minus
@@ -33,6 +35,7 @@ static uint64_t guest_test_phys_mem;
  * Must not conflict with identity mapped test code.
  */
 static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 
 struct vcpu_args {
 	uint64_t gva;
-- 
cgit v1.2.3-70-g09d2


From 3be18630954672b889186e7be9b631f00134e954 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 4 Nov 2020 22:23:53 +0100
Subject: KVM: selftests: Make the number of vcpus global

We also check the input number of vcpus against the maximum supported.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201104212357.171559-8-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c   | 37 ++++++++++------------
 .../testing/selftests/kvm/include/perf_test_util.h |  3 ++
 2 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 33acd954a298..3d96a7bfaff3 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -249,7 +249,7 @@ static int setup_demand_paging(struct kvm_vm *vm,
 }
 
 static void run_test(enum vm_guest_mode mode, bool use_uffd,
-		     useconds_t uffd_delay, int vcpus)
+		     useconds_t uffd_delay)
 {
 	pthread_t *vcpu_threads;
 	pthread_t *uffd_handler_threads = NULL;
@@ -261,7 +261,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	int vcpu_id;
 	int r;
 
-	vm = create_vm(mode, vcpus, guest_percpu_mem_size);
+	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
 	perf_test_args.wr_fract = 1;
 
@@ -270,23 +270,23 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 		    "Failed to allocate buffer for guest data pattern");
 	memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
 
-	vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
+	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-	add_vcpus(vm, vcpus, guest_percpu_mem_size);
+	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
 
 	if (use_uffd) {
 		uffd_handler_threads =
-			malloc(vcpus * sizeof(*uffd_handler_threads));
+			malloc(nr_vcpus * sizeof(*uffd_handler_threads));
 		TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
 
-		uffd_args = malloc(vcpus * sizeof(*uffd_args));
+		uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
 		TEST_ASSERT(uffd_args, "Memory allocation failed");
 
-		pipefds = malloc(sizeof(int) * vcpus * 2);
+		pipefds = malloc(sizeof(int) * nr_vcpus * 2);
 		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
 
-		for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
 			vm_paddr_t vcpu_gpa;
 			void *vcpu_hva;
 
@@ -322,7 +322,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
 
-	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
 		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
 			       &perf_test_args.vcpu_args[vcpu_id]);
 	}
@@ -330,7 +330,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	pr_info("Started all vCPUs\n");
 
 	/* Wait for the vcpu threads to quit */
-	for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
 		pthread_join(vcpu_threads[vcpu_id], NULL);
 		PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
 	}
@@ -343,7 +343,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 		char c;
 
 		/* Tell the user fault fd handler threads to quit */
-		for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
 			r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
 			TEST_ASSERT(r == 1, "Unable to write to pipefd");
 
@@ -354,7 +354,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
 	pr_info("Total guest execution time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 	pr_info("Overall demand paging rate: %f pgs/sec\n",
-		perf_test_args.vcpu_args[0].pages * vcpus /
+		perf_test_args.vcpu_args[0].pages * nr_vcpus /
 		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
 
 	ucall_uninit(vm);
@@ -409,8 +409,8 @@ static void help(char *name)
 
 int main(int argc, char *argv[])
 {
+	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 	bool mode_selected = false;
-	int vcpus = 1;
 	unsigned int mode;
 	int opt, i;
 	bool use_uffd = false;
@@ -462,12 +462,9 @@ int main(int argc, char *argv[])
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
 		case 'v':
-			vcpus = atoi(optarg);
-			TEST_ASSERT(vcpus > 0,
-				    "Must have a positive number of vCPUs");
-			TEST_ASSERT(vcpus <= MAX_VCPUS,
-				    "This test does not currently support\n"
-				    "more than %d vCPUs.", MAX_VCPUS);
+			nr_vcpus = atoi(optarg);
+			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
 		case 'h':
 		default:
@@ -482,7 +479,7 @@ int main(int argc, char *argv[])
 		TEST_ASSERT(guest_modes[i].supported,
 			    "Guest mode ID %d (%s) not supported.",
 			    i, vm_guest_mode_string(i));
-		run_test(i, use_uffd, uffd_delay, vcpus);
+		run_test(i, use_uffd, uffd_delay);
 	}
 
 	return 0;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 840dc2a19ce1..05eeb5d2bfc4 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -37,6 +37,9 @@ static uint64_t guest_test_phys_mem;
 static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 
+/* Number of VCPUs for the test */
+static int nr_vcpus = 1;
+
 struct vcpu_args {
 	uint64_t gva;
 	uint64_t pages;
-- 
cgit v1.2.3-70-g09d2


From 4fd94ec7d566ee2f0b52111cc6d26dd311f8a7c3 Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 27 Oct 2020 16:37:33 -0700
Subject: KVM: selftests: Introduce the dirty log perf test

The dirty log perf test will time verious dirty logging operations
(enabling dirty logging, dirtying memory, getting the dirty log,
clearing the dirty log, and disabling dirty logging) in order to
quantify dirty logging performance. This test can be used to inform
future performance improvements to KVM's dirty logging infrastructure.

This series was tested by running the following invocations on an Intel
Skylake machine:
dirty_log_perf_test -b 20m -i 100 -v 64
dirty_log_perf_test -b 20g -i 5 -v 4
dirty_log_perf_test -b 4g -i 5 -v 32
demand_paging_test -b 20m -v 64
demand_paging_test -b 20g -v 4
demand_paging_test -b 4g -v 32
All behaved as expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20201027233733.1484855-6-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   1 +
 tools/testing/selftests/kvm/dirty_log_perf_test.c  | 376 +++++++++++++++++++++
 .../testing/selftests/kvm/include/perf_test_util.h |  18 +-
 tools/testing/selftests/kvm/include/test_util.h    |   1 +
 tools/testing/selftests/kvm/lib/test_util.c        |   7 +
 6 files changed, 396 insertions(+), 8 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/dirty_log_perf_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index ea1692d43411..7a2c242b7152 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -27,6 +27,7 @@
 /clear_dirty_log_test
 /demand_paging_test
 /dirty_log_test
+/dirty_log_perf_test
 /kvm_create_max_vcpus
 /set_memory_region_test
 /steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index c8af04dccf7f..3d14ef77755e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -61,6 +61,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
new file mode 100644
index 000000000000..cda7b000b1bc
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging performance test
+ *
+ * Based on dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
+#define TEST_HOST_LOOP_N		2UL
+
+/* Host variables */
+static bool host_quit;
+static uint64_t iteration;
+static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
+
+static void *vcpu_worker(void *data)
+{
+	int ret;
+	struct kvm_vm *vm = perf_test_args.vm;
+	uint64_t pages_count = 0;
+	struct kvm_run *run;
+	struct timespec start;
+	struct timespec ts_diff;
+	struct timespec total = (struct timespec){0};
+	struct timespec avg;
+	struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+	int vcpu_id = vcpu_args->vcpu_id;
+
+	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+	run = vcpu_state(vm, vcpu_id);
+
+	while (!READ_ONCE(host_quit)) {
+		uint64_t current_iteration = READ_ONCE(iteration);
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		ret = _vcpu_run(vm, vcpu_id);
+		ts_diff = timespec_diff_now(start);
+
+		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+		TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
+			    "Invalid guest sync status: exit_reason=%s\n",
+			    exit_reason_str(run->exit_reason));
+
+		pr_debug("Got sync event from vCPU %d\n", vcpu_id);
+		vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+		pr_debug("vCPU %d updated last completed iteration to %lu\n",
+			 vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
+
+		if (current_iteration) {
+			pages_count += vcpu_args->pages;
+			total = timespec_add(total, ts_diff);
+			pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n",
+				vcpu_id, current_iteration, ts_diff.tv_sec,
+				ts_diff.tv_nsec);
+		} else {
+			pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n",
+				vcpu_id, current_iteration, ts_diff.tv_sec,
+				ts_diff.tv_nsec);
+		}
+
+		while (current_iteration == READ_ONCE(iteration) &&
+		       !READ_ONCE(host_quit)) {}
+	}
+
+	avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
+	pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+		vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
+		total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+	return NULL;
+}
+
+#ifdef USE_CLEAR_DIRTY_LOG
+static u64 dirty_log_manual_caps;
+#endif
+
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+		     uint64_t phys_offset, int wr_fract)
+{
+	pthread_t *vcpu_threads;
+	struct kvm_vm *vm;
+	unsigned long *bmap;
+	uint64_t guest_num_pages;
+	uint64_t host_num_pages;
+	int vcpu_id;
+	struct timespec start;
+	struct timespec ts_diff;
+	struct timespec get_dirty_log_total = (struct timespec){0};
+	struct timespec vcpu_dirty_total = (struct timespec){0};
+	struct timespec avg;
+#ifdef USE_CLEAR_DIRTY_LOG
+	struct kvm_enable_cap cap = {};
+	struct timespec clear_dirty_log_total = (struct timespec){0};
+#endif
+
+	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+
+	perf_test_args.wr_fract = wr_fract;
+
+	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
+	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+	bmap = bitmap_alloc(host_num_pages);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+	cap.args[0] = dirty_log_manual_caps;
+	vm_enable_cap(vm, &cap);
+#endif
+
+	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+	add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+
+	sync_global_to_guest(vm, perf_test_args);
+
+	/* Start the iterations */
+	iteration = 0;
+	host_quit = false;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+			       &perf_test_args.vcpu_args[vcpu_id]);
+	}
+
+	/* Allow the vCPU to populate memory */
+	pr_debug("Starting iteration %lu - Populating\n", iteration);
+	while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+		pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n",
+			iteration);
+
+	ts_diff = timespec_diff_now(start);
+	pr_info("Populate memory time: %ld.%.9lds\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	/* Enable dirty logging */
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+				KVM_MEM_LOG_DIRTY_PAGES);
+	ts_diff = timespec_diff_now(start);
+	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	while (iteration < iterations) {
+		/*
+		 * Incrementing the iteration number will start the vCPUs
+		 * dirtying memory again.
+		 */
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		iteration++;
+
+		pr_debug("Starting iteration %lu\n", iteration);
+		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+			while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+				pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n",
+					 vcpu_id, iteration);
+		}
+
+		ts_diff = timespec_diff_now(start);
+		vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
+		pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n",
+			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+		ts_diff = timespec_diff_now(start);
+		get_dirty_log_total = timespec_add(get_dirty_log_total,
+						   ts_diff);
+		pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
+			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+				       host_num_pages);
+
+		ts_diff = timespec_diff_now(start);
+		clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+						     ts_diff);
+		pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+#endif
+	}
+
+	/* Tell the vcpu thread to quit */
+	host_quit = true;
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+		pthread_join(vcpu_threads[vcpu_id], NULL);
+
+	/* Disable dirty logging */
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+	ts_diff = timespec_diff_now(start);
+	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
+		ts_diff.tv_sec, ts_diff.tv_nsec);
+
+	avg = timespec_div(get_dirty_log_total, iterations);
+	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+		iterations, get_dirty_log_total.tv_sec,
+		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+	avg = timespec_div(clear_dirty_log_total, iterations);
+	pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+		iterations, clear_dirty_log_total.tv_sec,
+		clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+#endif
+
+	free(bmap);
+	free(vcpu_threads);
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+}
+
+struct guest_mode {
+	bool supported;
+	bool enabled;
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+	guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+static void help(char *name)
+{
+	int i;
+
+	puts("");
+	printf("usage: %s [-h] [-i iterations] [-p offset] "
+	       "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
+	puts("");
+	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+	       TEST_HOST_LOOP_N);
+	printf(" -p: specify guest physical test memory offset\n"
+	       "     Warning: a low offset can conflict with the loaded test code.\n");
+	printf(" -m: specify the guest mode ID to test "
+	       "(default: test all supported modes)\n"
+	       "     This option may be used multiple times.\n"
+	       "     Guest mode IDs:\n");
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		printf("         %d:    %s%s\n", i, vm_guest_mode_string(i),
+		       guest_modes[i].supported ? " (supported)" : "");
+	}
+	printf(" -b: specify the size of the memory region which should be\n"
+	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+	       "     (default: 1G)\n");
+	printf(" -f: specify the fraction of pages which should be written to\n"
+	       "     as opposed to simply read, in the form\n"
+	       "     1/<fraction of pages to write>.\n"
+	       "     (default: 1 i.e. all pages are written to.)\n");
+	printf(" -v: specify the number of vCPUs to run.\n");
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned long iterations = TEST_HOST_LOOP_N;
+	bool mode_selected = false;
+	uint64_t phys_offset = 0;
+	unsigned int mode;
+	int opt, i;
+	int wr_fract = 1;
+
+#ifdef USE_CLEAR_DIRTY_LOG
+	dirty_log_manual_caps =
+		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+	if (!dirty_log_manual_caps) {
+		print_skip("KVM_CLEAR_DIRTY_LOG not available");
+		exit(KSFT_SKIP);
+	}
+	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+				  KVM_DIRTY_LOG_INITIALLY_SET);
+#endif
+
+#ifdef __x86_64__
+	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+	guest_mode_init(VM_MODE_P40V48_64K, true, true);
+
+	{
+		unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+		if (limit >= 52)
+			guest_mode_init(VM_MODE_P52V48_64K, true, true);
+		if (limit >= 48) {
+			guest_mode_init(VM_MODE_P48V48_4K, true, true);
+			guest_mode_init(VM_MODE_P48V48_64K, true, true);
+		}
+	}
+#endif
+#ifdef __s390x__
+	guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
+		switch (opt) {
+		case 'i':
+			iterations = strtol(optarg, NULL, 10);
+			break;
+		case 'p':
+			phys_offset = strtoull(optarg, NULL, 0);
+			break;
+		case 'm':
+			if (!mode_selected) {
+				for (i = 0; i < NUM_VM_MODES; ++i)
+					guest_modes[i].enabled = false;
+				mode_selected = true;
+			}
+			mode = strtoul(optarg, NULL, 10);
+			TEST_ASSERT(mode < NUM_VM_MODES,
+				    "Guest mode ID %d too big", mode);
+			guest_modes[mode].enabled = true;
+			break;
+		case 'b':
+			guest_percpu_mem_size = parse_size(optarg);
+			break;
+		case 'f':
+			wr_fract = atoi(optarg);
+			TEST_ASSERT(wr_fract >= 1,
+				    "Write fraction cannot be less than one");
+			break;
+		case 'v':
+			nr_vcpus = atoi(optarg);
+			TEST_ASSERT(nr_vcpus > 0,
+				    "Must have a positive number of vCPUs");
+			TEST_ASSERT(nr_vcpus <= MAX_VCPUS,
+				    "This test does not currently support\n"
+				    "more than %d vCPUs.", MAX_VCPUS);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+
+	pr_info("Test iterations: %"PRIu64"\n",	iterations);
+
+	for (i = 0; i < NUM_VM_MODES; ++i) {
+		if (!guest_modes[i].enabled)
+			continue;
+		TEST_ASSERT(guest_modes[i].supported,
+			    "Guest mode ID %d (%s) not supported.",
+			    i, vm_guest_mode_string(i));
+		run_test(i, iterations, phys_offset, wr_fract);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 05eeb5d2bfc4..2618052057b1 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -76,16 +76,18 @@ static void guest_code(uint32_t vcpu_id)
 	gva = vcpu_args->gva;
 	pages = vcpu_args->pages;
 
-	for (i = 0; i < pages; i++) {
-		uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+	while (true) {
+		for (i = 0; i < pages; i++) {
+			uint64_t addr = gva + (i * perf_test_args.guest_page_size);
 
-		if (i % perf_test_args.wr_fract == 0)
-			*(uint64_t *)addr = 0x0123456789ABCDEF;
-		else
-			READ_ONCE(*(uint64_t *)addr);
-	}
+			if (i % perf_test_args.wr_fract == 0)
+				*(uint64_t *)addr = 0x0123456789ABCDEF;
+			else
+				READ_ONCE(*(uint64_t *)addr);
+		}
 
-	GUEST_SYNC(1);
+		GUEST_SYNC(1);
+	}
 }
 
 static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 1cc036ddb0c5..ffffa560436b 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -65,5 +65,6 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
 struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
 struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
 struct timespec timespec_diff_now(struct timespec start);
+struct timespec timespec_div(struct timespec ts, int divisor);
 
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 1a46c2c48c7c..8e04c0b1608e 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -92,6 +92,13 @@ struct timespec timespec_diff_now(struct timespec start)
 	return timespec_sub(end, start);
 }
 
+struct timespec timespec_div(struct timespec ts, int divisor)
+{
+	int64_t ns = timespec_to_ns(ts) / divisor;
+
+	return timespec_add_ns((struct timespec){0}, ns);
+}
+
 void print_skip(const char *fmt, ...)
 {
 	va_list ap;
-- 
cgit v1.2.3-70-g09d2


From 6d6a18fdde8b86b919b740ad629153de432d12a8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 9 Nov 2020 09:45:17 -0500
Subject: KVM: selftests: allow two iterations of dirty_log_perf_test

Even though one iteration is not enough for the dirty log performance
test (due to the cost of building page tables, zeroing memory etc.)
two is okay and it is the default.  Without this patch,
"./dirty_log_perf_test" without any further arguments fails.

Cc: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index cda7b000b1bc..85c9b8f73142 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -359,7 +359,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+	TEST_ASSERT(iterations >= 2, "The test should have at least two iterations");
 
 	pr_info("Test iterations: %"PRIu64"\n",	iterations);
 
-- 
cgit v1.2.3-70-g09d2


From 197afc631413d96dc60acfc7970bdd4125d38cd3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 6 Nov 2020 16:02:51 -0800
Subject: libbpf: Don't attempt to load unused subprog as an entry-point BPF
 program

If BPF code contains unused BPF subprogram and there are no other subprogram
calls (which can realistically happen in real-world applications given
sufficiently smart Clang code optimizations), libbpf will erroneously assume
that subprograms are entry-point programs and will attempt to load them with
UNSPEC program type.

Fix by not relying on subcall instructions and rather detect it based on the
structure of BPF object's sections.

Fixes: 9a94f277c4fb ("tools: libbpf: restore the ability to load programs from .text section")
Reported-by: Dmitrii Banshchikov <dbanschikov@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201107000251.256821-1-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c                             | 23 ++++++++++++----------
 tools/testing/selftests/bpf/prog_tests/subprogs.c  |  6 ++++++
 .../selftests/bpf/progs/test_subprogs_unused.c     | 21 ++++++++++++++++++++
 3 files changed, 40 insertions(+), 10 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_subprogs_unused.c

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 313034117070..28baee7ba1ca 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -560,8 +560,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		      const char *name, size_t sec_idx, const char *sec_name,
 		      size_t sec_off, void *insn_data, size_t insn_data_sz)
 {
-	int i;
-
 	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
 		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
 			sec_name, name, sec_off, insn_data_sz);
@@ -600,13 +598,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		goto errout;
 	memcpy(prog->insns, insn_data, insn_data_sz);
 
-	for (i = 0; i < prog->insns_cnt; i++) {
-		if (insn_is_subprog_call(&prog->insns[i])) {
-			obj->has_subcalls = true;
-			break;
-		}
-	}
-
 	return 0;
 errout:
 	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
@@ -3280,7 +3271,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
 static bool prog_is_subprog(const struct bpf_object *obj,
 			    const struct bpf_program *prog)
 {
-	return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
+	/* For legacy reasons, libbpf supports an entry-point BPF programs
+	 * without SEC() attribute, i.e., those in the .text section. But if
+	 * there are 2 or more such programs in the .text section, they all
+	 * must be subprograms called from entry-point BPF programs in
+	 * designated SEC()'tions, otherwise there is no way to distinguish
+	 * which of those programs should be loaded vs which are a subprogram.
+	 * Similarly, if there is a function/program in .text and at least one
+	 * other BPF program with custom SEC() attribute, then we just assume
+	 * .text programs are subprograms (even if they are not called from
+	 * other programs), because libbpf never explicitly supported mixing
+	 * SEC()-designated BPF programs and .text entry-point BPF programs.
+	 */
+	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 }
 
 struct bpf_program *
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
index a00abf58c037..3f3d2ac4dd57 100644
--- a/tools/testing/selftests/bpf/prog_tests/subprogs.c
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -3,12 +3,14 @@
 #include <test_progs.h>
 #include <time.h>
 #include "test_subprogs.skel.h"
+#include "test_subprogs_unused.skel.h"
 
 static int duration;
 
 void test_subprogs(void)
 {
 	struct test_subprogs *skel;
+	struct test_subprogs_unused *skel2;
 	int err;
 
 	skel = test_subprogs__open_and_load();
@@ -26,6 +28,10 @@ void test_subprogs(void)
 	CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
 	CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
 
+	skel2 = test_subprogs_unused__open_and_load();
+	ASSERT_OK_PTR(skel2, "unused_progs_skel");
+	test_subprogs_unused__destroy(skel2);
+
 cleanup:
 	test_subprogs__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
new file mode 100644
index 000000000000..75d975f8cf90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
@@ -0,0 +1,21 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__attribute__((maybe_unused)) __noinline int unused1(int x)
+{
+	return x + 1;
+}
+
+static __attribute__((maybe_unused)) __noinline int unused2(int x)
+{
+	return x + 2;
+}
+
+SEC("raw_tp/sys_enter")
+int main_prog(void *ctx)
+{
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From f52b8fd332573106e60958617a3d2e30611ce1fb Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 6 Nov 2020 14:54:02 -0800
Subject: bpf: selftest: Use static globals in tcp_hdr_options and
 btf_skc_cls_ingress

Some globals in the tcp_hdr_options test and btf_skc_cls_ingress test
are not using static scope.  This patch fixes it.

Targeting bpf-next branch as an improvement since it currently does not
break the build.

Fixes: ad2f8eb0095e ("bpf: selftests: Tcp header options")
Fixes: 9a856cae2217 ("bpf: selftest: Add test_btf_skc_cls_ingress")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201106225402.4135741-1-kafai@fb.com
---
 tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c |  2 +-
 tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c     | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
index 86ccf37e26b3..762f6a9da8b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -17,7 +17,7 @@
 #include "test_btf_skc_cls_ingress.skel.h"
 
 static struct test_btf_skc_cls_ingress *skel;
-struct sockaddr_in6 srv_sa6;
+static struct sockaddr_in6 srv_sa6;
 static __u32 duration;
 
 #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index c85174cdcb77..08d19cafd5e8 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -18,12 +18,12 @@
 #define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-hdr-opt-test"
 
-struct bpf_test_option exp_passive_estab_in;
-struct bpf_test_option exp_active_estab_in;
-struct bpf_test_option exp_passive_fin_in;
-struct bpf_test_option exp_active_fin_in;
-struct hdr_stg exp_passive_hdr_stg;
-struct hdr_stg exp_active_hdr_stg = { .active = true, };
+static struct bpf_test_option exp_passive_estab_in;
+static struct bpf_test_option exp_active_estab_in;
+static struct bpf_test_option exp_passive_fin_in;
+static struct bpf_test_option exp_active_fin_in;
+static struct hdr_stg exp_passive_hdr_stg;
+static struct hdr_stg exp_active_hdr_stg = { .active = true, };
 
 static struct test_misc_tcp_hdr_options *misc_skel;
 static struct test_tcp_hdr_options *skel;
-- 
cgit v1.2.3-70-g09d2


From 1db32acfde741359b0b1b9962ae8cd501c2ff769 Mon Sep 17 00:00:00 2001
From: Tanner Love <tannerlove@google.com>
Date: Fri, 6 Nov 2020 13:07:41 -0500
Subject: selftests/net: test max_num_members, fanout_args in psock_fanout

Add an additional control test that verifies:
-specifying two different max_num_members values fails
-specifying max_num_members > PACKET_FANOUT_MAX fails

In datapath tests, set max_num_members to PACKET_FANOUT_MAX.

Signed-off-by: Tanner Love <tannerlove@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/psock_fanout.c | 72 ++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 2c522f7a0aec..db4521335722 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -56,12 +56,15 @@
 
 #define RING_NUM_FRAMES			20
 
+static uint32_t cfg_max_num_members;
+
 /* Open a socket in a given fanout mode.
  * @return -1 if mode is bad, a valid socket otherwise */
 static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 {
 	struct sockaddr_ll addr = {0};
-	int fd, val;
+	struct fanout_args args;
+	int fd, val, err;
 
 	fd = socket(PF_PACKET, SOCK_RAW, 0);
 	if (fd < 0) {
@@ -83,8 +86,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
 		exit(1);
 	}
 
-	val = (((int) typeflags) << 16) | group_id;
-	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+	if (cfg_max_num_members) {
+		args.id = group_id;
+		args.type_flags = typeflags;
+		args.max_num_members = cfg_max_num_members;
+		err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args,
+				 sizeof(args));
+	} else {
+		val = (((int) typeflags) << 16) | group_id;
+		err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val,
+				 sizeof(val));
+	}
+	if (err) {
 		if (close(fd)) {
 			perror("close packet");
 			exit(1);
@@ -286,6 +299,56 @@ static void test_control_group(void)
 	}
 }
 
+/* Test illegal max_num_members values */
+static void test_control_group_max_num_members(void)
+{
+	int fds[3];
+
+	fprintf(stderr, "test: control multiple sockets, max_num_members\n");
+
+	/* expected failure on greater than PACKET_FANOUT_MAX */
+	cfg_max_num_members = (1 << 16) + 1;
+	if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+		fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n");
+		exit(1);
+	}
+
+	cfg_max_num_members = 256;
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[0] == -1) {
+		fprintf(stderr, "ERROR: failed open\n");
+		exit(1);
+	}
+
+	/* expected failure on joining group with different max_num_members */
+	cfg_max_num_members = 257;
+	if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+		fprintf(stderr, "ERROR: set different max_num_members\n");
+		exit(1);
+	}
+
+	/* success on joining group with same max_num_members */
+	cfg_max_num_members = 256;
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+
+	/* success on joining group with max_num_members unspecified */
+	cfg_max_num_members = 0;
+	fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[2] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+
+	if (close(fds[2]) || close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
 /* Test creating a unique fanout group ids */
 static void test_unique_fanout_group_ids(void)
 {
@@ -426,8 +489,11 @@ int main(int argc, char **argv)
 
 	test_control_single();
 	test_control_group();
+	test_control_group_max_num_members();
 	test_unique_fanout_group_ids();
 
+	/* PACKET_FANOUT_MAX */
+	cfg_max_num_members = 1 << 16;
 	/* find a set of ports that do not collide onto the same socket */
 	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
 			    expect_hash[0], expect_hash[1]);
-- 
cgit v1.2.3-70-g09d2


From 3e9fa9983b9297407c2448114d6d27782d5e2ef2 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 30 Sep 2020 21:04:05 -0400
Subject: tools/power turbostat: update version number

goodbye summer...

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 74d2fc6fc78a..f3a1746f7f45 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5712,7 +5712,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 20.03.20"
+	fprintf(outf, "turbostat version 20.09.30"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
-- 
cgit v1.2.3-70-g09d2


From c335b4f1f65012713832d988ec06512c7bda5c04 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Thu, 5 Nov 2020 15:24:40 -0800
Subject: kunit: tool: unmark test_data as binary blobs

The tools/testing/kunit/test_data/ directory was marked as binary
because some of the test_data files cause checkpatch warnings. Fix this
by dropping the .gitattributes file.

Fixes: afc63da64f1e ("kunit: kunit_parser: make parser more robust")
Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/.gitattributes | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 tools/testing/kunit/.gitattributes

(limited to 'tools')

diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes
deleted file mode 100644
index 5b7da1fc3b8f..000000000000
--- a/tools/testing/kunit/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-test_data/* binary
-- 
cgit v1.2.3-70-g09d2


From 3959d0a63b3202ea2aa12b3f6effd5400d773d31 Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Wed, 21 Oct 2020 00:16:03 -0700
Subject: kunit: Fix kunit.py parse subcommand (use null build_dir)

When JSON support was added in [1], the KunitParseRequest tuple was
updated to contain a 'build_dir' field, but kunit.py parse doesn't
accept --build_dir as an option. The code nevertheless tried to access
it, resulting in this error:

AttributeError: 'Namespace' object has no attribute 'build_dir'

Given that the parser only uses the build_dir variable to set the
'build_environment' json field, we set it to None (which gives the JSON
'null') for now. Ultimately, we probably do want to be able to set this,
but since it's new functionality which (for the parse subcommand) never
worked, this is the quickest way of getting it back up and running.

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git/commit/?h=kunit-fixes&id=21a6d1780d5bbfca0ce9b8104ca6233502fcbf86

Fixes: 21a6d1780d5b ("kunit: tool: allow generating test results in JSON")
Signed-off-by: David Gow <davidgow@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index ebf5f5763dee..a6d5f219f714 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -337,7 +337,7 @@ def main(argv, linux=None):
 				kunit_output = f.read().splitlines()
 		request = KunitParseRequest(cli_args.raw_output,
 					    kunit_output,
-					    cli_args.build_dir,
+					    None,
 					    cli_args.json)
 		result = parse_tests(request)
 		if result.status != KunitStatus.SUCCESS:
-- 
cgit v1.2.3-70-g09d2


From b7e0b983ff13714d261883e89910b0755eb12169 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Wed, 21 Oct 2020 15:07:52 -0700
Subject: kunit: tool: fix pre-existing python type annotation errors

The code uses annotations, but they aren't accurate.
Note that type checking in python is a separate process, running
`kunit.py run` will not check and complain about invalid types at
runtime.

Fix pre-existing issues found by running a type checker
$ mypy *.py

All but one of these were returning `None` without denoting this
properly (via `Optional[Type]`).

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 84a1af2581f5..db0347baa428 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -12,7 +12,7 @@ from collections import namedtuple
 from datetime import datetime
 from enum import Enum, auto
 from functools import reduce
-from typing import List
+from typing import List, Optional, Tuple
 
 TestResult = namedtuple('TestResult', ['status','suites','log'])
 
@@ -151,7 +151,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
 	else:
 		return False
 
-def parse_test_case(lines: List[str]) -> TestCase:
+def parse_test_case(lines: List[str]) -> Optional[TestCase]:
 	test_case = TestCase()
 	save_non_diagnositic(lines, test_case)
 	while parse_diagnostic(lines, test_case):
@@ -163,7 +163,7 @@ def parse_test_case(lines: List[str]) -> TestCase:
 
 SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$')
 
-def parse_subtest_header(lines: List[str]) -> str:
+def parse_subtest_header(lines: List[str]) -> Optional[str]:
 	consume_non_diagnositic(lines)
 	if not lines:
 		return None
@@ -176,7 +176,7 @@ def parse_subtest_header(lines: List[str]) -> str:
 
 SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)')
 
-def parse_subtest_plan(lines: List[str]) -> int:
+def parse_subtest_plan(lines: List[str]) -> Optional[int]:
 	consume_non_diagnositic(lines)
 	match = SUBTEST_PLAN.match(lines[0])
 	if match:
@@ -230,7 +230,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
 	max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases)
 	return max_status(max_test_case_status, test_suite.status)
 
-def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
+def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]:
 	if not lines:
 		return None
 	consume_non_diagnositic(lines)
@@ -271,7 +271,7 @@ def parse_tap_header(lines: List[str]) -> bool:
 
 TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
 
-def parse_test_plan(lines: List[str]) -> int:
+def parse_test_plan(lines: List[str]) -> Optional[int]:
 	consume_non_diagnositic(lines)
 	match = TEST_PLAN.match(lines[0])
 	if match:
@@ -310,7 +310,7 @@ def parse_test_result(lines: List[str]) -> TestResult:
 	else:
 		return TestResult(TestStatus.NO_TESTS, [], lines)
 
-def print_and_count_results(test_result: TestResult) -> None:
+def print_and_count_results(test_result: TestResult) -> Tuple[int, int, int]:
 	total_tests = 0
 	failed_tests = 0
 	crashed_tests = 0
-- 
cgit v1.2.3-70-g09d2


From fcdb0bc08ced274078f371e1e0fe6421a97fa9f2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 26 Oct 2020 18:59:25 +0200
Subject: kunit: Do not pollute source directory with generated files
 (.kunitconfig)

When --build_dir is provided use it and do not pollute source directory
which even can be mounted over network or read-only.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit.py        | 25 ++++++++++++-------------
 tools/testing/kunit/kunit_kernel.py | 24 +++++++++++++++++++-----
 2 files changed, 31 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index a6d5f219f714..d4f7846d0745 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -11,7 +11,6 @@ import argparse
 import sys
 import os
 import time
-import shutil
 
 from collections import namedtuple
 from enum import Enum, auto
@@ -44,11 +43,6 @@ class KunitStatus(Enum):
 	BUILD_FAILURE = auto()
 	TEST_FAILURE = auto()
 
-def create_default_kunitconfig():
-	if not os.path.exists(kunit_kernel.kunitconfig_path):
-		shutil.copyfile('arch/um/configs/kunit_defconfig',
-				kunit_kernel.kunitconfig_path)
-
 def get_kernel_root_path():
 	parts = sys.argv[0] if not __file__ else __file__
 	parts = os.path.realpath(parts).split('tools/testing/kunit')
@@ -61,7 +55,6 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree,
 	kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...')
 
 	config_start = time.time()
-	create_default_kunitconfig()
 	success = linux.build_reconfig(request.build_dir, request.make_options)
 	config_end = time.time()
 	if not success:
@@ -262,12 +255,12 @@ def main(argv, linux=None):
 		if not os.path.exists(cli_args.build_dir):
 			os.mkdir(cli_args.build_dir)
 
-		if not os.path.exists(kunit_kernel.kunitconfig_path):
-			create_default_kunitconfig()
-
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
 
+		linux.create_kunitconfig(cli_args.build_dir)
+		linux.read_kunitconfig(cli_args.build_dir)
+
 		request = KunitRequest(cli_args.raw_output,
 				       cli_args.timeout,
 				       cli_args.jobs,
@@ -283,12 +276,12 @@ def main(argv, linux=None):
 				not os.path.exists(cli_args.build_dir)):
 			os.mkdir(cli_args.build_dir)
 
-		if not os.path.exists(kunit_kernel.kunitconfig_path):
-			create_default_kunitconfig()
-
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
 
+		linux.create_kunitconfig(cli_args.build_dir)
+		linux.read_kunitconfig(cli_args.build_dir)
+
 		request = KunitConfigRequest(cli_args.build_dir,
 					     cli_args.make_options)
 		result = config_tests(linux, request)
@@ -301,6 +294,9 @@ def main(argv, linux=None):
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
 
+		linux.create_kunitconfig(cli_args.build_dir)
+		linux.read_kunitconfig(cli_args.build_dir)
+
 		request = KunitBuildRequest(cli_args.jobs,
 					    cli_args.build_dir,
 					    cli_args.alltests,
@@ -315,6 +311,9 @@ def main(argv, linux=None):
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
 
+		linux.create_kunitconfig(cli_args.build_dir)
+		linux.read_kunitconfig(cli_args.build_dir)
+
 		exec_request = KunitExecRequest(cli_args.timeout,
 						cli_args.build_dir,
 						cli_args.alltests)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index b557b1e93f98..633a2efcfdbd 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -6,10 +6,10 @@
 # Author: Felix Guo <felixguoxiuping@gmail.com>
 # Author: Brendan Higgins <brendanhiggins@google.com>
 
-
 import logging
 import subprocess
 import os
+import shutil
 import signal
 
 from contextlib import ExitStack
@@ -18,7 +18,8 @@ import kunit_config
 import kunit_parser
 
 KCONFIG_PATH = '.config'
-kunitconfig_path = '.kunitconfig'
+KUNITCONFIG_PATH = '.kunitconfig'
+DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig'
 BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
 
 class ConfigError(Exception):
@@ -99,19 +100,22 @@ class LinuxSourceTreeOperations(object):
 						   stderr=subprocess.STDOUT)
 			process.wait(timeout)
 
-
 def get_kconfig_path(build_dir):
 	kconfig_path = KCONFIG_PATH
 	if build_dir:
 		kconfig_path = os.path.join(build_dir, KCONFIG_PATH)
 	return kconfig_path
 
+def get_kunitconfig_path(build_dir):
+	kunitconfig_path = KUNITCONFIG_PATH
+	if build_dir:
+		kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH)
+	return kunitconfig_path
+
 class LinuxSourceTree(object):
 	"""Represents a Linux kernel source tree with KUnit tests."""
 
 	def __init__(self):
-		self._kconfig = kunit_config.Kconfig()
-		self._kconfig.read_from_file(kunitconfig_path)
 		self._ops = LinuxSourceTreeOperations()
 		signal.signal(signal.SIGINT, self.signal_handler)
 
@@ -123,6 +127,16 @@ class LinuxSourceTree(object):
 			return False
 		return True
 
+	def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH):
+		kunitconfig_path = get_kunitconfig_path(build_dir)
+		if not os.path.exists(kunitconfig_path):
+			shutil.copyfile(defconfig, kunitconfig_path)
+
+	def read_kunitconfig(self, build_dir):
+		kunitconfig_path = get_kunitconfig_path(build_dir)
+		self._kconfig = kunit_config.Kconfig()
+		self._kconfig.read_from_file(kunitconfig_path)
+
 	def validate_config(self, build_dir):
 		kconfig_path = get_kconfig_path(build_dir)
 		validated_kconfig = kunit_config.Kconfig()
-- 
cgit v1.2.3-70-g09d2


From 128dc4bcc8c0c7c3bab4a3818a1ec608cccb017a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 26 Oct 2020 18:59:26 +0200
Subject: kunit: Do not pollute source directory with generated files
 (test.log)

When --build_dir is provided use it and do not pollute source directory
which even can be mounted over network or read-only.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_kernel.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 633a2efcfdbd..b4768fa03ce0 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -21,6 +21,7 @@ KCONFIG_PATH = '.config'
 KUNITCONFIG_PATH = '.kunitconfig'
 DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig'
 BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
+OUTFILE_PATH = 'test.log'
 
 class ConfigError(Exception):
 	"""Represents an error trying to configure the Linux kernel."""
@@ -89,11 +90,12 @@ class LinuxSourceTreeOperations(object):
 		except subprocess.CalledProcessError as e:
 			raise BuildError(e.output.decode())
 
-	def linux_bin(self, params, timeout, build_dir, outfile):
+	def linux_bin(self, params, timeout, build_dir):
 		"""Runs the Linux UML binary. Must be named 'linux'."""
 		linux_bin = './linux'
 		if build_dir:
 			linux_bin = os.path.join(build_dir, 'linux')
+		outfile = get_outfile_path(build_dir)
 		with open(outfile, 'w') as output:
 			process = subprocess.Popen([linux_bin] + params,
 						   stdout=output,
@@ -112,6 +114,12 @@ def get_kunitconfig_path(build_dir):
 		kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH)
 	return kunitconfig_path
 
+def get_outfile_path(build_dir):
+	outfile_path = OUTFILE_PATH
+	if build_dir:
+		outfile_path = os.path.join(build_dir, OUTFILE_PATH)
+	return outfile_path
+
 class LinuxSourceTree(object):
 	"""Represents a Linux kernel source tree with KUnit tests."""
 
@@ -192,8 +200,8 @@ class LinuxSourceTree(object):
 
 	def run_kernel(self, args=[], build_dir='', timeout=None):
 		args.extend(['mem=1G'])
-		outfile = 'test.log'
-		self._ops.linux_bin(args, timeout, build_dir, outfile)
+		self._ops.linux_bin(args, timeout, build_dir)
+		outfile = get_outfile_path(build_dir)
 		subprocess.call(['stty', 'sane'])
 		with open(outfile, 'r') as file:
 			for line in file:
-- 
cgit v1.2.3-70-g09d2


From 390881448b1ff1e9d82896abbbda7cdb8e0be27c Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Thu, 29 Oct 2020 15:09:29 -0700
Subject: kunit: tool: print out stderr from make (like build warnings)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the tool redirects make stdout + stderr, and only shows them
if the make command fails.
This means build warnings aren't shown to the user.

This change prints the contents of stderr even if make succeeds, under
the assumption these are only build warnings or other messages the user
likely wants to see.

We drop stdout from the raised exception since we can no longer easily
collate stdout and stderr and just showing the stderr seems fine.

Example with a warning:

[14:56:35] Building KUnit Kernel ...
../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’:
../lib/kunit/kunit-test.c:19:6: warning: unused variable ‘unused’ [-Wunused-variable]
   19 |  int unused;
      |      ^~~~~~

[14:56:40] Starting KUnit Kernel ...

Note the stderr has a trailing \n, and since we use print, we add
another, but it helps separate make and kunit.py output.

Example with a build error:

[15:02:45] Building KUnit Kernel ...
ERROR:root:../lib/kunit/kunit-test.c: In function ‘kunit_test_successful_try’:
../lib/kunit/kunit-test.c:19:2: error: unknown type name ‘invalid_type’
   19 |  invalid_type *test = data;
      |  ^~~~~~~~~~~~
...

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_kernel.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index b4768fa03ce0..2e3cc0fac726 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -84,11 +84,16 @@ class LinuxSourceTreeOperations(object):
 		if build_dir:
 			command += ['O=' + build_dir]
 		try:
-			subprocess.check_output(command, stderr=subprocess.STDOUT)
+			proc = subprocess.Popen(command,
+						stderr=subprocess.PIPE,
+						stdout=subprocess.DEVNULL)
 		except OSError as e:
-			raise BuildError('Could not call execute make: ' + str(e))
-		except subprocess.CalledProcessError as e:
-			raise BuildError(e.output.decode())
+			raise BuildError('Could not call make command: ' + str(e))
+		_, stderr = proc.communicate()
+		if proc.returncode != 0:
+			raise BuildError(stderr.decode())
+		if stderr:  # likely only due to build warnings
+			print(stderr.decode())
 
 	def linux_bin(self, params, timeout, build_dir):
 		"""Runs the Linux UML binary. Must be named 'linux'."""
-- 
cgit v1.2.3-70-g09d2


From 060352e141e4c71ce147a2737f6d30a97f2ec317 Mon Sep 17 00:00:00 2001
From: Daniel Latypov <dlatypov@google.com>
Date: Fri, 30 Oct 2020 15:38:53 -0700
Subject: kunit: tool: fix extra trailing \n in raw + parsed test output

For simplcity, strip all trailing whitespace from parsed output.
I imagine no one is printing out meaningful trailing whitespace via
KUNIT_FAIL() or similar, and that if they are, they really shouldn't.

`isolate_kunit_output()` yielded liens with trailing \n, which results
in artifacty output like this:

$ ./tools/testing/kunit/kunit.py run
[16:16:46] [FAILED] example_simple_test
[16:16:46]     # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29

[16:16:46]     Expected 1 + 1 == 3, but

[16:16:46]         1 + 1 == 2

[16:16:46]         3 == 3

[16:16:46]     not ok 1 - example_simple_test

[16:16:46]

After this change:
[16:16:46]     # example_simple_test: EXPECTATION FAILED at lib/kunit/kunit-example-test.c:29
[16:16:46]     Expected 1 + 1 == 3, but
[16:16:46]         1 + 1 == 2
[16:16:46]         3 == 3
[16:16:46]     not ok 1 - example_simple_test
[16:16:46]

We should *not* be expecting lines to end with \n in kunit_tool_test.py
for this reason.

Do the same for `raw_output()` as well which suffers from the same
issue.

This is a followup to [1], but rebased onto kunit-fixes to pick up the
other raw_output() fix and fixes for kunit_tool_test.py.

[1] https://lore.kernel.org/linux-kselftest/20201020233219.4146059-1-dlatypov@google.com/

Signed-off-by: Daniel Latypov <dlatypov@google.com>
Reviewed-by: David Gow <davidgow@google.com>
Tested-by: David Gow <davidgow@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py    | 3 ++-
 tools/testing/kunit/kunit_tool_test.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index db0347baa428..bbfe1b4e4c1c 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -54,6 +54,7 @@ kunit_end_re = re.compile('(List of all partitions:|'
 def isolate_kunit_output(kernel_output):
 	started = False
 	for line in kernel_output:
+		line = line.rstrip()  # line always has a trailing \n
 		if kunit_start_re.search(line):
 			prefix_len = len(line.split('TAP version')[0])
 			started = True
@@ -65,7 +66,7 @@ def isolate_kunit_output(kernel_output):
 
 def raw_output(kernel_output):
 	for line in kernel_output:
-		print(line)
+		print(line.rstrip())
 
 DIVIDER = '=' * 60
 
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 0b60855fb819..497ab51bc170 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -102,7 +102,7 @@ class KUnitParserTest(unittest.TestCase):
 			'test_data/test_output_isolated_correctly.log')
 		file = open(log_path)
 		result = kunit_parser.isolate_kunit_output(file.readlines())
-		self.assertContains('TAP version 14\n', result)
+		self.assertContains('TAP version 14', result)
 		self.assertContains('	# Subtest: example', result)
 		self.assertContains('	1..2', result)
 		self.assertContains('	ok 1 - example_simple_test', result)
@@ -115,7 +115,7 @@ class KUnitParserTest(unittest.TestCase):
 			'test_data/test_pound_sign.log')
 		with open(log_path) as file:
 			result = kunit_parser.isolate_kunit_output(file.readlines())
-		self.assertContains('TAP version 14\n', result)
+		self.assertContains('TAP version 14', result)
 		self.assertContains('	# Subtest: kunit-resource-test', result)
 		self.assertContains('	1..5', result)
 		self.assertContains('	ok 1 - kunit_resource_test_init_resources', result)
-- 
cgit v1.2.3-70-g09d2


From ff2c395b9257f0e617f9cd212893f3c72c80ee6c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:40 +1100
Subject: selftests/gpio: Use TEST_GEN_PROGS_EXTENDED

Use TEST_GEN_PROGS_EXTENDED rather than TEST_PROGS_EXTENDED.

That tells the lib.mk logic that the files it references are to be
generated by the Makefile.

Having done that we don't need to override the all rule.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 32bdc978a711..c85fb5acf5f4 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -11,22 +11,20 @@ LDLIBS += $(VAR_LDLIBS)
 
 TEST_PROGS := gpio-mockup.sh
 TEST_FILES := gpio-mockup-sysfs.sh
-TEST_PROGS_EXTENDED := gpio-mockup-chardev
+TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev
 
 GPIODIR := $(realpath ../../../gpio)
 GPIOOBJ := gpio-utils.o
 
-all: $(TEST_PROGS_EXTENDED)
-
 override define CLEAN
-	$(RM) $(TEST_PROGS_EXTENDED)
+	$(RM) $(TEST_GEN_PROGS_EXTENDED)
 	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
 endef
 
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
+$(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
 
 $(GPIODIR)/$(GPIOOBJ):
 	$(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR)
-- 
cgit v1.2.3-70-g09d2


From 449539da2e237336bc750b41f1736a77f9aca25c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:41 +1100
Subject: selftests/gpio: Move include of lib.mk up

Move the include of lib.mk up so that in a subsequent patch we can use
OUTPUT, which is initialised by lib.mk, in the definition of the GPIO
variables.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index c85fb5acf5f4..615c8a953ade 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -13,6 +13,9 @@ TEST_PROGS := gpio-mockup.sh
 TEST_FILES := gpio-mockup-sysfs.sh
 TEST_GEN_PROGS_EXTENDED := gpio-mockup-chardev
 
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
 GPIODIR := $(realpath ../../../gpio)
 GPIOOBJ := gpio-utils.o
 
@@ -21,9 +24,6 @@ override define CLEAN
 	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
 endef
 
-KSFT_KHDR_INSTALL := 1
-include ../lib.mk
-
 $(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
 
 $(GPIODIR)/$(GPIOOBJ):
-- 
cgit v1.2.3-70-g09d2


From b68c1c65dec5fb5186ebd33ce52059b4c6db8500 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:42 +1100
Subject: selftests/gpio: Fix build when source tree is read only

Currently the gpio selftests fail to build if the source tree is read
only:

  make -j 160 -C tools/testing/selftests TARGETS=gpio
  make[1]: Entering directory '/linux/tools/testing/selftests/gpio'
  make OUTPUT=/linux/tools/gpio/ -C /linux/tools/gpio
  make[2]: Entering directory '/linux/tools/gpio'
  mkdir -p /linux/tools/gpio/include/linux 2>&1 || true
  ln -sf /linux/tools/gpio/../../include/uapi/linux/gpio.h /linux/tools/gpio/include/linux/gpio.h
  ln: failed to create symbolic link '/linux/tools/gpio/include/linux/gpio.h': Read-only file system

This happens because we ask make to build ../../../gpio (tools/gpio)
without pointing OUTPUT away from the source directory.

To fix it we create a subdirectory of the existing OUTPUT directory,
called tools-gpio, and tell tools/gpio to build in there.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 615c8a953ade..acf4088a9891 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -17,14 +17,18 @@ KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
 GPIODIR := $(realpath ../../../gpio)
-GPIOOBJ := gpio-utils.o
+GPIOOUT := $(OUTPUT)/tools-gpio/
+GPIOOBJ := $(GPIOOUT)/gpio-utils.o
 
 override define CLEAN
 	$(RM) $(TEST_GEN_PROGS_EXTENDED)
-	$(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
+	$(RM) -rf $(GPIOOUT)
 endef
 
-$(TEST_GEN_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
+$(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ)
 
-$(GPIODIR)/$(GPIOOBJ):
-	$(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR)
+$(GPIOOUT):
+	mkdir -p $@
+
+$(GPIOOBJ): $(GPIOOUT)
+	$(MAKE) OUTPUT=$(GPIOOUT) -C $(GPIODIR)
-- 
cgit v1.2.3-70-g09d2


From 85128c5bcdf9bd9b574d7cbafa49170a39fed2e1 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:08:43 +1100
Subject: selftests/gpio: Add to CLEAN rule rather than overriding

Rather than overriding the CLEAN rule we can just append to it.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/gpio/Makefile | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index acf4088a9891..41582fe485ee 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -20,10 +20,7 @@ GPIODIR := $(realpath ../../../gpio)
 GPIOOUT := $(OUTPUT)/tools-gpio/
 GPIOOBJ := $(GPIOOUT)/gpio-utils.o
 
-override define CLEAN
-	$(RM) $(TEST_GEN_PROGS_EXTENDED)
-	$(RM) -rf $(GPIOOUT)
-endef
+CLEAN += ; $(RM) -rf $(GPIOOUT)
 
 $(TEST_GEN_PROGS_EXTENDED): $(GPIOOBJ)
 
-- 
cgit v1.2.3-70-g09d2


From fc4a3a1bf9ad799181e4d4ec9c2598c0405bc27d Mon Sep 17 00:00:00 2001
From: Tommi Rantala <tommi.t.rantala@nokia.com>
Date: Mon, 2 Nov 2020 09:39:18 +0200
Subject: selftests: intel_pstate: ftime() is deprecated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use clock_gettime() instead of deprecated ftime().

  aperf.c: In function ‘main’:
  aperf.c:58:2: warning: ‘ftime’ is deprecated [-Wdeprecated-declarations]
     58 |  ftime(&before);
        |  ^~~~~
  In file included from aperf.c:9:
  /usr/include/sys/timeb.h:39:12: note: declared here
     39 | extern int ftime (struct timeb *__timebuf)
        |            ^~~~~

Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/intel_pstate/aperf.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index f6cd03a87493..a8acf3996973 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -10,8 +10,12 @@
 #include <sched.h>
 #include <errno.h>
 #include <string.h>
+#include <time.h>
 #include "../kselftest.h"
 
+#define MSEC_PER_SEC	1000L
+#define NSEC_PER_MSEC	1000000L
+
 void usage(char *name) {
 	printf ("Usage: %s cpunum\n", name);
 }
@@ -22,7 +26,7 @@ int main(int argc, char **argv) {
 	long long tsc, old_tsc, new_tsc;
 	long long aperf, old_aperf, new_aperf;
 	long long mperf, old_mperf, new_mperf;
-	struct timeb before, after;
+	struct timespec before, after;
 	long long int start, finish, total;
 	cpu_set_t cpuset;
 
@@ -55,7 +59,10 @@ int main(int argc, char **argv) {
 		return 1;
 	}
 
-	ftime(&before);
+	if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) {
+		perror("clock_gettime");
+		return 1;
+	}
 	pread(fd, &old_tsc,  sizeof(old_tsc), 0x10);
 	pread(fd, &old_aperf,  sizeof(old_mperf), 0xe7);
 	pread(fd, &old_mperf,  sizeof(old_aperf), 0xe8);
@@ -64,7 +71,10 @@ int main(int argc, char **argv) {
 		sqrt(i);
 	}
 
-	ftime(&after);
+	if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) {
+		perror("clock_gettime");
+		return 1;
+	}
 	pread(fd, &new_tsc,  sizeof(new_tsc), 0x10);
 	pread(fd, &new_aperf,  sizeof(new_mperf), 0xe7);
 	pread(fd, &new_mperf,  sizeof(new_aperf), 0xe8);
@@ -73,11 +83,11 @@ int main(int argc, char **argv) {
 	aperf = new_aperf-old_aperf;
 	mperf = new_mperf-old_mperf;
 
-	start = before.time*1000 + before.millitm;
-	finish = after.time*1000 + after.millitm;
+	start = before.tv_sec*MSEC_PER_SEC + before.tv_nsec/NSEC_PER_MSEC;
+	finish = after.tv_sec*MSEC_PER_SEC + after.tv_nsec/NSEC_PER_MSEC;
 	total = finish - start;
 
-	printf("runTime: %4.2f\n", 1.0*total/1000);
+	printf("runTime: %4.2f\n", 1.0*total/MSEC_PER_SEC);
 	printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 1c49e3783f8899555190a49024ac86d3d76633cd Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 4 Nov 2020 21:03:05 +1100
Subject: selftests/memfd: Fix implicit declaration warnings

The memfd tests emit several warnings:

  fuse_test.c:261:7: warning: implicit declaration of function 'open'
  fuse_test.c:67:6: warning: implicit declaration of function 'fcntl'
  memfd_test.c:397:6: warning: implicit declaration of function 'fallocate'
  memfd_test.c:64:7: warning: implicit declaration of function 'open'
  memfd_test.c:90:6: warning: implicit declaration of function 'fcntl'

These are all caused by the test not including fcntl.h.

Instead of including linux/fcntl.h, include fcntl.h, which should
eventually cause the former to be included as well.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/memfd/fuse_test.c  | 2 +-
 tools/testing/selftests/memfd/memfd_test.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index b018e835737d..be675002f918 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -20,7 +20,7 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <linux/falloc.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
 #include <linux/memfd.h>
 #include <sched.h>
 #include <stdio.h>
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 334a7eea2004..74baab83fec3 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -6,7 +6,7 @@
 #include <inttypes.h>
 #include <limits.h>
 #include <linux/falloc.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
 #include <linux/memfd.h>
 #include <sched.h>
 #include <stdio.h>
-- 
cgit v1.2.3-70-g09d2


From 82f147944c650a07831c796c398f5c973dbdde79 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Sat, 7 Nov 2020 17:19:35 +0800
Subject: tool: selftests: fix spelling typo of 'writting'

writting -> writing

Signed-off-by: Wang Qing <wangqing@vivo.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 9b0912a01777..9132fae7ad65 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -894,7 +894,7 @@ static int faulting_process(int signal_test)
 				count_verify[nr]);
 	        }
 		/*
-		 * Trigger write protection if there is by writting
+		 * Trigger write protection if there is by writing
 		 * the same value back.
 		 */
 		*area_count(area_dst, nr) = count;
@@ -922,7 +922,7 @@ static int faulting_process(int signal_test)
 				count_verify[nr]); exit(1);
 		}
 		/*
-		 * Trigger write protection if there is by writting
+		 * Trigger write protection if there is by writing
 		 * the same value back.
 		 */
 		*area_count(area_dst, nr) = count;
-- 
cgit v1.2.3-70-g09d2


From 93f20eff0cca972d74cb554a2e8b47730228be16 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 28 Oct 2020 16:31:14 +0800
Subject: selftests/run_kselftest.sh: fix dry-run typo

Should be -d instead of -n for dry-run.

Fixes: 5da1918446a1 ("selftests/run_kselftest.sh: Make each test individually selectable")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/run_kselftest.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index 609a4ef9300e..97165a83df63 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -48,7 +48,7 @@ while true; do
 		-l | --list)
 			echo "$available"
 			exit 0 ;;
-		-n | --dry-run)
+		-d | --dry-run)
 			dryrun="echo"
 			shift ;;
 		-h | --help)
-- 
cgit v1.2.3-70-g09d2


From c2e46f6b3e3551558d44c4dc518b9667cb0d5f8b Mon Sep 17 00:00:00 2001
From: Sachin Sant <sachinp@linux.vnet.ibm.com>
Date: Fri, 6 Nov 2020 13:10:06 +0530
Subject: selftests/cgroup: Fix build on older distros

On older distros struct clone_args does not have a cgroup member,
leading to build errors:

 cgroup_util.c: In function 'clone_into_cgroup':
 cgroup_util.c:343:4: error: 'struct clone_args' has no member named 'cgroup'
 cgroup_util.c:346:33: error: invalid application of 'sizeof' to incomplete
  type 'struct clone_args'

But the selftests already have a locally defined version of the
structure which is up to date, called __clone_args.

So use __clone_args which fixes the error.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Sachin Sant <sachinp@linux.vnet.ibm.com>>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/cgroup/cgroup_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 05853b0b8831..027014662fb2 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -337,13 +337,13 @@ pid_t clone_into_cgroup(int cgroup_fd)
 #ifdef CLONE_ARGS_SIZE_VER2
 	pid_t pid;
 
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_INTO_CGROUP,
 		.exit_signal = SIGCHLD,
 		.cgroup = cgroup_fd,
 	};
 
-	pid = sys_clone3(&args, sizeof(struct clone_args));
+	pid = sys_clone3(&args, sizeof(struct __clone_args));
 	/*
 	 * Verify that this is a genuine test failure:
 	 * ENOSYS -> clone3() not available
-- 
cgit v1.2.3-70-g09d2


From 97adb13dc9ba08ecd4758bc59efc0205f5cbf377 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vlad@buslov.dev>
Date: Sat, 7 Nov 2020 13:19:28 +0200
Subject: selftest: fix flower terse dump tests

Iproute2 tc classifier terse dump has been accepted with modified syntax.
Update the tests accordingly.

Signed-off-by: Vlad Buslov <vlad@buslov.dev>
Fixes: e7534fd42a99 ("selftests: implement flower classifier terse dump tests")
Link: https://lore.kernel.org/r/20201107111928.453534-1-vlad@buslov.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index bb543bf69d69..361235ad574b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -100,7 +100,7 @@
         ],
         "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
         "expExitCode": "0",
-        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
         "matchPattern": "filter protocol ip pref 1 flower.*handle",
         "matchCount": "1",
         "teardown": [
@@ -119,7 +119,7 @@
         ],
         "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
         "expExitCode": "0",
-        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
         "matchPattern": "  dst_mac e4:11:22:11:4a:51",
         "matchCount": "0",
         "teardown": [
-- 
cgit v1.2.3-70-g09d2


From 1ccd58331f6f2af73758e572f8aa0215b0cacc0e Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 7 Nov 2020 17:47:17 +0100
Subject: selftests: disable rp_filter when testing bareudp

Some systems have rp_filter=1 as default configuration. This breaks
bareudp.sh as the intermediate namespaces handle part of the routing
with regular IPv4 routes but the reverse path is done with tc
(flower/tunnel_key/mirred).

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/28140b7d20161e4f766b558018fe2718f9bc1117.1604767577.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/bareudp.sh | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index c6fe22de7d0e..c2b9e990e544 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -234,6 +234,12 @@ setup_overlay_ipv4()
 	ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
 	ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
 	ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
+
+	# The intermediate namespaces don't have routes for the reverse path,
+	# as it will be handled by tc. So we need to ensure that rp_filter is
+	# not going to block the traffic.
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 }
 
 setup_overlay_ipv6()
-- 
cgit v1.2.3-70-g09d2


From 5329722057d41aebc31e391907a501feaa42f7d9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 9 Nov 2020 17:19:29 -0800
Subject: bpf: Assign ID to vmlinux BTF and return extra info for BTF in
 GET_OBJ_INFO

Allocate ID for vmlinux BTF. This makes it visible when iterating over all BTF
objects in the system. To allow distinguishing vmlinux BTF (and later kernel
module BTF) from user-provided BTFs, expose extra kernel_btf flag, as well as
BTF name ("vmlinux" for vmlinux BTF, will equal to module's name for module
BTF).  We might want to later allow specifying BTF name for user-provided BTFs
as well, if that makes sense. But currently this is reserved only for
in-kernel BTFs.

Having in-kernel BTFs exposed IDs will allow to extend BPF APIs that require
in-kernel BTF type with ability to specify BTF types from kernel modules, not
just vmlinux BTF. This will be implemented in a follow up patch set for
fentry/fexit/fmod_ret/lsm/etc.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201110011932.3201430-3-andrii@kernel.org
---
 include/uapi/linux/bpf.h       |  3 +++
 kernel/bpf/btf.c               | 43 +++++++++++++++++++++++++++++++++++++++---
 tools/include/uapi/linux/bpf.h |  3 +++
 3 files changed, 46 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9879d6793e90..162999b12790 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4466,6 +4466,9 @@ struct bpf_btf_info {
 	__aligned_u64 btf;
 	__u32 btf_size;
 	__u32 id;
+	__aligned_u64 name;
+	__u32 name_len;
+	__u32 kernel_btf;
 } __attribute__((aligned(8)));
 
 struct bpf_link_info {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 727c1c27053f..856585db7aa7 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -214,6 +214,8 @@ struct btf {
 	struct btf *base_btf;
 	u32 start_id; /* first type ID in this BTF (0 for base BTF) */
 	u32 start_str_off; /* first string offset (0 for base BTF) */
+	char name[MODULE_NAME_LEN];
+	bool kernel_btf;
 };
 
 enum verifier_phase {
@@ -4429,6 +4431,8 @@ struct btf *btf_parse_vmlinux(void)
 
 	btf->data = __start_BTF;
 	btf->data_size = __stop_BTF - __start_BTF;
+	btf->kernel_btf = true;
+	snprintf(btf->name, sizeof(btf->name), "vmlinux");
 
 	err = btf_parse_hdr(env);
 	if (err)
@@ -4454,8 +4458,13 @@ struct btf *btf_parse_vmlinux(void)
 
 	bpf_struct_ops_init(btf, log);
 
-	btf_verifier_env_free(env);
 	refcount_set(&btf->refcnt, 1);
+
+	err = btf_alloc_id(btf);
+	if (err)
+		goto errout;
+
+	btf_verifier_env_free(env);
 	return btf;
 
 errout:
@@ -5553,7 +5562,9 @@ int btf_get_info_by_fd(const struct btf *btf,
 	struct bpf_btf_info info;
 	u32 info_copy, btf_copy;
 	void __user *ubtf;
-	u32 uinfo_len;
+	char __user *uname;
+	u32 uinfo_len, uname_len, name_len;
+	int ret = 0;
 
 	uinfo = u64_to_user_ptr(attr->info.info);
 	uinfo_len = attr->info.info_len;
@@ -5570,11 +5581,37 @@ int btf_get_info_by_fd(const struct btf *btf,
 		return -EFAULT;
 	info.btf_size = btf->data_size;
 
+	info.kernel_btf = btf->kernel_btf;
+
+	uname = u64_to_user_ptr(info.name);
+	uname_len = info.name_len;
+	if (!uname ^ !uname_len)
+		return -EINVAL;
+
+	name_len = strlen(btf->name);
+	info.name_len = name_len;
+
+	if (uname) {
+		if (uname_len >= name_len + 1) {
+			if (copy_to_user(uname, btf->name, name_len + 1))
+				return -EFAULT;
+		} else {
+			char zero = '\0';
+
+			if (copy_to_user(uname, btf->name, uname_len - 1))
+				return -EFAULT;
+			if (put_user(zero, uname + uname_len - 1))
+				return -EFAULT;
+			/* let user-space know about too short buffer */
+			ret = -ENOSPC;
+		}
+	}
+
 	if (copy_to_user(uinfo, &info, info_copy) ||
 	    put_user(info_copy, &uattr->info.info_len))
 		return -EFAULT;
 
-	return 0;
+	return ret;
 }
 
 int btf_get_fd_by_id(u32 id)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9879d6793e90..162999b12790 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4466,6 +4466,9 @@ struct bpf_btf_info {
 	__aligned_u64 btf;
 	__u32 btf_size;
 	__u32 id;
+	__aligned_u64 name;
+	__u32 name_len;
+	__u32 kernel_btf;
 } __attribute__((aligned(8)));
 
 struct bpf_link_info {
-- 
cgit v1.2.3-70-g09d2


From cecaf4a0f2dcbd7e76156f6d63748b84c176b95b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 9 Nov 2020 17:19:32 -0800
Subject: tools/bpftool: Add support for in-kernel and named BTF in `btf show`

Display vmlinux BTF name and kernel module names when listing available BTFs
on the system.

In human-readable output mode, module BTFs are reported with "name
[module-name]", while vmlinux BTF will be reported as "name [vmlinux]".
Square brackets are added by bpftool and follow kernel convention when
displaying modules in human-readable text outputs.

[vmuser@archvm bpf]$ sudo ../../../bpf/bpftool/bpftool btf s
1: name [vmlinux]  size 4082281B
6: size 2365B  prog_ids 8,6  map_ids 3
7: name [button]  size 46895B
8: name [pcspkr]  size 42328B
9: name [serio_raw]  size 39375B
10: name [floppy]  size 57185B
11: name [i2c_core]  size 76186B
12: name [crc32c_intel]  size 16036B
13: name [i2c_piix4]  size 50497B
14: name [irqbypass]  size 14124B
15: name [kvm]  size 197985B
16: name [kvm_intel]  size 123564B
17: name [cryptd]  size 42466B
18: name [crypto_simd]  size 17187B
19: name [glue_helper]  size 39205B
20: name [aesni_intel]  size 41034B
25: size 36150B
        pids bpftool(2519)

In JSON mode, two fields (boolean "kernel" and string "name") are reported for
each BTF object. vmlinux BTF is reported with name "vmlinux" (kernel itself
returns and empty name for vmlinux BTF).

[vmuser@archvm bpf]$ sudo ../../../bpf/bpftool/bpftool btf s -jp
[{
        "id": 1,
        "size": 4082281,
        "prog_ids": [],
        "map_ids": [],
        "kernel": true,
        "name": "vmlinux"
    },{
        "id": 6,
        "size": 2365,
        "prog_ids": [8,6
        ],
        "map_ids": [3
        ],
        "kernel": false
    },{
        "id": 7,
        "size": 46895,
        "prog_ids": [],
        "map_ids": [],
        "kernel": true,
        "name": "button"
    },{

...

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/20201110011932.3201430-6-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index c96b56e8e3a4..ed5e97157241 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -742,9 +742,14 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
 	       struct btf_attach_table *btf_map_table)
 {
 	struct btf_attach_point *obj;
+	const char *name = u64_to_ptr(info->name);
 	int n;
 
 	printf("%u: ", info->id);
+	if (info->kernel_btf)
+		printf("name [%s]  ", name);
+	else if (name && name[0])
+		printf("name %s  ", name);
 	printf("size %uB", info->btf_size);
 
 	n = 0;
@@ -771,6 +776,7 @@ show_btf_json(struct bpf_btf_info *info, int fd,
 	      struct btf_attach_table *btf_map_table)
 {
 	struct btf_attach_point *obj;
+	const char *name = u64_to_ptr(info->name);
 
 	jsonw_start_object(json_wtr);	/* btf object */
 	jsonw_uint_field(json_wtr, "id", info->id);
@@ -796,6 +802,11 @@ show_btf_json(struct bpf_btf_info *info, int fd,
 
 	emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
 
+	jsonw_bool_field(json_wtr, "kernel", info->kernel_btf);
+
+	if (name && name[0])
+		jsonw_string_field(json_wtr, "name", name);
+
 	jsonw_end_object(json_wtr);	/* btf object */
 }
 
@@ -803,15 +814,30 @@ static int
 show_btf(int fd, struct btf_attach_table *btf_prog_table,
 	 struct btf_attach_table *btf_map_table)
 {
-	struct bpf_btf_info info = {};
+	struct bpf_btf_info info;
 	__u32 len = sizeof(info);
+	char name[64];
 	int err;
 
+	memset(&info, 0, sizeof(info));
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	if (err) {
 		p_err("can't get BTF object info: %s", strerror(errno));
 		return -1;
 	}
+	/* if kernel support emitting BTF object name, pass name pointer */
+	if (info.name_len) {
+		memset(&info, 0, sizeof(info));
+		info.name_len = sizeof(name);
+		info.name = ptr_to_u64(name);
+		len = sizeof(info);
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get BTF object info: %s", strerror(errno));
+			return -1;
+		}
+	}
 
 	if (json_output)
 		show_btf_json(&info, fd, btf_prog_table, btf_map_table);
-- 
cgit v1.2.3-70-g09d2


From 58cfa49c2ba7f815adccc27a775e7cf8a8f7f539 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 10 Nov 2020 09:50:12 +0800
Subject: selftest/bpf: Add missed ip6ip6 test back

In comment 173ca26e9b51 ("samples/bpf: add comprehensive ipip, ipip6,
ip6ip6 test") we added ip6ip6 test for bpf tunnel testing. But in commit
933a741e3b82 ("selftests/bpf: bpf tunnel test.") when we moved it to
the current folder, we didn't add it.

This patch add the ip6ip6 test back to bpf tunnel test. Update the ipip6's
topology for both IPv4 and IPv6 testing. Since iperf test is removed as
currect framework simplified it in purpose, I also removed unused tcp
checkings in test_tunnel_kern.c.

Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201110015013.1570716-2-liuhangbin@gmail.com
---
 .../testing/selftests/bpf/progs/test_tunnel_kern.c | 42 ++++-----------------
 tools/testing/selftests/bpf/test_tunnel.sh         | 43 ++++++++++++++++++++--
 2 files changed, 46 insertions(+), 39 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index f48dbfe24ddc..a621b58ab079 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -15,7 +15,6 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
-#include <linux/tcp.h>
 #include <linux/socket.h>
 #include <linux/pkt_cls.h>
 #include <linux/erspan.h>
@@ -528,12 +527,11 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
@@ -541,16 +539,6 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 	key.tunnel_ttl = 64;
 	if (iph->protocol == IPPROTO_ICMP) {
 		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-	} else {
-		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
-			return TC_ACT_SHOT;
-
-		if (tcp->dest == bpf_htons(5200))
-			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
-		else if (tcp->dest == bpf_htons(5201))
-			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
-		else
-			return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
@@ -585,19 +573,20 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct iphdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
-	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
@@ -634,35 +623,18 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
 	struct ipv6hdr *iph = data;
-	struct tcphdr *tcp = data + sizeof(*iph);
 	void *data_end = (void *)(long)skb->data_end;
 	int ret;
 
 	/* single length check */
-	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+	if (data + sizeof(*iph) > data_end) {
 		ERROR(1);
 		return TC_ACT_SHOT;
 	}
 
-	key.remote_ipv6[0] = bpf_htonl(0x2401db00);
 	key.tunnel_ttl = 64;
-
 	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
-		key.remote_ipv6[3] = bpf_htonl(1);
-	} else {
-		if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
-			ERROR(iph->nexthdr);
-			return TC_ACT_SHOT;
-		}
-
-		if (tcp->dest == bpf_htons(5200)) {
-			key.remote_ipv6[3] = bpf_htonl(1);
-		} else if (tcp->dest == bpf_htons(5201)) {
-			key.remote_ipv6[3] = bpf_htonl(2);
-		} else {
-			ERROR(tcp->dest);
-			return TC_ACT_SHOT;
-		}
+		key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	}
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index bd12ec97a44d..1ccbe804e8e1 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -24,12 +24,12 @@
 # Root namespace with metadata-mode tunnel + BPF
 # Device names and addresses:
 # 	veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
 #
 # Namespace at_ns0 with native tunnel
 # Device names and addresses:
 # 	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
 #
 #
 # End-to-end ping packet flow
@@ -250,7 +250,7 @@ add_ipip_tunnel()
 	ip addr add dev $DEV 10.1.1.200/24
 }
 
-add_ipip6tnl_tunnel()
+add_ip6tnl_tunnel()
 {
 	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
 	ip netns exec at_ns0 ip link set dev veth0 up
@@ -262,11 +262,13 @@ add_ipip6tnl_tunnel()
 		ip link add dev $DEV_NS type $TYPE \
 		local ::11 remote ::22
 	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
 	ip netns exec at_ns0 ip link set dev $DEV_NS up
 
 	# root namespace
 	ip link add dev $DEV type $TYPE external
 	ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV 1::22/96
 	ip link set dev $DEV up
 }
 
@@ -534,7 +536,7 @@ test_ipip6()
 
 	check $TYPE
 	config_device
-	add_ipip6tnl_tunnel
+	add_ip6tnl_tunnel
 	ip link set dev veth1 mtu 1500
 	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
 	# underlay
@@ -553,6 +555,34 @@ test_ipip6()
         echo -e ${GREEN}"PASS: $TYPE"${NC}
 }
 
+test_ip6ip6()
+{
+	TYPE=ip6tnl
+	DEV_NS=ip6ip6tnl00
+	DEV=ip6ip6tnl11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6tnl_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip6 over ip6
+	ping6 $PING_ARG 1::11
+	check_err $?
+	ip netns exec at_ns0 ping6 $PING_ARG 1::22
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
 setup_xfrm_tunnel()
 {
 	auth=0x$(printf '1%.0s' {1..40})
@@ -646,6 +676,7 @@ cleanup()
 	ip link del veth1 2> /dev/null
 	ip link del ipip11 2> /dev/null
 	ip link del ipip6tnl11 2> /dev/null
+	ip link del ip6ip6tnl11 2> /dev/null
 	ip link del gretap11 2> /dev/null
 	ip link del ip6gre11 2> /dev/null
 	ip link del ip6gretap11 2> /dev/null
@@ -742,6 +773,10 @@ bpf_tunnel_test()
 	test_ipip6
 	errors=$(( $errors + $? ))
 
+	echo "Testing IP6IP6 tunnel..."
+	test_ip6ip6
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPSec tunnel..."
 	test_xfrm_tunnel
 	errors=$(( $errors + $? ))
-- 
cgit v1.2.3-70-g09d2


From f9f16dfbe76e63ba9aec68055c08242b09be297e Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:45 +0800
Subject: perf mem: Search event name with more flexible path

The perf tool searches a memory event name under the folder
'/sys/devices/cpu/events/', this leads to the limitation for the
selection of a memory profiling event which must be under this folder.

Thus it's impossible to use any other event as memory event which is not
under this specific folder, e.g. Arm SPE hardware event is not located
in '/sys/devices/cpu/events/' so it cannot be enabled for memory
profiling.

This patch changes to search folder from '/sys/devices/cpu/events/' to
'/sys/devices', so it give flexibility to find events which can be used
for memory profiling.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mem-events.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index ea0af0bc4314..35c8d175a9d2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -18,8 +18,8 @@ unsigned int perf_mem_events__loads_ldlat = 30;
 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
 
 struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
-	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"mem-loads"),
-	E("ldlat-stores",	"cpu/mem-stores/P",		"mem-stores"),
+	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
+	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
 };
 #undef E
 
@@ -93,7 +93,7 @@ int perf_mem_events__init(void)
 		struct perf_mem_event *e = &perf_mem_events[j];
 		struct stat st;
 
-		scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+		scnprintf(path, PATH_MAX, "%s/devices/%s",
 			  mnt, e->sysfs_name);
 
 		if (!stat(path, &st))
-- 
cgit v1.2.3-70-g09d2


From eaf6aaeec5fa301c0eb8ae92962909b15d075e5f Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:46 +0800
Subject: perf mem: Introduce weak function perf_mem_events__ptr()

Different architectures might use different event or different event
parameters for memory profiling, this patch introduces a weak
perf_mem_events__ptr() function which allows to return back a
architecture specific memory event.

Since the variable 'perf_mem_events' can be only accessed by the
perf_mem_events__ptr() function, mark the variable as 'static', this
allows the architectures to define its own memory event array.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-3-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c     | 18 ++++++++++++------
 tools/perf/builtin-mem.c     | 21 ++++++++++++++-------
 tools/perf/util/mem-events.c | 26 +++++++++++++++++++-------
 tools/perf/util/mem-events.h |  2 +-
 4 files changed, 46 insertions(+), 21 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index d5bea5d3cd51..4d1a08e38233 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2867,6 +2867,7 @@ static int perf_c2c__record(int argc, const char **argv)
 	int ret;
 	bool all_user = false, all_kernel = false;
 	bool event_set = false;
+	struct perf_mem_event *e;
 	struct option options[] = {
 	OPT_CALLBACK('e', "event", &event_set, "event",
 		     "event selector. Use 'perf c2c record -e list' to list available events",
@@ -2894,11 +2895,15 @@ static int perf_c2c__record(int argc, const char **argv)
 	rec_argv[i++] = "record";
 
 	if (!event_set) {
-		perf_mem_events[PERF_MEM_EVENTS__LOAD].record  = true;
-		perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+		e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+		e->record = true;
+
+		e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+		e->record = true;
 	}
 
-	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+	e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+	if (e->record)
 		rec_argv[i++] = "-W";
 
 	rec_argv[i++] = "-d";
@@ -2906,12 +2911,13 @@ static int perf_c2c__record(int argc, const char **argv)
 	rec_argv[i++] = "--sample-cpu";
 
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-		if (!perf_mem_events[j].record)
+		e = perf_mem_events__ptr(j);
+		if (!e->record)
 			continue;
 
-		if (!perf_mem_events[j].supported) {
+		if (!e->supported) {
 			pr_err("failed: event '%s' not supported\n",
-			       perf_mem_events[j].name);
+			       perf_mem_events__name(j));
 			free(rec_argv);
 			return -1;
 		}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 3523279af6af..9a7df8d01296 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -64,6 +64,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 	const char **rec_argv;
 	int ret;
 	bool all_user = false, all_kernel = false;
+	struct perf_mem_event *e;
 	struct option options[] = {
 	OPT_CALLBACK('e', "event", &mem, "event",
 		     "event selector. use 'perf mem record -e list' to list available events",
@@ -86,13 +87,18 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
 	rec_argv[i++] = "record";
 
-	if (mem->operation & MEM_OPERATION_LOAD)
-		perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+	if (mem->operation & MEM_OPERATION_LOAD) {
+		e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+		e->record = true;
+	}
 
-	if (mem->operation & MEM_OPERATION_STORE)
-		perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+	if (mem->operation & MEM_OPERATION_STORE) {
+		e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+		e->record = true;
+	}
 
-	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+	e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+	if (e->record)
 		rec_argv[i++] = "-W";
 
 	rec_argv[i++] = "-d";
@@ -101,10 +107,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 		rec_argv[i++] = "--phys-data";
 
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-		if (!perf_mem_events[j].record)
+		e = perf_mem_events__ptr(j);
+		if (!e->record)
 			continue;
 
-		if (!perf_mem_events[j].supported) {
+		if (!e->supported) {
 			pr_err("failed: event '%s' not supported\n",
 			       perf_mem_events__name(j));
 			free(rec_argv);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 35c8d175a9d2..7a5a0d699e27 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -17,7 +17,7 @@ unsigned int perf_mem_events__loads_ldlat = 30;
 
 #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
 
-struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
 	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
 	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
 };
@@ -28,19 +28,31 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
 static char mem_loads_name[100];
 static bool mem_loads_name__init;
 
+struct perf_mem_event * __weak perf_mem_events__ptr(int i)
+{
+	if (i >= PERF_MEM_EVENTS__MAX)
+		return NULL;
+
+	return &perf_mem_events[i];
+}
+
 char * __weak perf_mem_events__name(int i)
 {
+	struct perf_mem_event *e = perf_mem_events__ptr(i);
+
+	if (!e)
+		return NULL;
+
 	if (i == PERF_MEM_EVENTS__LOAD) {
 		if (!mem_loads_name__init) {
 			mem_loads_name__init = true;
 			scnprintf(mem_loads_name, sizeof(mem_loads_name),
-				  perf_mem_events[i].name,
-				  perf_mem_events__loads_ldlat);
+				  e->name, perf_mem_events__loads_ldlat);
 		}
 		return mem_loads_name;
 	}
 
-	return (char *)perf_mem_events[i].name;
+	return (char *)e->name;
 }
 
 int perf_mem_events__parse(const char *str)
@@ -61,7 +73,7 @@ int perf_mem_events__parse(const char *str)
 
 	while (tok) {
 		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-			struct perf_mem_event *e = &perf_mem_events[j];
+			struct perf_mem_event *e = perf_mem_events__ptr(j);
 
 			if (strstr(e->tag, tok))
 				e->record = found = true;
@@ -90,7 +102,7 @@ int perf_mem_events__init(void)
 
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
 		char path[PATH_MAX];
-		struct perf_mem_event *e = &perf_mem_events[j];
+		struct perf_mem_event *e = perf_mem_events__ptr(j);
 		struct stat st;
 
 		scnprintf(path, PATH_MAX, "%s/devices/%s",
@@ -108,7 +120,7 @@ void perf_mem_events__list(void)
 	int j;
 
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-		struct perf_mem_event *e = &perf_mem_events[j];
+		struct perf_mem_event *e = perf_mem_events__ptr(j);
 
 		fprintf(stderr, "%-13s%-*s%s\n",
 			e->tag,
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 904dad34f7f7..726a9c8103e4 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -31,13 +31,13 @@ enum {
 	PERF_MEM_EVENTS__MAX,
 };
 
-extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
 extern unsigned int perf_mem_events__loads_ldlat;
 
 int perf_mem_events__parse(const char *str);
 int perf_mem_events__init(void);
 
 char *perf_mem_events__name(int i);
+struct perf_mem_event *perf_mem_events__ptr(int i);
 
 void perf_mem_events__list(void);
 
-- 
cgit v1.2.3-70-g09d2


From 4ba2452cd88f39da68a6dc05fcc95e8977fd6403 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:47 +0800
Subject: perf mem: Support new memory event PERF_MEM_EVENTS__LOAD_STORE

On the architectures with perf memory profiling, two types of hardware
events have been supported: load and store; if want to profile memory
for both load and store operations, the tool will use these two events
at the same time, the usage is:

  # perf mem record -t load,store -- uname

But this cannot be applied for AUX tracing event, the same PMU event can
be used to only trace memory load, or only memory store, or trace for
both memory load and store.

This patch introduces a new event PERF_MEM_EVENTS__LOAD_STORE, which is
used to support the event which can record both memory load and store
operations.

When user specifies memory operation type as 'load,store', or doesn't
set type so use 'load,store' as default, if the arch supports the event
PERF_MEM_EVENTS__LOAD_STORE, the tool will convert the required
operations to this single event; otherwise, if the arch doesn't support
PERF_MEM_EVENTS__LOAD_STORE, the tool rolls back to enable both events
PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE, which keeps the same
behaviour with before.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-4-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-mem.c     | 24 ++++++++++++++++++------
 tools/perf/util/mem-events.c | 13 ++++++++++++-
 tools/perf/util/mem-events.h |  1 +
 3 files changed, 31 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 9a7df8d01296..21ebe0f47e64 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -87,14 +87,26 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
 	rec_argv[i++] = "record";
 
-	if (mem->operation & MEM_OPERATION_LOAD) {
-		e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
-		e->record = true;
-	}
+	e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
 
-	if (mem->operation & MEM_OPERATION_STORE) {
-		e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+	/*
+	 * The load and store operations are required, use the event
+	 * PERF_MEM_EVENTS__LOAD_STORE if it is supported.
+	 */
+	if (e->tag &&
+	    (mem->operation & MEM_OPERATION_LOAD) &&
+	    (mem->operation & MEM_OPERATION_STORE)) {
 		e->record = true;
+	} else {
+		if (mem->operation & MEM_OPERATION_LOAD) {
+			e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+			e->record = true;
+		}
+
+		if (mem->operation & MEM_OPERATION_STORE) {
+			e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+			e->record = true;
+		}
 	}
 
 	e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 7a5a0d699e27..19007e463b8a 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -20,6 +20,7 @@ unsigned int perf_mem_events__loads_ldlat = 30;
 static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
 	E("ldlat-loads",	"cpu/mem-loads,ldlat=%u/P",	"cpu/events/mem-loads"),
 	E("ldlat-stores",	"cpu/mem-stores/P",		"cpu/events/mem-stores"),
+	E(NULL,			NULL,				NULL),
 };
 #undef E
 
@@ -75,6 +76,9 @@ int perf_mem_events__parse(const char *str)
 		for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
 			struct perf_mem_event *e = perf_mem_events__ptr(j);
 
+			if (!e->tag)
+				continue;
+
 			if (strstr(e->tag, tok))
 				e->record = found = true;
 		}
@@ -105,6 +109,13 @@ int perf_mem_events__init(void)
 		struct perf_mem_event *e = perf_mem_events__ptr(j);
 		struct stat st;
 
+		/*
+		 * If the event entry isn't valid, skip initialization
+		 * and "e->supported" will keep false.
+		 */
+		if (!e->tag)
+			continue;
+
 		scnprintf(path, PATH_MAX, "%s/devices/%s",
 			  mnt, e->sysfs_name);
 
@@ -123,7 +134,7 @@ void perf_mem_events__list(void)
 		struct perf_mem_event *e = perf_mem_events__ptr(j);
 
 		fprintf(stderr, "%-13s%-*s%s\n",
-			e->tag,
+			e->tag ?: "",
 			verbose > 0 ? 25 : 0,
 			verbose > 0 ? perf_mem_events__name(j) : "",
 			e->supported ? ": available" : "");
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 726a9c8103e4..5ef178278909 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -28,6 +28,7 @@ struct mem_info {
 enum {
 	PERF_MEM_EVENTS__LOAD,
 	PERF_MEM_EVENTS__STORE,
+	PERF_MEM_EVENTS__LOAD_STORE,
 	PERF_MEM_EVENTS__MAX,
 };
 
-- 
cgit v1.2.3-70-g09d2


From 8b8173b45a7a9709cc2597548469708a8efbd0d9 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:48 +0800
Subject: perf c2c: Support memory event PERF_MEM_EVENTS__LOAD_STORE

When user doesn't specify event name, perf c2c tool enables both the
load and store events, and this leads to failure for opening the
duplicate PMU device for AUX trace.

After the memory event PERF_MEM_EVENTS__LOAD_STORE is introduced, when
the user doesn't specify event name, this patch converts the required
operation to PERF_MEM_EVENTS__LOAD_STORE if the arch supports it.
Otherwise, the tool still rolls back to enable events
PERF_MEM_EVENTS__LOAD and PERF_MEM_EVENTS__STORE.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 4d1a08e38233..98ae33eac6cc 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2895,11 +2895,20 @@ static int perf_c2c__record(int argc, const char **argv)
 	rec_argv[i++] = "record";
 
 	if (!event_set) {
-		e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
-		e->record = true;
+		e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
+		/*
+		 * The load and store operations are required, use the event
+		 * PERF_MEM_EVENTS__LOAD_STORE if it is supported.
+		 */
+		if (e->tag) {
+			e->record = true;
+		} else {
+			e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+			e->record = true;
 
-		e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
-		e->record = true;
+			e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+			e->record = true;
+		}
 	}
 
 	e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
-- 
cgit v1.2.3-70-g09d2


From 436cce00710a3f234ab6b735b5980256e773d388 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:49 +0800
Subject: perf mem: Only initialize memory event for recording

It's needless to initialize memory events for reporting, this patch
moves memory event initialization for only recording.  Furthermore,
the change allows to parse perf data on cross platforms, e.g. perf
tool can report result properly even the machine doesn't support
the memory events.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-mem.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 21ebe0f47e64..72ce4b8fbb0f 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -77,6 +77,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 	OPT_END()
 	};
 
+	if (perf_mem_events__init()) {
+		pr_err("failed: memory events not supported\n");
+		return -1;
+	}
+
 	argc = parse_options(argc, argv, options, record_mem_usage,
 			     PARSE_OPT_KEEP_UNKNOWN);
 
@@ -441,11 +446,6 @@ int cmd_mem(int argc, const char **argv)
 		NULL
 	};
 
-	if (perf_mem_events__init()) {
-		pr_err("failed: memory events not supported\n");
-		return -1;
-	}
-
 	argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
 					mem_usage, PARSE_OPT_KEEP_UNKNOWN);
 
-- 
cgit v1.2.3-70-g09d2


From 014a771c7867fda5b40a95e1c7bc1aa5ac704c91 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:50 +0800
Subject: perf auxtrace: Add itrace option '-M' for memory events

This patch is to add itrace option '-M' to synthesize memory event.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-7-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/itrace.txt | 1 +
 tools/perf/util/auxtrace.c          | 4 ++++
 tools/perf/util/auxtrace.h          | 2 ++
 3 files changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index d3740c8f399b..079cdfabb352 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -11,6 +11,7 @@
 		d	create a debug log
 		f	synthesize first level cache events
 		m	synthesize last level cache events
+		M	synthesize memory events
 		t	synthesize TLB events
 		a	synthesize remote access events
 		g	synthesize a call chain (use with i or x)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 42a85c86421d..62e7f6c5f8b5 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1333,6 +1333,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
 	synth_opts->flc = true;
 	synth_opts->llc = true;
 	synth_opts->tlb = true;
+	synth_opts->mem = true;
 	synth_opts->remote_access = true;
 
 	if (no_sample) {
@@ -1554,6 +1555,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 		case 'a':
 			synth_opts->remote_access = true;
 			break;
+		case 'M':
+			synth_opts->mem = true;
+			break;
 		case 'q':
 			synth_opts->quick += 1;
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 951d2d14cf24..7e5c9e1552bd 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -88,6 +88,7 @@ enum itrace_period_type {
  * @llc: whether to synthesize last level cache events
  * @tlb: whether to synthesize TLB events
  * @remote_access: whether to synthesize remote access events
+ * @mem: whether to synthesize memory events
  * @callchain_sz: maximum callchain size
  * @last_branch_sz: branch context size
  * @period: 'instructions' events period
@@ -126,6 +127,7 @@ struct itrace_synth_opts {
 	bool			llc;
 	bool			tlb;
 	bool			remote_access;
+	bool			mem;
 	unsigned int		callchain_sz;
 	unsigned int		last_branch_sz;
 	unsigned long long	period;
-- 
cgit v1.2.3-70-g09d2


From 13e5df1e3f1ba1a90944362bc57690ea1369b3b7 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:51 +0800
Subject: perf mem: Support AUX trace

The 'perf mem' tool doesn't support AUX trace data so it cannot receive
the hardware tracing data.

On arm64, although it doesn't support PMU events for memory load and
store, ARM SPE is a good candidate for memory profiling, the hardware
tracer can record memory accessing operations with affiliated
information (e.g. physical address and virtual address for accessing,
cache levels, TLB walking, latency, etc).

To allow "perf mem" tool to support AUX trace, this patch adds the AUX
callbacks for session structure; make itrace memory event as default for
"perf mem", this tells the AUX trace decoder to synthesize memory
samples.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-8-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-mem.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 72ce4b8fbb0f..fdfbff7592f4 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -7,6 +7,7 @@
 #include "perf.h"
 
 #include <subcmd/parse-options.h>
+#include "util/auxtrace.h"
 #include "util/trace-event.h"
 #include "util/tool.h"
 #include "util/session.h"
@@ -255,6 +256,12 @@ static int process_sample_event(struct perf_tool *tool,
 
 static int report_raw_events(struct perf_mem *mem)
 {
+	struct itrace_synth_opts itrace_synth_opts = {
+		.set = true,
+		.mem = true,	/* Only enable memory event */
+		.default_no_sample = true,
+	};
+
 	struct perf_data data = {
 		.path  = input_name,
 		.mode  = PERF_DATA_MODE_READ,
@@ -267,6 +274,8 @@ static int report_raw_events(struct perf_mem *mem)
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
+	session->itrace_synth_opts = &itrace_synth_opts;
+
 	if (mem->cpu_list) {
 		ret = perf_session__cpu_bitmap(session, mem->cpu_list,
 					       mem->cpu_bitmap);
@@ -410,8 +419,12 @@ int cmd_mem(int argc, const char **argv)
 			.comm		= perf_event__process_comm,
 			.lost		= perf_event__process_lost,
 			.fork		= perf_event__process_fork,
+			.attr		= perf_event__process_attr,
 			.build_id	= perf_event__process_build_id,
 			.namespaces	= perf_event__process_namespaces,
+			.auxtrace_info  = perf_event__process_auxtrace_info,
+			.auxtrace       = perf_event__process_auxtrace,
+			.auxtrace_error = perf_event__process_auxtrace_error,
 			.ordered_events	= true,
 		},
 		.input_name		 = "perf.data",
-- 
cgit v1.2.3-70-g09d2


From c825f7885178f994a2a00ca02016940d94aaed6e Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:52 +0800
Subject: perf c2c: Support AUX trace

This patch adds the AUX callbacks in session structure, so support AUX
trace for "perf c2c" tool; make itrace memory event as default for "perf
c2c", this tells the AUX trace decoder to synthesize samples and can be
used for statistics.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-9-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 98ae33eac6cc..c5babeaa3b38 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -369,6 +369,10 @@ static struct perf_c2c c2c = {
 		.exit		= perf_event__process_exit,
 		.fork		= perf_event__process_fork,
 		.lost		= perf_event__process_lost,
+		.attr		= perf_event__process_attr,
+		.auxtrace_info  = perf_event__process_auxtrace_info,
+		.auxtrace       = perf_event__process_auxtrace,
+		.auxtrace_error = perf_event__process_auxtrace_error,
 		.ordered_events	= true,
 		.ordering_requires_timestamps = true,
 	},
@@ -2678,6 +2682,12 @@ static int setup_coalesce(const char *coalesce, bool no_source)
 
 static int perf_c2c__report(int argc, const char **argv)
 {
+	struct itrace_synth_opts itrace_synth_opts = {
+		.set = true,
+		.mem = true,	/* Only enable memory event */
+		.default_no_sample = true,
+	};
+
 	struct perf_session *session;
 	struct ui_progress prog;
 	struct perf_data data = {
@@ -2757,6 +2767,8 @@ static int perf_c2c__report(int argc, const char **argv)
 		goto out;
 	}
 
+	session->itrace_synth_opts = &itrace_synth_opts;
+
 	err = setup_nodes(session);
 	if (err) {
 		pr_err("Failed setup nodes\n");
-- 
cgit v1.2.3-70-g09d2


From 40714c58630aaaf1eb3acc431fe206a6b36a03d6 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Fri, 6 Nov 2020 17:48:53 +0800
Subject: perf mem: Support ARM SPE events

This patch adds ARM SPE events for perf memory profiling:

  'spe-load': event for only recording memory load ops;
  'spe-store': event for only recording memory store ops;
  'spe-ldst': event for recording memory load and store ops.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201106094853.21082-10-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/util/Build        |  2 +-
 tools/perf/arch/arm64/util/mem-events.c | 37 +++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/arch/arm64/util/mem-events.c

(limited to 'tools')

diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 8d2b9bcfffca..ead2f2275eee 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -10,4 +10,4 @@ perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
 			      ../../arm/util/auxtrace.o \
 			      ../../arm/util/cs-etm.o \
-			      arm-spe.o
+			      arm-spe.o mem-events.o
diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c
new file mode 100644
index 000000000000..2a2497372671
--- /dev/null
+++ b/tools/perf/arch/arm64/util/mem-events.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "map_symbol.h"
+#include "mem-events.h"
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+	E("spe-load",	"arm_spe_0/ts_enable=1,load_filter=1,store_filter=0,min_latency=%u/",	"arm_spe_0"),
+	E("spe-store",	"arm_spe_0/ts_enable=1,load_filter=0,store_filter=1/",			"arm_spe_0"),
+	E("spe-ldst",	"arm_spe_0/ts_enable=1,load_filter=1,store_filter=1,min_latency=%u/",	"arm_spe_0"),
+};
+
+static char mem_ev_name[100];
+
+struct perf_mem_event *perf_mem_events__ptr(int i)
+{
+	if (i >= PERF_MEM_EVENTS__MAX)
+		return NULL;
+
+	return &perf_mem_events[i];
+}
+
+char *perf_mem_events__name(int i)
+{
+	struct perf_mem_event *e = perf_mem_events__ptr(i);
+
+	if (i >= PERF_MEM_EVENTS__MAX)
+		return NULL;
+
+	if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE)
+		scnprintf(mem_ev_name, sizeof(mem_ev_name),
+			  e->name, perf_mem_events__loads_ldlat);
+	else /* PERF_MEM_EVENTS__STORE */
+		scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name);
+
+	return mem_ev_name;
+}
-- 
cgit v1.2.3-70-g09d2


From c185f1cde46653cd0a7a1eaf461d16c462870781 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 11 Nov 2020 15:11:28 +0800
Subject: perf arm-spe: Include bitops.h for BIT() macro

Include header linux/bitops.h, directly use its BIT() macro and remove
the self defined macros.

Committer notes:

Use BIT_ULL() instead of BIT to build on 32-bit arches as mentioned in
review by Andre Przywara <andre.przywara@arm.com>. I noticed the build
failure when crossbuilding to arm32 from x86_64.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20201111071149.815-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-decoder.c     | 5 +----
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 7 +++----
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 93e063f22be5..cc18a1e8c212 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -12,6 +12,7 @@
 #include <string.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <linux/bitops.h>
 #include <linux/compiler.h>
 #include <linux/zalloc.h>
 
@@ -21,10 +22,6 @@
 
 #include "arm-spe-decoder.h"
 
-#ifndef BIT
-#define BIT(n)		(1UL << (n))
-#endif
-
 static u64 arm_spe_calc_ip(int index, u64 payload)
 {
 	u8 *addr = (u8 *)&payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index b94001b756c7..5f65a3a70c57 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -8,13 +8,12 @@
 #include <string.h>
 #include <endian.h>
 #include <byteswap.h>
+#include <linux/bitops.h>
 
 #include "arm-spe-pkt-decoder.h"
 
-#define BIT(n)		(1ULL << (n))
-
-#define NS_FLAG		BIT(63)
-#define EL_FLAG		(BIT(62) | BIT(61))
+#define NS_FLAG		BIT_ULL(63)
+#define EL_FLAG		(BIT_ULL(62) | BIT_ULL(61))
 
 #define SPE_HEADER0_PAD			0x0
 #define SPE_HEADER0_END			0x1
-- 
cgit v1.2.3-70-g09d2


From 903b659436b706928934ff5ef59d591267e5ce1a Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 11 Nov 2020 15:11:29 +0800
Subject: perf arm-spe: Fix a typo in comment

Fix a typo: s/iff/if.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20201111071149.815-3-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 5f65a3a70c57..12a96585da94 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -142,7 +142,7 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len,
 
 	/* we use index to identify Events with a less number of
 	 * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS,
-	 * LLC-REFILL, and REMOTE-ACCESS events are identified iff
+	 * LLC-REFILL, and REMOTE-ACCESS events are identified if
 	 * index > 1.
 	 */
 	packet->index = ret - 1;
-- 
cgit v1.2.3-70-g09d2


From b2ded2e2e2764e502fc025f615210434f1eaa2a9 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 11 Nov 2020 15:11:30 +0800
Subject: perf arm-spe: Refactor payload size calculation

This patch defines macro to extract "sz" field from header, and renames
the function payloadlen() to arm_spe_payload_len().

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20201111071149.815-4-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 12a96585da94..a8eb7be189ec 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -69,22 +69,22 @@ const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
 	return arm_spe_packet_name[type];
 }
 
-/* return ARM SPE payload size from its encoding,
- * which is in bits 5:4 of the byte.
- * 00 : byte
- * 01 : halfword (2)
- * 10 : word (4)
- * 11 : doubleword (8)
+/*
+ * Extracts the field "sz" from header bits and converts to bytes:
+ *   00 : byte (1)
+ *   01 : halfword (2)
+ *   10 : word (4)
+ *   11 : doubleword (8)
  */
-static int payloadlen(unsigned char byte)
+static unsigned int arm_spe_payload_len(unsigned char hdr)
 {
-	return 1 << ((byte & 0x30) >> 4);
+	return 1U << ((hdr & GENMASK_ULL(5, 4)) >> 4);
 }
 
 static int arm_spe_get_payload(const unsigned char *buf, size_t len,
 			       struct arm_spe_pkt *packet)
 {
-	size_t payload_len = payloadlen(buf[0]);
+	size_t payload_len = arm_spe_payload_len(buf[0]);
 
 	if (len < 1 + payload_len)
 		return ARM_SPE_NEED_MORE_BYTES;
-- 
cgit v1.2.3-70-g09d2


From b65577baf482909225c79d8a6bad44d2a62751f4 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 11 Nov 2020 15:11:31 +0800
Subject: perf arm-spe: Refactor arm_spe_get_events()

In function arm_spe_get_events(), the event packet's 'index' is assigned
as payload length, but the flow is not directive: it firstly gets the
packet length from the return value of arm_spe_get_payload(), the value
includes header length (1) and payload length:

  int ret = arm_spe_get_payload(buf, len, packet);

and then reduces header length from packet length, so finally get the
payload length:

  packet->index = ret - 1;

To simplify the code, this patch directly assigns payload length to
event packet's index; and at the end it calls arm_spe_get_payload() to
return the payload value.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20201111071149.815-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index a8eb7be189ec..57904da89db1 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -136,8 +136,6 @@ static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
 static int arm_spe_get_events(const unsigned char *buf, size_t len,
 			      struct arm_spe_pkt *packet)
 {
-	int ret = arm_spe_get_payload(buf, len, packet);
-
 	packet->type = ARM_SPE_EVENTS;
 
 	/* we use index to identify Events with a less number of
@@ -145,9 +143,9 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len,
 	 * LLC-REFILL, and REMOTE-ACCESS events are identified if
 	 * index > 1.
 	 */
-	packet->index = ret - 1;
+	packet->index = arm_spe_payload_len(buf[0]);
 
-	return ret;
+	return arm_spe_get_payload(buf, len, packet);
 }
 
 static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
-- 
cgit v1.2.3-70-g09d2


From 0a04244cabc5560ce1e08555e8712a4cd20ab6ce Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 11 Nov 2020 15:11:32 +0800
Subject: perf arm-spe: Fix packet length handling

When processing address packet and counter packet, if the packet
contains extended header, it misses to account the extra one byte for
header length calculation, thus returns the wrong packet length.

To correct the packet length calculation, one possible fixing is simply
to plus extra 1 for extended header, but will spread some duplicate code
in the flows for processing address packet and counter packet.
Alternatively, we can refine the function arm_spe_get_payload() to not
only support short header and allow it to support extended header, and
rely on it for the packet length calculation.

So this patch refactors function arm_spe_get_payload() with a new
argument 'ext_hdr' for support extended header; the packet processing
flows can invoke this function to unify the packet length calculation.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20201111071149.815-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 34 ++++++++--------------
 1 file changed, 12 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 57904da89db1..671a4763fb47 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -82,14 +82,15 @@ static unsigned int arm_spe_payload_len(unsigned char hdr)
 }
 
 static int arm_spe_get_payload(const unsigned char *buf, size_t len,
+			       unsigned char ext_hdr,
 			       struct arm_spe_pkt *packet)
 {
-	size_t payload_len = arm_spe_payload_len(buf[0]);
+	size_t payload_len = arm_spe_payload_len(buf[ext_hdr]);
 
-	if (len < 1 + payload_len)
+	if (len < 1 + ext_hdr + payload_len)
 		return ARM_SPE_NEED_MORE_BYTES;
 
-	buf++;
+	buf += 1 + ext_hdr;
 
 	switch (payload_len) {
 	case 1: packet->payload = *(uint8_t *)buf; break;
@@ -99,7 +100,7 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len,
 	default: return ARM_SPE_BAD_PACKET;
 	}
 
-	return 1 + payload_len;
+	return 1 + ext_hdr + payload_len;
 }
 
 static int arm_spe_get_pad(struct arm_spe_pkt *packet)
@@ -130,7 +131,7 @@ static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
 				 struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_TIMESTAMP;
-	return arm_spe_get_payload(buf, len, packet);
+	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_events(const unsigned char *buf, size_t len,
@@ -145,14 +146,14 @@ static int arm_spe_get_events(const unsigned char *buf, size_t len,
 	 */
 	packet->index = arm_spe_payload_len(buf[0]);
 
-	return arm_spe_get_payload(buf, len, packet);
+	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
 				   struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_DATA_SOURCE;
-	return arm_spe_get_payload(buf, len, packet);
+	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_context(const unsigned char *buf, size_t len,
@@ -160,8 +161,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len,
 {
 	packet->type = ARM_SPE_CONTEXT;
 	packet->index = buf[0] & 0x3;
-
-	return arm_spe_get_payload(buf, len, packet);
+	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
@@ -169,41 +169,31 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
 {
 	packet->type = ARM_SPE_OP_TYPE;
 	packet->index = buf[0] & 0x3;
-	return arm_spe_get_payload(buf, len, packet);
+	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
 static int arm_spe_get_counter(const unsigned char *buf, size_t len,
 			       const unsigned char ext_hdr, struct arm_spe_pkt *packet)
 {
-	if (len < 2)
-		return ARM_SPE_NEED_MORE_BYTES;
-
 	packet->type = ARM_SPE_COUNTER;
 	if (ext_hdr)
 		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
 	else
 		packet->index = buf[0] & 0x7;
 
-	packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
-
-	return 1 + ext_hdr + 2;
+	return arm_spe_get_payload(buf, len, ext_hdr, packet);
 }
 
 static int arm_spe_get_addr(const unsigned char *buf, size_t len,
 			    const unsigned char ext_hdr, struct arm_spe_pkt *packet)
 {
-	if (len < 8)
-		return ARM_SPE_NEED_MORE_BYTES;
-
 	packet->type = ARM_SPE_ADDRESS;
 	if (ext_hdr)
 		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
 	else
 		packet->index = buf[0] & 0x7;
 
-	memcpy_le64(&packet->payload, buf + 1, 8);
-
-	return 1 + ext_hdr + 8;
+	return arm_spe_get_payload(buf, len, ext_hdr, packet);
 }
 
 static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
-- 
cgit v1.2.3-70-g09d2


From c8a950d0d3b926a02c7b2e713850d38217cec3d1 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:05 +0100
Subject: tools: Factor HOSTCC, HOSTLD, HOSTAR definitions

Several Makefiles in tools/ need to define the host toolchain variables.
Move their definition to tools/scripts/Makefile.include

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-2-jean-philippe@linaro.org
---
 tools/bpf/resolve_btfids/Makefile |  9 ---------
 tools/build/Makefile              |  4 ----
 tools/objtool/Makefile            |  9 ---------
 tools/perf/Makefile.perf          |  4 ----
 tools/power/acpi/Makefile.config  |  1 -
 tools/scripts/Makefile.include    | 10 ++++++++++
 6 files changed, 10 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 66cb92136de4..bf656432ad73 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -18,15 +18,6 @@ else
 endif
 
 # always use the host compiler
-ifneq ($(LLVM),)
-HOSTAR  ?= llvm-ar
-HOSTCC  ?= clang
-HOSTLD  ?= ld.lld
-else
-HOSTAR  ?= ar
-HOSTCC  ?= gcc
-HOSTLD  ?= ld
-endif
 AR       = $(HOSTAR)
 CC       = $(HOSTCC)
 LD       = $(HOSTLD)
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 722f1700d96a..bae48e6fa995 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -15,10 +15,6 @@ endef
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
-HOSTCC ?= gcc
-HOSTLD ?= ld
-HOSTAR ?= ar
-
 export HOSTCC HOSTLD HOSTAR
 
 ifeq ($(V),1)
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 4ea9a833dde7..5cdb19036d7f 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -3,15 +3,6 @@ include ../scripts/Makefile.include
 include ../scripts/Makefile.arch
 
 # always use the host compiler
-ifneq ($(LLVM),)
-HOSTAR	?= llvm-ar
-HOSTCC	?= clang
-HOSTLD	?= ld.lld
-else
-HOSTAR	?= ar
-HOSTCC	?= gcc
-HOSTLD	?= ld
-endif
 AR	 = $(HOSTAR)
 CC	 = $(HOSTCC)
 LD	 = $(HOSTLD)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 7ce3f2e8b9c7..62f3deb1d3a8 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -175,10 +175,6 @@ endef
 
 LD += $(EXTRA_LDFLAGS)
 
-HOSTCC  ?= gcc
-HOSTLD  ?= ld
-HOSTAR  ?= ar
-
 PKG_CONFIG = $(CROSS_COMPILE)pkg-config
 LLVM_CONFIG ?= llvm-config
 
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
index 54a2857c2510..331f6d30f472 100644
--- a/tools/power/acpi/Makefile.config
+++ b/tools/power/acpi/Makefile.config
@@ -54,7 +54,6 @@ INSTALL_SCRIPT = ${INSTALL_PROGRAM}
 CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
 CROSS_COMPILE ?= $(CROSS)
 LD = $(CC)
-HOSTCC = gcc
 
 # check if compiler option is supported
 cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index a7974638561c..1358e89cdf7d 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -59,6 +59,16 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
 $(call allow-override,CXX,$(CROSS_COMPILE)g++)
 $(call allow-override,STRIP,$(CROSS_COMPILE)strip)
 
+ifneq ($(LLVM),)
+HOSTAR  ?= llvm-ar
+HOSTCC  ?= clang
+HOSTLD  ?= ld.lld
+else
+HOSTAR  ?= ar
+HOSTCC  ?= gcc
+HOSTLD  ?= ld
+endif
+
 ifeq ($(CC_NO_CLANG), 1)
 EXTRA_WARNINGS += -Wstrict-aliasing=3
 endif
-- 
cgit v1.2.3-70-g09d2


From 9e8929fdbb9c9026bd3a732e9ac7dc9617c86309 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:06 +0100
Subject: tools/bpftool: Force clean of out-of-tree build

Cleaning a partial build can fail if the output directory for libbpf
wasn't created:

$ make -C tools/bpf/bpftool O=/tmp/bpf clean
/bin/sh: line 0: cd: /tmp/bpf/libbpf/: No such file or directory
tools/scripts/Makefile.include:17: *** output directory "/tmp/bpf/libbpf/" does not exist.  Stop.
make: *** [Makefile:36: /tmp/bpf/libbpf/libbpf.a-clean] Error 2

As a result make never gets around to clearing the leftover objects. Add
the libbpf output directory as clean dependency to ensure clean always
succeeds (similarly to the "descend" macro). The directory is later
removed by the clean recipe.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-3-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index f60e6ad3a1df..1358c093b812 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -27,11 +27,13 @@ LIBBPF = $(LIBBPF_PATH)libbpf.a
 
 BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
-$(LIBBPF): FORCE
-	$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
+$(LIBBPF_OUTPUT):
+	$(QUIET_MKDIR)mkdir -p $@
+
+$(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
 
-$(LIBBPF)-clean:
+$(LIBBPF)-clean: $(LIBBPF_OUTPUT)
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
 
-- 
cgit v1.2.3-70-g09d2


From 8859b0da5aac28e4e9651c8971e7af344f8ffec1 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:07 +0100
Subject: tools/bpftool: Fix cross-build

The bpftool build first creates an intermediate binary, executed on the
host, to generate skeletons required by the final build. When
cross-building bpftool for an architecture different from the host, the
intermediate binary should be built using the host compiler (gcc) and
the final bpftool using the cross compiler (e.g. aarch64-linux-gnu-gcc).

Generate the intermediate objects into the bootstrap/ directory using
the host toolchain.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-4-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 1358c093b812..d566bced135e 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -19,24 +19,37 @@ BPF_DIR = $(srctree)/tools/lib/bpf/
 ifneq ($(OUTPUT),)
   LIBBPF_OUTPUT = $(OUTPUT)/libbpf/
   LIBBPF_PATH = $(LIBBPF_OUTPUT)
+  BOOTSTRAP_OUTPUT = $(OUTPUT)/bootstrap/
 else
+  LIBBPF_OUTPUT =
   LIBBPF_PATH = $(BPF_DIR)
+  BOOTSTRAP_OUTPUT = $(CURDIR)/bootstrap/
 endif
 
 LIBBPF = $(LIBBPF_PATH)libbpf.a
+LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/
+LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
 
 BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
-$(LIBBPF_OUTPUT):
+$(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT):
 	$(QUIET_MKDIR)mkdir -p $@
 
 $(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) $(LIBBPF_OUTPUT)libbpf.a
 
+$(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
+		ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@
+
 $(LIBBPF)-clean: $(LIBBPF_OUTPUT)
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
 
+$(LIBBPF_BOOTSTRAP)-clean: $(LIBBPF_BOOTSTRAP_OUTPUT)
+	$(call QUIET_CLEAN, libbpf-bootstrap)
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null
+
 prefix ?= /usr/local
 bash_compdir ?= /usr/share/bash-completion/completions
 
@@ -94,6 +107,7 @@ CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
 endif
 
 LIBS = $(LIBBPF) -lelf -lz
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
 ifeq ($(feature-libcap), 1)
 CFLAGS += -DUSE_LIBCAP
 LIBS += -lcap
@@ -120,9 +134,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
 SRCS += $(BFD_SRCS)
 endif
 
-BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
+BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
@@ -169,12 +183,16 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 
 $(OUTPUT)feature.o: | zdep
 
-$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
+$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP)
+	$(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \
+		$(LIBS_BOOTSTRAP)
 
 $(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
 
+$(BOOTSTRAP_OUTPUT)%.o: %.c | $(BOOTSTRAP_OUTPUT)
+	$(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
@@ -182,11 +200,11 @@ feature-detect-clean:
 	$(call QUIET_CLEAN, feature-detect)
 	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
 
-clean: $(LIBBPF)-clean feature-detect-clean
+clean: $(LIBBPF)-clean $(LIBBPF_BOOTSTRAP)-clean feature-detect-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
-	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
-	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
+	$(Q)$(RM) -- $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
+	$(Q)$(RM) -r -- $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT)
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
 	$(Q)$(RM) -r -- $(OUTPUT)feature/
-- 
cgit v1.2.3-70-g09d2


From 3290996e713315dfc9589e2c340a4c4997d90be8 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:08 +0100
Subject: tools/runqslower: Use Makefile.include

Makefile.include defines variables such as OUTPUT and CC for out-of-tree
build and cross-build. Include it into the runqslower Makefile and use
its $(QUIET*) helpers.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-5-jean-philippe@linaro.org
---
 tools/bpf/runqslower/Makefile | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index fb1337d69868..bcc4a7396713 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+include ../../scripts/Makefile.include
+
 OUTPUT := .output
 CLANG ?= clang
 LLC ?= llc
@@ -21,10 +23,8 @@ VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
 abs_out := $(abspath $(OUTPUT))
 ifeq ($(V),1)
 Q =
-msg =
 else
 Q = @
-msg = @printf '  %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))";
 MAKEFLAGS += --no-print-directory
 submake_extras := feature_display=0
 endif
@@ -37,12 +37,11 @@ all: runqslower
 runqslower: $(OUTPUT)/runqslower
 
 clean:
-	$(call msg,CLEAN)
+	$(call QUIET_CLEAN, runqslower)
 	$(Q)rm -rf $(OUTPUT) runqslower
 
 $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
-	$(call msg,BINARY,$@)
-	$(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
+	$(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
 
 $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h	      \
 			$(OUTPUT)/runqslower.bpf.o
@@ -50,31 +49,26 @@ $(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h	      \
 $(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h
 
 $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
-	$(call msg,GEN-SKEL,$@)
-	$(Q)$(BPFTOOL) gen skeleton $< > $@
+	$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
 
 $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
-	$(call msg,BPF,$@)
-	$(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES)			      \
+	$(QUIET_GEN)$(CLANG) -g -O2 -target bpf $(INCLUDES)		      \
 		 -c $(filter %.c,$^) -o $@ &&				      \
 	$(LLVM_STRIP) -g $@
 
 $(OUTPUT)/%.o: %.c | $(OUTPUT)
-	$(call msg,CC,$@)
-	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
+	$(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
 
 $(OUTPUT):
-	$(call msg,MKDIR,$@)
-	$(Q)mkdir -p $(OUTPUT)
+	$(QUIET_MKDIR)mkdir -p $(OUTPUT)
 
 $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
-	$(call msg,GEN,$@)
 	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
 		echo "Couldn't find kernel BTF; set VMLINUX_BTF to"	       \
 			"specify its location." >&2;			       \
 		exit 1;\
 	fi
-	$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
 	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)			       \
-- 
cgit v1.2.3-70-g09d2


From 85e59344d0790379e063bf3cd3dd0fe91ce3b505 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:09 +0100
Subject: tools/runqslower: Enable out-of-tree build

Enable out-of-tree build for runqslower. Only set OUTPUT=.output if it
wasn't already set by the user.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-6-jean-philippe@linaro.org
---
 tools/bpf/runqslower/Makefile | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index bcc4a7396713..0fc4d4046193 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -1,15 +1,18 @@
 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 include ../../scripts/Makefile.include
 
-OUTPUT := .output
+OUTPUT ?= $(abspath .output)/
+
 CLANG ?= clang
 LLC ?= llc
 LLVM_STRIP ?= llvm-strip
-DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
+BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bpftool
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 LIBBPF_SRC := $(abspath ../../lib/bpf)
-BPFOBJ := $(OUTPUT)/libbpf.a
-BPF_INCLUDE := $(OUTPUT)
+BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
+BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
+BPF_INCLUDE := $(BPFOBJ_OUTPUT)
 INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib)        \
        -I$(abspath ../../include/uapi)
 CFLAGS := -g -Wall
@@ -20,7 +23,6 @@ VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
 VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
 					  $(wildcard $(VMLINUX_BTF_PATHS))))
 
-abs_out := $(abspath $(OUTPUT))
 ifeq ($(V),1)
 Q =
 else
@@ -38,7 +40,11 @@ runqslower: $(OUTPUT)/runqslower
 
 clean:
 	$(call QUIET_CLEAN, runqslower)
-	$(Q)rm -rf $(OUTPUT) runqslower
+	$(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
+	$(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d
+	$(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
+	$(Q)$(RM) $(OUTPUT)runqslower
+	$(Q)$(RM) -r .output
 
 $(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $^ -lelf -lz -o $@
@@ -59,8 +65,8 @@ $(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
 $(OUTPUT)/%.o: %.c | $(OUTPUT)
 	$(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
 
-$(OUTPUT):
-	$(QUIET_MKDIR)mkdir -p $(OUTPUT)
+$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
+	$(QUIET_MKDIR)mkdir -p $@
 
 $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
 	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
@@ -70,10 +76,8 @@ $(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
 	fi
 	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
 
-$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
-	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)			       \
-		    OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
 
-$(DEFAULT_BPFTOOL):
-	$(Q)$(MAKE) $(submake_extras) -C ../bpftool			      \
-		    prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install
+$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)
-- 
cgit v1.2.3-70-g09d2


From 2d9393fefb506fb8c8a483e5dc9bbd7774657b89 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:10 +0100
Subject: tools/runqslower: Build bpftool using HOSTCC

When cross building runqslower for an other architecture, the
intermediate bpftool used to generate a skeleton must be built using the
host toolchain. Pass HOSTCC and HOSTLD, defined in Makefile.include, to
the bpftool Makefile.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-7-jean-philippe@linaro.org
---
 tools/bpf/runqslower/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 0fc4d4046193..4d5ca54fcd4c 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -80,4 +80,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU
 	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
 
 $(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
-	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)   \
+		    CC=$(HOSTCC) LD=$(HOSTLD)
-- 
cgit v1.2.3-70-g09d2


From 0639e5e97ad9c58dd15dcf6f6ccf677cfba39f98 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 10 Nov 2020 17:43:11 +0100
Subject: tools/bpftool: Fix build slowdown

Commit ba2fd563b740 ("tools/bpftool: Support passing BPFTOOL_VERSION to
make") changed BPFTOOL_VERSION to a recursively expanded variable,
forcing it to be recomputed on every expansion of CFLAGS and
dramatically slowing down the bpftool build. Restore BPFTOOL_VERSION as
a simply expanded variable, guarded by an ifeq().

Fixes: ba2fd563b740 ("tools/bpftool: Support passing BPFTOOL_VERSION to make")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201110164310.2600671-8-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index d566bced135e..804ade95929f 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -30,7 +30,9 @@ LIBBPF = $(LIBBPF_PATH)libbpf.a
 LIBBPF_BOOTSTRAP_OUTPUT = $(BOOTSTRAP_OUTPUT)libbpf/
 LIBBPF_BOOTSTRAP = $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a
 
-BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+ifeq ($(BPFTOOL_VERSION),)
+BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+endif
 
 $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT):
 	$(QUIET_MKDIR)mkdir -p $@
-- 
cgit v1.2.3-70-g09d2


From ef1220a7d4bbdb5fc435d691776778568dfb69a8 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Tue, 10 Nov 2020 10:00:48 +0800
Subject: selftests: pmtu.sh: use $ksft_skip for skipped return code

This test uses return code 2 as a hard-coded skipped state, let's use
the kselftest framework skip code variable $ksft_skip instead to make
it more readable and easier to maintain.

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/pmtu.sh | 64 ++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 6bbf69a28e12..fb53987ab64b 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -355,7 +355,7 @@ setup_fou_or_gue() {
 	encap="${3}"
 
 	if [ "${outer}" = "4" ]; then
-		modprobe fou || return 2
+		modprobe fou || return $ksft_skip
 		a_addr="${prefix4}.${a_r1}.1"
 		b_addr="${prefix4}.${b_r1}.1"
 		if [ "${inner}" = "4" ]; then
@@ -366,7 +366,7 @@ setup_fou_or_gue() {
 			ipproto="41"
 		fi
 	else
-		modprobe fou6 || return 2
+		modprobe fou6 || return $ksft_skip
 		a_addr="${prefix6}:${a_r1}::1"
 		b_addr="${prefix6}:${b_r1}::1"
 		if [ "${inner}" = "4" ]; then
@@ -380,8 +380,8 @@ setup_fou_or_gue() {
 		fi
 	fi
 
-	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
-	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
+	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip
 
 	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
 	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
@@ -455,7 +455,7 @@ setup_ipvX_over_ipvY() {
 		fi
 	fi
 
-	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip
 	run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
 
 	run_cmd ${ns_a} ip link set ip_a up
@@ -713,7 +713,7 @@ setup_routing() {
 }
 
 setup_bridge() {
-	run_cmd ${ns_a} ip link add br0 type bridge || return 2
+	run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
 	run_cmd ${ns_a} ip link set br0 up
 
 	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -765,7 +765,7 @@ setup_ovs_vxlan6() {
 }
 
 setup_ovs_bridge() {
-	run_cmd ovs-vsctl add-br ovs_br0 || return 2
+	run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
 	run_cmd ip link set ovs_br0 up
 
 	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -887,7 +887,7 @@ check_pmtu_value() {
 test_pmtu_ipvX() {
 	family=${1}
 
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -985,11 +985,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing ${type}4 || return 2
+		setup namespaces routing ${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing ${type}6 || return 2
+		setup namespaces routing ${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1060,11 +1060,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing bridge bridged_${type}4 || return 2
+		setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing bridge bridged_${type}6 || return 2
+		setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1144,11 +1144,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
 	ll_mtu=4000
 
 	if [ ${outer_family} -eq 4 ]; then
-		setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+		setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
 		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
 	else
-		setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+		setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
 		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
 		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
 	fi
@@ -1230,7 +1230,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() {
 	encap=${3}
 	ll_mtu=4000
 
-	setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
+	setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
 	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
@@ -1309,7 +1309,7 @@ test_pmtu_ipvX_over_ipvY_exception() {
 	outer=${2}
 	ll_mtu=4000
 
-	setup namespaces routing ip${inner}ip${outer} || return 2
+	setup namespaces routing ip${inner}ip${outer} || return $ksft_skip
 
 	trace "${ns_a}" ip_a         "${ns_b}"  ip_b  \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
@@ -1363,7 +1363,7 @@ test_pmtu_ipv6_ipv6_exception() {
 }
 
 test_pmtu_vti4_exception() {
-	setup namespaces veth vti4 xfrm4 || return 2
+	setup namespaces veth vti4 xfrm4 || return $ksft_skip
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
 	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
 
@@ -1393,7 +1393,7 @@ test_pmtu_vti4_exception() {
 }
 
 test_pmtu_vti6_exception() {
-	setup namespaces veth vti6 xfrm6 || return 2
+	setup namespaces veth vti6 xfrm6 || return $ksft_skip
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
 	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
 	fail=0
@@ -1423,7 +1423,7 @@ test_pmtu_vti6_exception() {
 }
 
 test_pmtu_vti4_default_mtu() {
-	setup namespaces veth vti4 || return 2
+	setup namespaces veth vti4 || return $ksft_skip
 
 	# Check that MTU of vti device is MTU of veth minus IPv4 header length
 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1435,7 +1435,7 @@ test_pmtu_vti4_default_mtu() {
 }
 
 test_pmtu_vti6_default_mtu() {
-	setup namespaces veth vti6 || return 2
+	setup namespaces veth vti6 || return $ksft_skip
 
 	# Check that MTU of vti device is MTU of veth minus IPv6 header length
 	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1447,10 +1447,10 @@ test_pmtu_vti6_default_mtu() {
 }
 
 test_pmtu_vti4_link_add_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
-	[ $? -ne 0 ] && err "  vti not supported" && return 2
+	[ $? -ne 0 ] && err "  vti not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link del vti4_a
 
 	fail=0
@@ -1485,10 +1485,10 @@ test_pmtu_vti4_link_add_mtu() {
 }
 
 test_pmtu_vti6_link_add_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
-	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
+	[ $? -ne 0 ] && err "  vti6 not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link del vti6_a
 
 	fail=0
@@ -1523,10 +1523,10 @@ test_pmtu_vti6_link_add_mtu() {
 }
 
 test_pmtu_vti6_link_change_mtu() {
-	setup namespaces || return 2
+	setup namespaces || return $ksft_skip
 
 	run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
-	[ $? -ne 0 ] && err "  dummy not supported" && return 2
+	[ $? -ne 0 ] && err "  dummy not supported" && return $ksft_skip
 	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
 	run_cmd ${ns_a} ip link set dummy0 up
 	run_cmd ${ns_a} ip link set dummy1 up
@@ -1579,10 +1579,10 @@ test_cleanup_vxlanX_exception() {
 	encap="vxlan"
 	ll_mtu=4000
 
-	check_command taskset || return 2
+	check_command taskset || return $ksft_skip
 	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
 
-	setup namespaces routing ${encap}${outer} || return 2
+	setup namespaces routing ${encap}${outer} || return $ksft_skip
 	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
 	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
@@ -1644,7 +1644,7 @@ run_test() {
 		fi
 		err_flush
 		exit 1
-	elif [ $ret -eq 2 ]; then
+	elif [ $ret -eq $ksft_skip ]; then
 		printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
 		err_flush
 	fi
@@ -1667,7 +1667,7 @@ run_test_nh() {
 }
 
 test_list_flush_ipv4_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1721,7 +1721,7 @@ test_list_flush_ipv4_exception() {
 }
 
 test_list_flush_ipv6_exception() {
-	setup namespaces routing || return 2
+	setup namespaces routing || return $ksft_skip
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
 	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
@@ -1840,7 +1840,7 @@ for t in ${tests}; do
 	if [ $run_this -eq 1 ]; then
 		run_test "${name}" "${desc}"
 		# if test was skipped no need to retry with nexthop objects
-		[ $? -eq 2 ] && rerun_nh=0
+		[ $? -eq $ksft_skip ] && rerun_nh=0
 
 		if [ "${rerun_nh}" = "1" ]; then
 			run_test_nh "${name}" "${desc}"
-- 
cgit v1.2.3-70-g09d2


From 2a9d3716b810a4f2c8291b7aa8f358d11693f6e5 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Tue, 10 Nov 2020 10:00:49 +0800
Subject: selftests: pmtu.sh: improve the test result processing

This test will treat all non-zero return codes as failures, it will
make the pmtu.sh test script being marked as FAILED when some
sub-test got skipped.

Improve the result processing by
  * Only mark the whole test script as SKIP when all of the
    sub-tests were skipped
  * If the sub-tests were either passed or skipped, the overall
    result will be PASS
  * If any of them has failed with return code 1 or anything bad
    happened (e.g. return code 127 for command not found), the
    overall result will be FAIL

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/pmtu.sh | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index fb53987ab64b..464e31eabc73 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1652,7 +1652,19 @@ run_test() {
 	return $ret
 	)
 	ret=$?
-	[ $ret -ne 0 ] && exitcode=1
+	case $ret in
+		0)
+			all_skipped=false
+			[ $exitcode=$ksft_skip ] && exitcode=0
+		;;
+		$ksft_skip)
+			[ $all_skipped = true ] && exitcode=$ksft_skip
+		;;
+		*)
+			all_skipped=false
+			exitcode=1
+		;;
+	esac
 
 	return $ret
 }
@@ -1786,6 +1798,7 @@ usage() {
 #
 exitcode=0
 desc=0
+all_skipped=true
 
 while getopts :ptv o
 do
-- 
cgit v1.2.3-70-g09d2


From fd63729cc0a6872bdabd393ee933a969642e4076 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 11 Nov 2020 15:12:15 -0800
Subject: selftests/bpf: Fix unused attribute usage in subprogs_unused test

Correct attribute name is "unused". maybe_unused is a C++17 addition.
This patch fixes compilation warning during selftests compilation.

Fixes: 197afc631413 ("libbpf: Don't attempt to load unused subprog as an entry-point BPF program")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201111231215.1779147-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/test_subprogs_unused.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
index 75d975f8cf90..bc49e050d342 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
@@ -4,12 +4,12 @@
 
 const char LICENSE[] SEC("license") = "GPL";
 
-__attribute__((maybe_unused)) __noinline int unused1(int x)
+__attribute__((unused)) __noinline int unused1(int x)
 {
 	return x + 1;
 }
 
-static __attribute__((maybe_unused)) __noinline int unused2(int x)
+static __attribute__((unused)) __noinline int unused2(int x)
 {
 	return x + 2;
 }
-- 
cgit v1.2.3-70-g09d2


From 6a59edd832e2e353809778859d2a4a2d6c94d011 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 12 Nov 2020 10:10:50 +0100
Subject: tools/bpf: Add bootstrap/ to .gitignore

Commit 8859b0da5aac ("tools/bpftool: Fix cross-build") added a
build-time bootstrap/ directory for bpftool, and removed
bpftool-bootstrap. Update .gitignore accordingly.

Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201112091049.3159055-1-jean-philippe@linaro.org
---
 tools/bpf/bpftool/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 3e601bcfd461..944cb4b7c95d 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 *.d
-/bpftool-bootstrap
+/bootstrap/
 /bpftool
 bpftool*.8
 bpf-helpers.*
-- 
cgit v1.2.3-70-g09d2


From c36538798fc6c80bd8bdaddad803b0c86dc13d7c Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 12 Nov 2020 10:10:52 +0100
Subject: tools/bpf: Always run the *-clean recipes

Make $(LIBBPF)-clean and $(LIBBPF_BOOTSTRAP)-clean .PHONY targets, in
case those files exist. And keep consistency within the Makefile by
making the directory dependencies order-only.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201112091049.3159055-2-jean-philippe@linaro.org
---
 tools/bpf/bpftool/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 804ade95929f..f897cb5fb12d 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -44,11 +44,11 @@ $(LIBBPF_BOOTSTRAP): FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
 		ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@
 
-$(LIBBPF)-clean: $(LIBBPF_OUTPUT)
+$(LIBBPF)-clean: FORCE | $(LIBBPF_OUTPUT)
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_OUTPUT) clean >/dev/null
 
-$(LIBBPF_BOOTSTRAP)-clean: $(LIBBPF_BOOTSTRAP_OUTPUT)
+$(LIBBPF_BOOTSTRAP)-clean: FORCE | $(LIBBPF_BOOTSTRAP_OUTPUT)
 	$(call QUIET_CLEAN, libbpf-bootstrap)
 	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) clean >/dev/null
 
-- 
cgit v1.2.3-70-g09d2


From e24a87b54ef3e39261f1d859b7f78416349dfb14 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 4 Nov 2020 17:42:28 +0800
Subject: perf lock: Correct field name "flags"

The tracepoint "lock:lock_acquire" contains field "flags" but not
"flag".  Current code wrongly retrieves value from field "flag" and it
always gets zero for the value, thus "perf lock" doesn't report the
correct result.

This patch replaces the field name "flag" with "flags", so can read out
the correct flags for locking.

Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201104094229.17509-1-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-lock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index f0a1dbacb46c..5cecc1ad78e1 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -406,7 +406,7 @@ static int report_lock_acquire_event(struct evsel *evsel,
 	struct lock_seq_stat *seq;
 	const char *name = evsel__strval(evsel, sample, "name");
 	u64 tmp	 = evsel__intval(evsel, sample, "lockdep_addr");
-	int flag = evsel__intval(evsel, sample, "flag");
+	int flag = evsel__intval(evsel, sample, "flags");
 
 	memcpy(&addr, &tmp, sizeof(void *));
 
-- 
cgit v1.2.3-70-g09d2


From b0e5a05cc9e37763c7f19366d94b1a6160c755bc Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 4 Nov 2020 17:42:29 +0800
Subject: perf lock: Don't free "lock_seq_stat" if read_count isn't zero

When execute command "perf lock report", it hits failure and outputs log
as follows:

  perf: builtin-lock.c:623: report_lock_release_event: Assertion `!(seq->read_count < 0)' failed.
  Aborted

This is an imbalance issue.  The locking sequence structure
"lock_seq_stat" contains the reader counter and it is used to check if
the locking sequence is balance or not between acquiring and releasing.

If the tool wrongly frees "lock_seq_stat" when "read_count" isn't zero,
the "read_count" will be reset to zero when allocate a new structure at
the next time; thus it causes the wrong counting for reader and finally
results in imbalance issue.

To fix this issue, if detects "read_count" is not zero (means still have
read user in the locking sequence), goto the "end" tag to skip freeing
structure "lock_seq_stat".

Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20201104094229.17509-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-lock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 5cecc1ad78e1..a2f1e53f37a7 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -621,7 +621,7 @@ static int report_lock_release_event(struct evsel *evsel,
 	case SEQ_STATE_READ_ACQUIRED:
 		seq->read_count--;
 		BUG_ON(seq->read_count < 0);
-		if (!seq->read_count) {
+		if (seq->read_count) {
 			ls->nr_release++;
 			goto end;
 		}
-- 
cgit v1.2.3-70-g09d2


From db1a8b97a0a36155171dbb805fbcb276e07559f6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 9 Nov 2020 13:59:15 -0300
Subject: tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in
 'perf bench mem memcpy'

To bring in the change made in this cset:

  4d6ffa27b8e5116c ("x86/lib: Change .weak to SYM_FUNC_START_WEAK for arch/x86/lib/mem*_64.S")
  6dcc5627f6aec4cb ("x86/asm: Change all ENTRY+ENDPROC to SYM_FUNC_*")

I needed to define SYM_FUNC_START_LOCAL() as SYM_L_GLOBAL as
mem{cpy,set}_{orig,erms} are used by 'perf bench'.

This silences these perf tools build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
  diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
  Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
  diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Fangrui Song <maskray@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/lib/memcpy_64.S           |  8 +++-----
 tools/arch/x86/lib/memset_64.S           | 11 ++++++-----
 tools/perf/bench/mem-memcpy-x86-64-asm.S |  3 +++
 tools/perf/bench/mem-memset-x86-64-asm.S |  3 +++
 tools/perf/util/include/linux/linkage.h  |  7 +++++++
 5 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 0b5b8ae56bd9..1e299ac73c86 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -16,8 +16,6 @@
  * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
  */
 
-.weak memcpy
-
 /*
  * memcpy - Copy a memory block.
  *
@@ -30,7 +28,7 @@
  * rax original destination
  */
 SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_LOCAL(memcpy)
+SYM_FUNC_START_WEAK(memcpy)
 	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
 		      "jmp memcpy_erms", X86_FEATURE_ERMS
 
@@ -51,14 +49,14 @@ EXPORT_SYMBOL(__memcpy)
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
  * simpler than memcpy. Use memcpy_erms when possible.
  */
-SYM_FUNC_START(memcpy_erms)
+SYM_FUNC_START_LOCAL(memcpy_erms)
 	movq %rdi, %rax
 	movq %rdx, %rcx
 	rep movsb
 	ret
 SYM_FUNC_END(memcpy_erms)
 
-SYM_FUNC_START(memcpy_orig)
+SYM_FUNC_START_LOCAL(memcpy_orig)
 	movq %rdi, %rax
 
 	cmpq $0x20, %rdx
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index fd5d25a474b7..0bfd26e4ca9e 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -4,8 +4,7 @@
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
-
-.weak memset
+#include <asm/export.h>
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
@@ -18,7 +17,7 @@
  *
  * rax   original destination
  */
-SYM_FUNC_START_ALIAS(memset)
+SYM_FUNC_START_WEAK(memset)
 SYM_FUNC_START(__memset)
 	/*
 	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
@@ -44,6 +43,8 @@ SYM_FUNC_START(__memset)
 	ret
 SYM_FUNC_END(__memset)
 SYM_FUNC_END_ALIAS(memset)
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
@@ -56,7 +57,7 @@ SYM_FUNC_END_ALIAS(memset)
  *
  * rax   original destination
  */
-SYM_FUNC_START(memset_erms)
+SYM_FUNC_START_LOCAL(memset_erms)
 	movq %rdi,%r9
 	movb %sil,%al
 	movq %rdx,%rcx
@@ -65,7 +66,7 @@ SYM_FUNC_START(memset_erms)
 	ret
 SYM_FUNC_END(memset_erms)
 
-SYM_FUNC_START(memset_orig)
+SYM_FUNC_START_LOCAL(memset_orig)
 	movq %rdi,%r10
 
 	/* expand byte value  */
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 9ad015a1e202..6eb45a2aa8db 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -2,6 +2,9 @@
 
 /* Various wrappers to make the kernel .S file build in user-space: */
 
+// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it
+#define SYM_FUNC_START_LOCAL(name)                      \
+        SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index d550bd526162..6f093c483842 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it
+#define SYM_FUNC_START_LOCAL(name)                      \
+        SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #define memset MEMSET /* don't hide glibc's memset() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index b8a5159361b4..5acf053fca7d 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -25,6 +25,7 @@
 
 /* SYM_L_* -- linkage of symbols */
 #define SYM_L_GLOBAL(name)			.globl name
+#define SYM_L_WEAK(name)			.weak name
 #define SYM_L_LOCAL(name)			/* nothing */
 
 #define ALIGN __ALIGN
@@ -84,6 +85,12 @@
 	SYM_END(name, SYM_T_FUNC)
 #endif
 
+/* SYM_FUNC_START_WEAK -- use for weak functions */
+#ifndef SYM_FUNC_START_WEAK
+#define SYM_FUNC_START_WEAK(name)			\
+	SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN)
+#endif
+
 /*
  * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
  * SYM_FUNC_START_WEAK, ...
-- 
cgit v1.2.3-70-g09d2


From db2ac2e49e564c2b219c4b33d9903aa383334256 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Tue, 10 Nov 2020 14:34:16 +0800
Subject: perf test: Fix a typo in cs-etm testing

Fix a typo: s/devce_name/device_name.

Fixes: fe0aed19b266 ("perf test: Introduce script for Arm CoreSight testing")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: coresight ml <coresight@lists.linaro.org>
Cc: stable@kernel.org
Link: http://lore.kernel.org/lkml/20201110063417.14467-1-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/test_arm_coresight.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index 8d84fdbed6a6..59d847d4981d 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -105,7 +105,7 @@ arm_cs_iterate_devices() {
 		#     `> device_name = 'tmc_etf0'
 		device_name=$(basename $path)
 
-		if is_device_sink $path $devce_name; then
+		if is_device_sink $path $device_name; then
 
 			record_touch_file $device_name $2 &&
 			perf_script_branch_samples touch &&
-- 
cgit v1.2.3-70-g09d2


From dd94ac807a5e10e0b25b68397c473276905cca73 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Tue, 10 Nov 2020 14:34:17 +0800
Subject: perf test: Update branch sample pattern for cs-etm

Since the commit 943b69ac1884 ("perf parse-events: Set exclude_guest=1
for user-space counting"), 'exclude_guest=1' is set for user-space
counting; and the branch sample's modifier has been altered, the sample
event name has been changed from "branches:u:" to "branches:uH:", which
gives out info for "user-space and host counting".

But the cs-etm testing's regular expression cannot match the updated
branch sample event and leads to test failure.

This patch updates the branch sample pattern by using a more flexible
expression '.*' to match branch sample's modifiers, so that allows the
testing to work as expected.

Fixes: 943b69ac1884 ("perf parse-events: Set exclude_guest=1 for user-space counting")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: coresight ml <coresight@lists.linaro.org>
Cc: stable@kernel.org
Link: http://lore.kernel.org/lkml/20201110063417.14467-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/test_arm_coresight.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index 59d847d4981d..18fde2f179cd 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -44,7 +44,7 @@ perf_script_branch_samples() {
 	#   touch  6512          1         branches:u:      ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so)
 	#   touch  6512          1         branches:u:      ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so)
 	perf script -F,-time -i ${perfdata} | \
-		egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +"
+		egrep " +$1 +[0-9]+ .* +branches:(.*:)? +"
 }
 
 perf_report_branch_samples() {
-- 
cgit v1.2.3-70-g09d2


From e865802357086b36632acf3e629f726f089a6769 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Wed, 11 Nov 2020 16:05:35 +0100
Subject: selftests: set conf.all.rp_filter=0 in bareudp.sh

When working on the rp_filter problem, I didn't realise that disabling
it on the network devices didn't cover all cases: rp_filter could also
be enabled globally in the namespace, in which case it would drop
packets, even if the net device has rp_filter=0.

Fixes: 1ccd58331f6f ("selftests: disable rp_filter when testing bareudp")
Fixes: bbbc7aa45eef ("selftests: add test script for bareudp tunnels")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/f2d459346471f163b239aa9d63ce3e2ba9c62895.1605107012.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/bareudp.sh | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index c2b9e990e544..f366cadbc5e8 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -238,6 +238,8 @@ setup_overlay_ipv4()
 	# The intermediate namespaces don't have routes for the reverse path,
 	# as it will be handled by tc. So we need to ensure that rp_filter is
 	# not going to block the traffic.
+	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
 	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 }
-- 
cgit v1.2.3-70-g09d2


From 9cc873e85800ccde80aa2e4b2bae9f1b5fa4c478 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 10 Nov 2020 19:12:12 -0800
Subject: selftests/bpf: Add skb_pkt_end test

Add a test that currently makes LLVM generate assembly code:

$ llvm-objdump -S skb_pkt_end.o
0000000000000000 <main_prog>:
; 	if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
       0:	61 12 50 00 00 00 00 00	r2 = *(u32 *)(r1 + 80)
       1:	61 14 4c 00 00 00 00 00	r4 = *(u32 *)(r1 + 76)
       2:	bf 43 00 00 00 00 00 00	r3 = r4
       3:	07 03 00 00 36 00 00 00	r3 += 54
       4:	b7 01 00 00 00 00 00 00	r1 = 0
       5:	2d 23 02 00 00 00 00 00	if r3 > r2 goto +2 <LBB0_2>
       6:	07 04 00 00 0e 00 00 00	r4 += 14
; 	if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
       7:	bf 41 00 00 00 00 00 00	r1 = r4
0000000000000040 <LBB0_2>:
       8:	b4 00 00 00 ff ff ff ff	w0 = -1
; 	if (!(ip = get_iphdr(skb)))
       9:	2d 23 05 00 00 00 00 00	if r3 > r2 goto +5 <LBB0_6>
; 	proto = ip->protocol;
      10:	71 12 09 00 00 00 00 00	r2 = *(u8 *)(r1 + 9)
; 	if (proto != IPPROTO_TCP)
      11:	56 02 03 00 06 00 00 00	if w2 != 6 goto +3 <LBB0_6>
; 	if (tcp->dest != 0)
      12:	69 12 16 00 00 00 00 00	r2 = *(u16 *)(r1 + 22)
      13:	56 02 01 00 00 00 00 00	if w2 != 0 goto +1 <LBB0_6>
; 	return tcp->urg_ptr;
      14:	69 10 26 00 00 00 00 00	r0 = *(u16 *)(r1 + 38)
0000000000000078 <LBB0_6>:
; }
      15:	95 00 00 00 00 00 00 00	exit

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201111031213.25109-3-alexei.starovoitov@gmail.com
---
 .../selftests/bpf/prog_tests/test_skb_pkt_end.c    | 41 ++++++++++++++++
 tools/testing/selftests/bpf/progs/skb_pkt_end.c    | 54 ++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
 create mode 100644 tools/testing/selftests/bpf/progs/skb_pkt_end.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
new file mode 100644
index 000000000000..cf1215531920
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_pkt_end.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+	__u32 duration, retval;
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	if (CHECK(err || retval != 123, "test_run",
+		  "err %d errno %d retval %d duration %d\n",
+		  err, errno, retval, duration))
+		return -1;
+	return 0;
+}
+
+void test_test_skb_pkt_end(void)
+{
+	struct skb_pkt_end *skb_pkt_end_skel = NULL;
+	__u32 duration = 0;
+	int err;
+
+	skb_pkt_end_skel = skb_pkt_end__open_and_load();
+	if (CHECK(!skb_pkt_end_skel, "skb_pkt_end_skel_load", "skb_pkt_end skeleton failed\n"))
+		goto cleanup;
+
+	err = skb_pkt_end__attach(skb_pkt_end_skel);
+	if (CHECK(err, "skb_pkt_end_attach", "skb_pkt_end attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(skb_pkt_end_skel->progs.main_prog))
+		goto cleanup;
+
+cleanup:
+	skb_pkt_end__destroy(skb_pkt_end_skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
new file mode 100644
index 000000000000..cf6823f42e80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_PRESERVE_ACCESS_INDEX
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#define NULL 0
+#define INLINE __always_inline
+
+#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
+
+#define ETH_IPV4_TCP_SIZE (14 + sizeof(struct iphdr) + sizeof(struct tcphdr))
+
+static INLINE struct iphdr *get_iphdr(struct __sk_buff *skb)
+{
+	struct iphdr *ip = NULL;
+	struct ethhdr *eth;
+
+	if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
+		goto out;
+
+	eth = (void *)(long)skb->data;
+	ip = (void *)(eth + 1);
+
+out:
+	return ip;
+}
+
+SEC("classifier/cls")
+int main_prog(struct __sk_buff *skb)
+{
+	struct iphdr *ip = NULL;
+	struct tcphdr *tcp;
+	__u8 proto = 0;
+
+	if (!(ip = get_iphdr(skb)))
+		goto out;
+
+	proto = ip->protocol;
+
+	if (proto != IPPROTO_TCP)
+		goto out;
+
+	tcp = (void*)(ip + 1);
+	if (tcp->dest != 0)
+		goto out;
+	if (!tcp)
+		goto out;
+
+	return tcp->urg_ptr;
+out:
+	return -1;
+}
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-70-g09d2


From cb62d34019d9117bb94de6ed35959449d43d6055 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 10 Nov 2020 19:12:13 -0800
Subject: selftests/bpf: Add asm tests for pkt vs pkt_end comparison.

Add few assembly tests for packet comparison.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201111031213.25109-4-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/verifier/ctx_skb.c | 42 ++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index 2e16b8e268f2..2022c0f2cd75 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -1089,3 +1089,45 @@
 	.errstr_unpriv = "R1 leaks addr",
 	.result = REJECT,
 },
+{
+       "pkt > pkt_end taken check",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,                //  0. r2 = *(u32 *)(r1 + data_end)
+                   offsetof(struct __sk_buff, data_end)),
+       BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,                //  1. r4 = *(u32 *)(r1 + data)
+                   offsetof(struct __sk_buff, data)),
+       BPF_MOV64_REG(BPF_REG_3, BPF_REG_4),                    //  2. r3 = r4
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42),                  //  3. r3 += 42
+       BPF_MOV64_IMM(BPF_REG_1, 0),                            //  4. r1 = 0
+       BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2),          //  5. if r3 > r2 goto 8
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14),                  //  6. r4 += 14
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),                    //  7. r1 = r4
+       BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1),          //  8. if r3 > r2 goto 10
+       BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9),            //  9. r2 = *(u8 *)(r1 + 9)
+       BPF_MOV64_IMM(BPF_REG_0, 0),                            // 10. r0 = 0
+       BPF_EXIT_INSN(),                                        // 11. exit
+       },
+       .result = ACCEPT,
+       .prog_type = BPF_PROG_TYPE_SK_SKB,
+},
+{
+       "pkt_end < pkt taken check",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,                //  0. r2 = *(u32 *)(r1 + data_end)
+                   offsetof(struct __sk_buff, data_end)),
+       BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,                //  1. r4 = *(u32 *)(r1 + data)
+                   offsetof(struct __sk_buff, data)),
+       BPF_MOV64_REG(BPF_REG_3, BPF_REG_4),                    //  2. r3 = r4
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42),                  //  3. r3 += 42
+       BPF_MOV64_IMM(BPF_REG_1, 0),                            //  4. r1 = 0
+       BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2),          //  5. if r3 > r2 goto 8
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14),                  //  6. r4 += 14
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),                    //  7. r1 = r4
+       BPF_JMP_REG(BPF_JLT, BPF_REG_2, BPF_REG_3, 1),          //  8. if r2 < r3 goto 10
+       BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9),            //  9. r2 = *(u8 *)(r1 + 9)
+       BPF_MOV64_IMM(BPF_REG_0, 0),                            // 10. r0 = 0
+       BPF_EXIT_INSN(),                                        // 11. exit
+       },
+       .result = ACCEPT,
+       .prog_type = BPF_PROG_TYPE_SK_SKB,
+},
-- 
cgit v1.2.3-70-g09d2


From 53632e11194663b7d5b043a68648892e593dc102 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 12 Nov 2020 13:13:20 -0800
Subject: bpf: selftest: Use bpf_sk_storage in FENTRY/FEXIT/RAW_TP

This patch tests storing the task's related info into the
bpf_sk_storage by fentry/fexit tracing at listen, accept,
and connect.  It also tests the raw_tp at inet_sock_set_state.

A negative test is done by tracing the bpf_sk_storage_free()
and using bpf_sk_storage_get() at the same time.  It ensures
this bpf program cannot load.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201112211320.2587537-1-kafai@fb.com
---
 .../selftests/bpf/prog_tests/sk_storage_tracing.c  | 135 +++++++++++++++++++++
 .../bpf/progs/test_sk_storage_trace_itself.c       |  29 +++++
 .../selftests/bpf/progs/test_sk_storage_tracing.c  |  95 +++++++++++++++
 3 files changed, 259 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
new file mode 100644
index 000000000000..2b392590e8ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_sk_storage_trace_itself.skel.h"
+#include "test_sk_storage_tracing.skel.h"
+
+#define LO_ADDR6 "::1"
+#define TEST_COMM "test_progs"
+
+struct sk_stg {
+	__u32 pid;
+	__u32 last_notclose_state;
+	char comm[16];
+};
+
+static struct test_sk_storage_tracing *skel;
+static __u32 duration;
+static pid_t my_pid;
+
+static int check_sk_stg(int sk_fd, __u32 expected_state)
+{
+	struct sk_stg sk_stg;
+	int err;
+
+	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_stg_map), &sk_fd,
+				  &sk_stg);
+	if (!ASSERT_OK(err, "map_lookup(sk_stg_map)"))
+		return -1;
+
+	if (!ASSERT_EQ(sk_stg.last_notclose_state, expected_state,
+		       "last_notclose_state"))
+		return -1;
+
+	if (!ASSERT_EQ(sk_stg.pid, my_pid, "pid"))
+		return -1;
+
+	if (!ASSERT_STREQ(sk_stg.comm, skel->bss->task_comm, "task_comm"))
+		return -1;
+
+	return 0;
+}
+
+static void do_test(void)
+{
+	int listen_fd = -1, passive_fd = -1, active_fd = -1, value = 1, err;
+	char abyte;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+	if (CHECK(listen_fd == -1, "start_server",
+		  "listen_fd:%d errno:%d\n", listen_fd, errno))
+		return;
+
+	active_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK(active_fd == -1, "connect_to_fd", "active_fd:%d errno:%d\n",
+		  active_fd, errno))
+		goto out;
+
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.del_sk_stg_map),
+				  &active_fd, &value, 0);
+	if (!ASSERT_OK(err, "map_update(del_sk_stg_map)"))
+		goto out;
+
+	passive_fd = accept(listen_fd, NULL, 0);
+	if (CHECK(passive_fd == -1, "accept", "passive_fd:%d errno:%d\n",
+		  passive_fd, errno))
+		goto out;
+
+	shutdown(active_fd, SHUT_WR);
+	err = read(passive_fd, &abyte, 1);
+	if (!ASSERT_OK(err, "read(passive_fd)"))
+		goto out;
+
+	shutdown(passive_fd, SHUT_WR);
+	err = read(active_fd, &abyte, 1);
+	if (!ASSERT_OK(err, "read(active_fd)"))
+		goto out;
+
+	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.del_sk_stg_map),
+				  &active_fd, &value);
+	if (!ASSERT_ERR(err, "map_lookup(del_sk_stg_map)"))
+		goto out;
+
+	err = check_sk_stg(listen_fd, BPF_TCP_LISTEN);
+	if (!ASSERT_OK(err, "listen_fd sk_stg"))
+		goto out;
+
+	err = check_sk_stg(active_fd, BPF_TCP_FIN_WAIT2);
+	if (!ASSERT_OK(err, "active_fd sk_stg"))
+		goto out;
+
+	err = check_sk_stg(passive_fd, BPF_TCP_LAST_ACK);
+	ASSERT_OK(err, "passive_fd sk_stg");
+
+out:
+	if (active_fd != -1)
+		close(active_fd);
+	if (passive_fd != -1)
+		close(passive_fd);
+	if (listen_fd != -1)
+		close(listen_fd);
+}
+
+void test_sk_storage_tracing(void)
+{
+	struct test_sk_storage_trace_itself *skel_itself;
+	int err;
+
+	my_pid = getpid();
+
+	skel_itself = test_sk_storage_trace_itself__open_and_load();
+
+	if (!ASSERT_NULL(skel_itself, "test_sk_storage_trace_itself")) {
+		test_sk_storage_trace_itself__destroy(skel_itself);
+		return;
+	}
+
+	skel = test_sk_storage_tracing__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_sk_storage_tracing"))
+		return;
+
+	err = test_sk_storage_tracing__attach(skel);
+	if (!ASSERT_OK(err, "test_sk_storage_tracing__attach")) {
+		test_sk_storage_tracing__destroy(skel);
+		return;
+	}
+
+	do_test();
+
+	test_sk_storage_tracing__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
new file mode 100644
index 000000000000..59ef72d02a61
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("fentry/bpf_sk_storage_free")
+int BPF_PROG(trace_bpf_sk_storage_free, struct sock *sk)
+{
+	int *value;
+
+	value = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				   BPF_SK_STORAGE_GET_F_CREATE);
+
+	if (value)
+		*value = 1;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
new file mode 100644
index 000000000000..8e94e5c080aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+struct sk_stg {
+	__u32 pid;
+	__u32 last_notclose_state;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+/* Testing delete */
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} del_sk_stg_map SEC(".maps");
+
+char task_comm[16] = "";
+
+SEC("tp_btf/inet_sock_set_state")
+int BPF_PROG(trace_inet_sock_set_state, struct sock *sk, int oldstate,
+	     int newstate)
+{
+	struct sk_stg *stg;
+
+	if (newstate == BPF_TCP_CLOSE)
+		return 0;
+
+	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				 BPF_SK_STORAGE_GET_F_CREATE);
+	if (!stg)
+		return 0;
+
+	stg->last_notclose_state = newstate;
+
+	bpf_sk_storage_delete(&del_sk_stg_map, sk);
+
+	return 0;
+}
+
+static void set_task_info(struct sock *sk)
+{
+	struct task_struct *task;
+	struct sk_stg *stg;
+
+	stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+				 BPF_SK_STORAGE_GET_F_CREATE);
+	if (!stg)
+		return;
+
+	stg->pid = bpf_get_current_pid_tgid();
+
+	task = (struct task_struct *)bpf_get_current_task();
+	bpf_core_read_str(&stg->comm, sizeof(stg->comm), &task->comm);
+	bpf_core_read_str(&task_comm, sizeof(task_comm), &task->comm);
+}
+
+SEC("fentry/inet_csk_listen_start")
+int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog)
+{
+	set_task_info(sk);
+
+	return 0;
+}
+
+SEC("fentry/tcp_connect")
+int BPF_PROG(trace_tcp_connect, struct sock *sk)
+{
+	set_task_info(sk);
+
+	return 0;
+}
+
+SEC("fexit/inet_csk_accept")
+int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+	     struct sock *accepted_sk)
+{
+	set_task_info(accepted_sk);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-70-g09d2


From 50431b45685b600fc2851a3f2b53e24643efe6d3 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Fri, 13 Nov 2020 19:51:52 +0800
Subject: tools, bpftool: Add missing close before bpftool net attach exit

progfd is created by prog_parse_fd() in do_attach() and before the latter
returns in case of success, the file descriptor should be closed.

Fixes: 04949ccc273e ("tools: bpftool: add net attach command to attach XDP on interface")
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201113115152.53178-1-wanghai38@huawei.com
---
 tools/bpf/bpftool/net.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 910e7bac6e9e..3fae61ef6339 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -578,8 +578,8 @@ static int do_attach(int argc, char **argv)
 
 	ifindex = net_parse_dev(&argc, &argv);
 	if (ifindex < 1) {
-		close(progfd);
-		return -EINVAL;
+		err = -EINVAL;
+		goto cleanup;
 	}
 
 	if (argc) {
@@ -587,8 +587,8 @@ static int do_attach(int argc, char **argv)
 			overwrite = true;
 		} else {
 			p_err("expected 'overwrite', got: '%s'?", *argv);
-			close(progfd);
-			return -EINVAL;
+			err = -EINVAL;
+			goto cleanup;
 		}
 	}
 
@@ -596,17 +596,17 @@ static int do_attach(int argc, char **argv)
 	if (is_prefix("xdp", attach_type_strings[attach_type]))
 		err = do_attach_detach_xdp(progfd, attach_type, ifindex,
 					   overwrite);
-
-	if (err < 0) {
+	if (err) {
 		p_err("interface %s attach failed: %s",
 		      attach_type_strings[attach_type], strerror(-err));
-		return err;
+		goto cleanup;
 	}
 
 	if (json_output)
 		jsonw_null(json_wtr);
-
-	return 0;
+cleanup:
+	close(progfd);
+	return err;
 }
 
 static int do_detach(int argc, char **argv)
-- 
cgit v1.2.3-70-g09d2


From f782e2c300a717233b64697affda3ea7aac00b2b Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Fri, 13 Nov 2020 17:17:56 +0000
Subject: bpf: Relax return code check for subprograms

Currently verifier enforces return code checks for subprograms in the
same manner as it does for program entry points. This prevents returning
arbitrary scalar values from subprograms. Scalar type of returned values
is checked by btf_prepare_func_args() and hence it should be safe to
allow only scalars for now. Relax return code checks for subprograms and
allow any correct scalar values.

Fixes: 51c39bb1d5d10 (bpf: Introduce function-by-function verification)
Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201113171756.90594-1-me@ubique.spb.ru
---
 kernel/bpf/verifier.c                                 | 15 +++++++++++++--
 .../selftests/bpf/prog_tests/test_global_funcs.c      |  1 +
 tools/testing/selftests/bpf/progs/test_global_func8.c | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_func8.c

(limited to 'tools')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6204ec705d80..1388bf733071 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
 	struct tnum range = tnum_range(0, 1);
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 	int err;
+	const bool is_subprog = env->cur_state->frame[0]->subprogno;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
-	if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+	if (!is_subprog &&
+	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
 	     prog_type == BPF_PROG_TYPE_LSM) &&
 	    !prog->aux->attach_func_proto->type)
 		return 0;
@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
 		return -EACCES;
 	}
 
+	reg = cur_regs(env) + BPF_REG_0;
+	if (is_subprog) {
+		if (reg->type != SCALAR_VALUE) {
+			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
+				reg_type_str[reg->type]);
+			return -EINVAL;
+		}
+		return 0;
+	}
+
 	switch (prog_type) {
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
 		return 0;
 	}
 
-	reg = cur_regs(env) + BPF_REG_0;
 	if (reg->type != SCALAR_VALUE) {
 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
 			reg_type_str[reg->type]);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 193002b14d7f..32e4348b714b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -60,6 +60,7 @@ void test_test_global_funcs(void)
 		{ "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
 		{ "test_global_func6.o" , "modified ctx ptr R2" },
 		{ "test_global_func7.o" , "foo() doesn't return scalar" },
+		{ "test_global_func8.o" },
 	};
 	libbpf_print_fn_t old_print_fn = NULL;
 	int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c
new file mode 100644
index 000000000000..d55a6544b1ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func8.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline int foo(struct __sk_buff *skb)
+{
+	return bpf_get_prandom_u32();
+}
+
+SEC("cgroup_skb/ingress")
+int test_cls(struct __sk_buff *skb)
+{
+	if (!foo(skb))
+		return 0;
+
+	return 1;
+}
-- 
cgit v1.2.3-70-g09d2


From 7a873e4555679a0e749422db071c142b57f80be9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Tue, 6 Oct 2020 18:44:17 -0700
Subject: KVM: selftests: Verify supported CR4 bits can be set before
 KVM_SET_CPUID2

Extend the KVM_SET_SREGS test to verify that all supported CR4 bits, as
enumerated by KVM, can be set before KVM_SET_CPUID2, i.e. without first
defining the vCPU model.  KVM is supposed to skip guest CPUID checks
when host userspace is stuffing guest state.

Check the inverse as well, i.e. that KVM rejects KVM_SET_REGS if CR4
has one or more unsupported bits set.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20201007014417.29276-7-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/processor.h       | 17 ++++
 tools/testing/selftests/kvm/include/x86_64/vmx.h   |  4 -
 .../testing/selftests/kvm/x86_64/set_sregs_test.c  | 92 +++++++++++++++++++++-
 3 files changed, 108 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 8e61340b3911..90cd5984751b 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -27,6 +27,7 @@
 #define X86_CR4_OSFXSR		(1ul << 9)
 #define X86_CR4_OSXMMEXCPT	(1ul << 10)
 #define X86_CR4_UMIP		(1ul << 11)
+#define X86_CR4_LA57		(1ul << 12)
 #define X86_CR4_VMXE		(1ul << 13)
 #define X86_CR4_SMXE		(1ul << 14)
 #define X86_CR4_FSGSBASE	(1ul << 16)
@@ -36,6 +37,22 @@
 #define X86_CR4_SMAP		(1ul << 21)
 #define X86_CR4_PKE		(1ul << 22)
 
+/* CPUID.1.ECX */
+#define CPUID_VMX		(1ul << 5)
+#define CPUID_SMX		(1ul << 6)
+#define CPUID_PCID		(1ul << 17)
+#define CPUID_XSAVE		(1ul << 26)
+
+/* CPUID.7.EBX */
+#define CPUID_FSGSBASE		(1ul << 0)
+#define CPUID_SMEP		(1ul << 7)
+#define CPUID_SMAP		(1ul << 20)
+
+/* CPUID.7.ECX */
+#define CPUID_UMIP		(1ul << 2)
+#define CPUID_PKU		(1ul << 3)
+#define CPUID_LA57		(1ul << 16)
+
 #define UNEXPECTED_VECTOR_PORT 0xfff0u
 
 /* General Registers in 64-Bit Mode */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index e78d7e26ba61..65eb1079a161 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -11,10 +11,6 @@
 #include <stdint.h>
 #include "processor.h"
 
-#define CPUID_VMX_BIT				5
-
-#define CPUID_VMX				(1 << 5)
-
 /*
  * Definitions of Primary Processor-Based VM-Execution Controls.
  */
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 9f7656184f31..318be0bf77ab 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -24,16 +24,106 @@
 
 #define VCPU_ID                  5
 
+static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig,
+				 uint64_t feature_bit)
+{
+	struct kvm_sregs sregs;
+	int rc;
+
+	/* Skip the sub-test, the feature is supported. */
+	if (orig->cr4 & feature_bit)
+		return;
+
+	memcpy(&sregs, orig, sizeof(sregs));
+	sregs.cr4 |= feature_bit;
+
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
+
+	/* Sanity check that KVM didn't change anything. */
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
+}
+
+static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm)
+{
+	struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7;
+	uint64_t cr4;
+
+	cpuid_1 = kvm_get_supported_cpuid_entry(1);
+	cpuid_7 = kvm_get_supported_cpuid_entry(7);
+
+	cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+	      X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+	      X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+	if (cpuid_7->ecx & CPUID_UMIP)
+		cr4 |= X86_CR4_UMIP;
+	if (cpuid_7->ecx & CPUID_LA57)
+		cr4 |= X86_CR4_LA57;
+	if (cpuid_1->ecx & CPUID_VMX)
+		cr4 |= X86_CR4_VMXE;
+	if (cpuid_1->ecx & CPUID_SMX)
+		cr4 |= X86_CR4_SMXE;
+	if (cpuid_7->ebx & CPUID_FSGSBASE)
+		cr4 |= X86_CR4_FSGSBASE;
+	if (cpuid_1->ecx & CPUID_PCID)
+		cr4 |= X86_CR4_PCIDE;
+	if (cpuid_1->ecx & CPUID_XSAVE)
+		cr4 |= X86_CR4_OSXSAVE;
+	if (cpuid_7->ebx & CPUID_SMEP)
+		cr4 |= X86_CR4_SMEP;
+	if (cpuid_7->ebx & CPUID_SMAP)
+		cr4 |= X86_CR4_SMAP;
+	if (cpuid_7->ecx & CPUID_PKU)
+		cr4 |= X86_CR4_PKE;
+
+	return cr4;
+}
+
 int main(int argc, char *argv[])
 {
 	struct kvm_sregs sregs;
 	struct kvm_vm *vm;
+	uint64_t cr4;
 	int rc;
 
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
 
-	/* Create VM */
+	/*
+	 * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+	 * use it to verify all supported CR4 bits can be set prior to defining
+	 * the vCPU model, i.e. without doing KVM_SET_CPUID2.
+	 */
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	vm_vcpu_add(vm, VCPU_ID);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+
+	sregs.cr4 |= calc_cr4_feature_bits(vm);
+	cr4 = sregs.cr4;
+
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+		    sregs.cr4, cr4);
+
+	/* Verify all unsupported features are rejected by KVM. */
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP);
+	test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE);
+	kvm_vm_free(vm);
+
+	/* Create a "real" VM and verify APIC_BASE can be set. */
 	vm = vm_create_default(VCPU_ID, 0, NULL);
 
 	vcpu_sregs_get(vm, VCPU_ID, &sregs);
-- 
cgit v1.2.3-70-g09d2


From 8b460692fee46a47cebd66d70df88dc9aa6d6b8b Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 29 Sep 2020 17:09:44 +0200
Subject: KVM: selftests: test KVM_GET_SUPPORTED_HV_CPUID as a system ioctl

KVM_GET_SUPPORTED_HV_CPUID is now supported as both vCPU and VM ioctl,
test that.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20200929150944.1235688-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util.h    |  2 +
 tools/testing/selftests/kvm/lib/kvm_util.c        | 26 +++++++
 tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c | 87 +++++++++++++----------
 3 files changed, 77 insertions(+), 38 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 7d29aa786959..b2c1364c0402 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -114,6 +114,8 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 		void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 126c6727a6b0..ef2114e1b7ad 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1582,6 +1582,32 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
 		cmd, ret, errno, strerror(errno));
 }
 
+/*
+ * KVM system ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a KVM fd.
+ */
+void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+	int ret;
+
+	ret = ioctl(vm->kvm_fd, cmd, arg);
+	TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+	return ioctl(vm->kvm_fd, cmd, arg);
+}
+
 /*
  * VM Dump
  *
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 745b708c2d3b..88a595b7fbdd 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -46,19 +46,19 @@ static bool smt_possible(void)
 }
 
 static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
-			  bool evmcs_enabled)
+			  bool evmcs_expected)
 {
 	int i;
 	int nent = 9;
 	u32 test_val;
 
-	if (evmcs_enabled)
+	if (evmcs_expected)
 		nent += 1; /* 0x4000000A */
 
 	TEST_ASSERT(hv_cpuid_entries->nent == nent,
 		    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
 		    " with evmcs=%d (returned %d)",
-		    nent, evmcs_enabled, hv_cpuid_entries->nent);
+		    nent, evmcs_expected, hv_cpuid_entries->nent);
 
 	for (i = 0; i < hv_cpuid_entries->nent; i++) {
 		struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
@@ -68,7 +68,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 			    "function %x is our of supported range",
 			    entry->function);
 
-		TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+		TEST_ASSERT(evmcs_expected || (entry->function != 0x4000000A),
 			    "0x4000000A leaf should not be reported");
 
 		TEST_ASSERT(entry->index == 0,
@@ -87,7 +87,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 			TEST_ASSERT(entry->eax == test_val,
 				    "Wrong max leaf report in 0x40000000.EAX: %x"
 				    " (evmcs=%d)",
-				    entry->eax, evmcs_enabled
+				    entry->eax, evmcs_expected
 				);
 			break;
 		case 0x40000004:
@@ -110,20 +110,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 
 }
 
-void test_hv_cpuid_e2big(struct kvm_vm *vm)
+void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
 {
 	static struct kvm_cpuid2 cpuid = {.nent = 0};
 	int ret;
 
-	ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+	if (!system)
+		ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+	else
+		ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
 
 	TEST_ASSERT(ret == -1 && errno == E2BIG,
-		    "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
-		    " it should have: %d %d", ret, errno);
+		    "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+		    " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno);
 }
 
 
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system)
 {
 	int nent = 20; /* should be enough */
 	static struct kvm_cpuid2 *cpuid;
@@ -137,7 +140,10 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
 
 	cpuid->nent = nent;
 
-	vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+	if (!system)
+		vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+	else
+		kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
 
 	return cpuid;
 }
@@ -146,45 +152,50 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
 int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
-	int rv, stage;
 	struct kvm_cpuid2 *hv_cpuid_entries;
-	bool evmcs_enabled;
 
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
 
-	rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
-	if (!rv) {
+	if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) {
 		print_skip("KVM_CAP_HYPERV_CPUID not supported");
 		exit(KSFT_SKIP);
 	}
 
-	for (stage = 0; stage < 3; stage++) {
-		evmcs_enabled = false;
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-		vm = vm_create_default(VCPU_ID, 0, guest_code);
-		switch (stage) {
-		case 0:
-			test_hv_cpuid_e2big(vm);
-			continue;
-		case 1:
-			break;
-		case 2:
-			if (!nested_vmx_supported() ||
-			    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-				print_skip("Enlightened VMCS is unsupported");
-				continue;
-			}
-			vcpu_enable_evmcs(vm, VCPU_ID);
-			evmcs_enabled = true;
-			break;
-		}
+	/* Test vCPU ioctl version */
+	test_hv_cpuid_e2big(vm, false);
+
+	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+	test_hv_cpuid(hv_cpuid_entries, false);
+	free(hv_cpuid_entries);
 
-		hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
-		test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
-		free(hv_cpuid_entries);
-		kvm_vm_free(vm);
+	if (!nested_vmx_supported() ||
+	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+		print_skip("Enlightened VMCS is unsupported");
+		goto do_sys;
 	}
+	vcpu_enable_evmcs(vm, VCPU_ID);
+	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+	test_hv_cpuid(hv_cpuid_entries, true);
+	free(hv_cpuid_entries);
+
+do_sys:
+	/* Test system ioctl version */
+	if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+		print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+		goto out;
+	}
+
+	test_hv_cpuid_e2big(vm, true);
+
+	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true);
+	test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported());
+	free(hv_cpuid_entries);
+
+out:
+	kvm_vm_free(vm);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 60f644fb519831edff38c79755f7970c475e2ece Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 30 Sep 2020 21:22:35 -0400
Subject: KVM: selftests: Introduce after_vcpu_run hook for dirty log test

Provide a hook for the checks after vcpu_run() completes.  Preparation
for the dirty ring test because we'll need to take care of another
exit reason.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20201001012235.6063-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c | 36 ++++++++++++++++++----------
 1 file changed, 24 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 54da9cc20db4..9215b26af10c 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -178,6 +178,15 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
 }
 
+static void default_after_vcpu_run(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+		    "Invalid guest sync status: exit_reason=%s\n",
+		    exit_reason_str(run->exit_reason));
+}
+
 struct log_mode {
 	const char *name;
 	/* Return true if this mode is supported, otherwise false */
@@ -187,16 +196,20 @@ struct log_mode {
 	/* Hook to collect the dirty pages into the bitmap provided */
 	void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
 				     void *bitmap, uint32_t num_pages);
+	/* Hook to call when after each vcpu run */
+	void (*after_vcpu_run)(struct kvm_vm *vm);
 } log_modes[LOG_MODE_NUM] = {
 	{
 		.name = "dirty-log",
 		.collect_dirty_pages = dirty_log_collect_dirty_pages,
+		.after_vcpu_run = default_after_vcpu_run,
 	},
 	{
 		.name = "clear-log",
 		.supported = clear_log_supported,
 		.create_vm_done = clear_log_create_vm_done,
 		.collect_dirty_pages = clear_log_collect_dirty_pages,
+		.after_vcpu_run = default_after_vcpu_run,
 	},
 };
 
@@ -247,6 +260,14 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
 }
 
+static void log_mode_after_vcpu_run(struct kvm_vm *vm)
+{
+	struct log_mode *mode = &log_modes[host_log_mode];
+
+	if (mode->after_vcpu_run)
+		mode->after_vcpu_run(vm);
+}
+
 static void generate_random_array(uint64_t *guest_array, uint64_t size)
 {
 	uint64_t i;
@@ -261,25 +282,16 @@ static void *vcpu_worker(void *data)
 	struct kvm_vm *vm = data;
 	uint64_t *guest_array;
 	uint64_t pages_count = 0;
-	struct kvm_run *run;
-
-	run = vcpu_state(vm, VCPU_ID);
 
 	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
-	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 
 	while (!READ_ONCE(host_quit)) {
+		generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+		pages_count += TEST_PAGES_PER_LOOP;
 		/* Let the guest dirty the random pages */
 		ret = _vcpu_run(vm, VCPU_ID);
 		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
-		if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
-			pages_count += TEST_PAGES_PER_LOOP;
-			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
-		} else {
-			TEST_FAIL("Invalid guest sync status: "
-				  "exit_reason=%s\n",
-				  exit_reason_str(run->exit_reason));
-		}
+		log_mode_after_vcpu_run(vm);
 	}
 
 	pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-- 
cgit v1.2.3-70-g09d2


From 84292e565951cecfe2718e43905a6103c9e8ac29 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 30 Sep 2020 21:22:37 -0400
Subject: KVM: selftests: Add dirty ring buffer test

Add the initial dirty ring buffer test.

The current test implements the userspace dirty ring collection, by
only reaping the dirty ring when the ring is full.

So it's still running synchronously like this:

            vcpu                             main thread

  1. vcpu dirties pages
  2. vcpu gets dirty ring full
     (userspace exit)

                                       3. main thread waits until full
                                          (so hardware buffers flushed)
                                       4. main thread collects
                                       5. main thread continues vcpu

  6. vcpu continues, goes back to 1

We can't directly collects dirty bits during vcpu execution because
otherwise we can't guarantee the hardware dirty bits were flushed when
we collect and we're very strict on the dirty bits so otherwise we can
fail the future verify procedure.  A follow up patch will make this
test to support async just like the existing dirty log test, by adding
a vcpu kick mechanism.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20201001012237.6111-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c       | 254 ++++++++++++++++++++-
 tools/testing/selftests/kvm/include/kvm_util.h     |   3 +
 tools/testing/selftests/kvm/lib/kvm_util.c         |  72 +++++-
 .../testing/selftests/kvm/lib/kvm_util_internal.h  |   4 +
 4 files changed, 320 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 9215b26af10c..a7da4d9471e1 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -12,8 +12,13 @@
 #include <unistd.h>
 #include <time.h>
 #include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
+#include <asm/barrier.h>
 
 #include "test_util.h"
 #include "kvm_util.h"
@@ -57,6 +62,8 @@
 # define test_and_clear_bit_le	test_and_clear_bit
 #endif
 
+#define TEST_DIRTY_RING_COUNT		1024
+
 /*
  * Guest/Host shared variables. Ensure addr_gva2hva() and/or
  * sync_global_to/from_guest() are used when accessing from
@@ -128,6 +135,25 @@ static uint64_t host_dirty_count;
 static uint64_t host_clear_count;
 static uint64_t host_track_next_count;
 
+/* Whether dirty ring reset is requested, or finished */
+static sem_t dirty_ring_vcpu_stop;
+static sem_t dirty_ring_vcpu_cont;
+/*
+ * This is only used for verifying the dirty pages.  Dirty ring has a very
+ * tricky case when the ring just got full, kvm will do userspace exit due to
+ * ring full.  When that happens, the very last PFN is set but actually the
+ * data is not changed (the guest WRITE is not really applied yet), because
+ * we found that the dirty ring is full, refused to continue the vcpu, and
+ * recorded the dirty gfn with the old contents.
+ *
+ * For this specific case, it's safe to skip checking this pfn for this
+ * bit, because it's a redundant bit, and when the write happens later the bit
+ * will be set again.  We use this variable to always keep track of the latest
+ * dirty gfn we've collected, so that if a mismatch of data found later in the
+ * verifying process, we let it pass.
+ */
+static uint64_t dirty_ring_last_page;
+
 enum log_mode_t {
 	/* Only use KVM_GET_DIRTY_LOG for logging */
 	LOG_MODE_DIRTY_LOG = 0,
@@ -135,6 +161,9 @@ enum log_mode_t {
 	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
 	LOG_MODE_CLEAR_LOG = 1,
 
+	/* Use dirty ring for logging */
+	LOG_MODE_DIRTY_RING = 2,
+
 	LOG_MODE_NUM,
 
 	/* Run all supported modes */
@@ -145,6 +174,20 @@ enum log_mode_t {
 static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
 /* Logging mode for current run */
 static enum log_mode_t host_log_mode;
+static pthread_t vcpu_thread;
+
+/*
+ * In our test we do signal tricks, let's use a better version of
+ * sem_wait to avoid signal interrupts
+ */
+static void sem_wait_until(sem_t *sem)
+{
+	int ret;
+
+	do
+		ret = sem_wait(sem);
+	while (ret == -1 && errno == EINTR);
+}
 
 static bool clear_log_supported(void)
 {
@@ -178,15 +221,131 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
 }
 
-static void default_after_vcpu_run(struct kvm_vm *vm)
+static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
 {
 	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
 
+	TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
+		    "vcpu run failed: errno=%d", err);
+
 	TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
 		    "Invalid guest sync status: exit_reason=%s\n",
 		    exit_reason_str(run->exit_reason));
 }
 
+static bool dirty_ring_supported(void)
+{
+	return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+}
+
+static void dirty_ring_create_vm_done(struct kvm_vm *vm)
+{
+	/*
+	 * Switch to dirty ring mode after VM creation but before any
+	 * of the vcpu creation.
+	 */
+	vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT *
+			     sizeof(struct kvm_dirty_gfn));
+}
+
+static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
+{
+	return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
+{
+	gfn->flags = KVM_DIRTY_GFN_F_RESET;
+}
+
+static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+				       int slot, void *bitmap,
+				       uint32_t num_pages, uint32_t *fetch_index)
+{
+	struct kvm_dirty_gfn *cur;
+	uint32_t count = 0;
+
+	while (true) {
+		cur = &dirty_gfns[*fetch_index % TEST_DIRTY_RING_COUNT];
+		if (!dirty_gfn_is_dirtied(cur))
+			break;
+		TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
+			    "%u != %u", cur->slot, slot);
+		TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
+			    "0x%llx >= 0x%x", cur->offset, num_pages);
+		//pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
+		set_bit_le(cur->offset, bitmap);
+		dirty_ring_last_page = cur->offset;
+		dirty_gfn_set_collected(cur);
+		(*fetch_index)++;
+		count++;
+	}
+
+	return count;
+}
+
+static void dirty_ring_wait_vcpu(void)
+{
+	sem_wait_until(&dirty_ring_vcpu_stop);
+}
+
+static void dirty_ring_continue_vcpu(void)
+{
+	pr_info("Notifying vcpu to continue\n");
+	sem_post(&dirty_ring_vcpu_cont);
+}
+
+static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+					   void *bitmap, uint32_t num_pages)
+{
+	/* We only have one vcpu */
+	static uint32_t fetch_index = 0;
+	uint32_t count = 0, cleared;
+
+	dirty_ring_wait_vcpu();
+
+	/* Only have one vcpu */
+	count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
+				       slot, bitmap, num_pages, &fetch_index);
+
+	cleared = kvm_vm_reset_dirty_ring(vm);
+
+	/* Cleared pages should be the same as collected */
+	TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
+		    "with collected (%u)", cleared, count);
+
+	dirty_ring_continue_vcpu();
+
+	pr_info("Iteration %ld collected %u pages\n", iteration, count);
+}
+
+static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	/* A ucall-sync or ring-full event is allowed */
+	if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
+		/* We should allow this to continue */
+		;
+	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+		/* Update the flag first before pause */
+		sem_post(&dirty_ring_vcpu_stop);
+		pr_info("vcpu stops because dirty ring is full...\n");
+		sem_wait_until(&dirty_ring_vcpu_cont);
+		pr_info("vcpu continues now.\n");
+	} else {
+		TEST_ASSERT(false, "Invalid guest sync status: "
+			    "exit_reason=%s\n",
+			    exit_reason_str(run->exit_reason));
+	}
+}
+
+static void dirty_ring_before_vcpu_join(void)
+{
+	/* Kick another round of vcpu just to make sure it will quit */
+	sem_post(&dirty_ring_vcpu_cont);
+}
+
 struct log_mode {
 	const char *name;
 	/* Return true if this mode is supported, otherwise false */
@@ -197,7 +356,8 @@ struct log_mode {
 	void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
 				     void *bitmap, uint32_t num_pages);
 	/* Hook to call when after each vcpu run */
-	void (*after_vcpu_run)(struct kvm_vm *vm);
+	void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
+	void (*before_vcpu_join) (void);
 } log_modes[LOG_MODE_NUM] = {
 	{
 		.name = "dirty-log",
@@ -211,6 +371,14 @@ struct log_mode {
 		.collect_dirty_pages = clear_log_collect_dirty_pages,
 		.after_vcpu_run = default_after_vcpu_run,
 	},
+	{
+		.name = "dirty-ring",
+		.supported = dirty_ring_supported,
+		.create_vm_done = dirty_ring_create_vm_done,
+		.collect_dirty_pages = dirty_ring_collect_dirty_pages,
+		.before_vcpu_join = dirty_ring_before_vcpu_join,
+		.after_vcpu_run = dirty_ring_after_vcpu_run,
+	},
 };
 
 /*
@@ -260,12 +428,20 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
 }
 
-static void log_mode_after_vcpu_run(struct kvm_vm *vm)
+static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
 {
 	struct log_mode *mode = &log_modes[host_log_mode];
 
 	if (mode->after_vcpu_run)
-		mode->after_vcpu_run(vm);
+		mode->after_vcpu_run(vm, ret, err);
+}
+
+static void log_mode_before_vcpu_join(void)
+{
+	struct log_mode *mode = &log_modes[host_log_mode];
+
+	if (mode->before_vcpu_join)
+		mode->before_vcpu_join();
 }
 
 static void generate_random_array(uint64_t *guest_array, uint64_t size)
@@ -278,20 +454,22 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
 
 static void *vcpu_worker(void *data)
 {
-	int ret;
+	int ret, vcpu_fd;
 	struct kvm_vm *vm = data;
 	uint64_t *guest_array;
 	uint64_t pages_count = 0;
 
+	vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
+
 	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
 
 	while (!READ_ONCE(host_quit)) {
+		/* Clear any existing kick signals */
 		generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
 		pages_count += TEST_PAGES_PER_LOOP;
 		/* Let the guest dirty the random pages */
-		ret = _vcpu_run(vm, VCPU_ID);
-		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
-		log_mode_after_vcpu_run(vm);
+		ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+		log_mode_after_vcpu_run(vm, ret, errno);
 	}
 
 	pr_info("Dirtied %"PRIu64" pages\n", pages_count);
@@ -304,6 +482,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 	uint64_t step = vm_num_host_pages(mode, 1);
 	uint64_t page;
 	uint64_t *value_ptr;
+	uint64_t min_iter = 0;
 
 	for (page = 0; page < host_num_pages; page += step) {
 		value_ptr = host_test_mem + page * host_page_size;
@@ -318,14 +497,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 		}
 
 		if (test_and_clear_bit_le(page, bmap)) {
+			bool matched;
+
 			host_dirty_count++;
+
 			/*
 			 * If the bit is set, the value written onto
 			 * the corresponding page should be either the
 			 * previous iteration number or the current one.
 			 */
-			TEST_ASSERT(*value_ptr == iteration ||
-				    *value_ptr == iteration - 1,
+			matched = (*value_ptr == iteration ||
+				   *value_ptr == iteration - 1);
+
+			if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
+				if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
+					/*
+					 * Short answer: this case is special
+					 * only for dirty ring test where the
+					 * page is the last page before a kvm
+					 * dirty ring full in iteration N-2.
+					 *
+					 * Long answer: Assuming ring size R,
+					 * one possible condition is:
+					 *
+					 *      main thr       vcpu thr
+					 *      --------       --------
+					 *    iter=1
+					 *                   write 1 to page 0~(R-1)
+					 *                   full, vmexit
+					 *    collect 0~(R-1)
+					 *    kick vcpu
+					 *                   write 1 to (R-1)~(2R-2)
+					 *                   full, vmexit
+					 *    iter=2
+					 *    collect (R-1)~(2R-2)
+					 *    kick vcpu
+					 *                   write 1 to (2R-2)
+					 *                   (NOTE!!! "1" cached in cpu reg)
+					 *                   write 2 to (2R-1)~(3R-3)
+					 *                   full, vmexit
+					 *    iter=3
+					 *    collect (2R-2)~(3R-3)
+					 *    (here if we read value on page
+					 *     "2R-2" is 1, while iter=3!!!)
+					 *
+					 * This however can only happen once per iteration.
+					 */
+					min_iter = iteration - 1;
+					continue;
+				} else if (page == dirty_ring_last_page) {
+					/*
+					 * Please refer to comments in
+					 * dirty_ring_last_page.
+					 */
+					continue;
+				}
+			}
+
+			TEST_ASSERT(matched,
 				    "Set page %"PRIu64" value %"PRIu64
 				    " incorrect (iteration=%"PRIu64")",
 				    page, *value_ptr, iteration);
@@ -390,7 +619,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		     unsigned long interval, uint64_t phys_offset)
 {
-	pthread_t vcpu_thread;
 	struct kvm_vm *vm;
 	unsigned long *bmap;
 
@@ -488,6 +716,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 
 	/* Tell the vcpu thread to quit */
 	host_quit = true;
+	log_mode_before_vcpu_join();
 	pthread_join(vcpu_thread, NULL);
 
 	pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
@@ -548,6 +777,9 @@ int main(int argc, char *argv[])
 	unsigned int mode;
 	int opt, i, j;
 
+	sem_init(&dirty_ring_vcpu_stop, 0, 0);
+	sem_init(&dirty_ring_vcpu_cont, 0, 0);
+
 #ifdef __x86_64__
 	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
 #endif
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index b2c1364c0402..710009cc17fd 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -74,6 +74,7 @@ void kvm_vm_release(struct kvm_vm *vmp);
 void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
 void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
 			    uint64_t first_page, uint32_t num_pages);
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
 
 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
 		       size_t len);
@@ -148,6 +149,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
 struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
 int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
 			  struct kvm_guest_debug *debug);
@@ -201,6 +203,7 @@ void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
 int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
 			  struct kvm_nested_state *state, bool ignore_error);
 #endif
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
 
 const char *exit_reason_str(unsigned int exit_reason);
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index ef2114e1b7ad..a04302666b02 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -114,6 +114,16 @@ int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
 	return r;
 }
 
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
+{
+	struct kvm_enable_cap cap = { 0 };
+
+	cap.cap = KVM_CAP_DIRTY_LOG_RING;
+	cap.args[0] = ring_size;
+	vm_enable_cap(vm, &cap);
+	vm->dirty_ring_size = ring_size;
+}
+
 static void vm_open(struct kvm_vm *vm, int perm)
 {
 	vm->kvm_fd = open(KVM_DEV_PATH, perm);
@@ -328,6 +338,11 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
 		    __func__, strerror(-ret));
 }
 
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+	return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS);
+}
+
 /*
  * Userspace Memory Region Find
  *
@@ -432,10 +447,17 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
  *
  * Removes a vCPU from a VM and frees its resources.
  */
-static void vm_vcpu_rm(struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
 {
 	int ret;
 
+	if (vcpu->dirty_gfns) {
+		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
+		TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, "
+			    "rc: %i errno: %i", ret, errno);
+		vcpu->dirty_gfns = NULL;
+	}
+
 	ret = munmap(vcpu->state, sizeof(*vcpu->state));
 	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
 		"errno: %i", ret, errno);
@@ -453,7 +475,7 @@ void kvm_vm_release(struct kvm_vm *vmp)
 	int ret;
 
 	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
-		vm_vcpu_rm(vcpu);
+		vm_vcpu_rm(vmp, vcpu);
 
 	ret = close(vmp->fd);
 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -1233,6 +1255,15 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
 	return rc;
 }
 
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	return vcpu->fd;
+}
+
 void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1561,6 +1592,42 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
 	return ret;
 }
 
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+	uint32_t size = vm->dirty_ring_size;
+
+	TEST_ASSERT(size > 0, "Should enable dirty ring first");
+
+	vcpu = vcpu_find(vm, vcpuid);
+
+	TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid);
+
+	if (!vcpu->dirty_gfns) {
+		void *addr;
+
+		addr = mmap(NULL, size, PROT_READ,
+			    MAP_PRIVATE, vcpu->fd,
+			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
+
+		addr = mmap(NULL, size, PROT_READ | PROT_EXEC,
+			    MAP_PRIVATE, vcpu->fd,
+			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
+
+		addr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			    MAP_SHARED, vcpu->fd,
+			    vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+
+		vcpu->dirty_gfns = addr;
+		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
+	}
+
+	return vcpu->dirty_gfns;
+}
+
 /*
  * VM Ioctl
  *
@@ -1680,6 +1747,7 @@ static struct exit_reason {
 	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
 	{KVM_EXIT_OSI, "OSI"},
 	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
 #endif
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index f07d383d03a1..34465dc562d8 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -28,6 +28,9 @@ struct vcpu {
 	uint32_t id;
 	int fd;
 	struct kvm_run *state;
+	struct kvm_dirty_gfn *dirty_gfns;
+	uint32_t fetch_index;
+	uint32_t dirty_gfns_count;
 };
 
 struct kvm_vm {
@@ -52,6 +55,7 @@ struct kvm_vm {
 	vm_vaddr_t tss;
 	vm_vaddr_t idt;
 	vm_vaddr_t handlers;
+	uint32_t dirty_ring_size;
 };
 
 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
-- 
cgit v1.2.3-70-g09d2


From 019d321a68ea07efcfcbc308443251644ff3e71c Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 30 Sep 2020 21:22:39 -0400
Subject: KVM: selftests: Run dirty ring test asynchronously

Previously the dirty ring test was working in synchronous way, because
only with a vmexit (with that it was the ring full event) we'll know
the hardware dirty bits will be flushed to the dirty ring.

With this patch we first introduce a vcpu kick mechanism using SIGUSR1,
which guarantees a vmexit and also therefore the flushing of hardware
dirty bits.  Once this is in place, we can keep the vcpu dirty work
asynchronous of the whole collection procedure now.  Still, we need
to be very careful that when reaching the ring buffer soft limit
(KVM_EXIT_DIRTY_RING_FULL) we must collect the dirty bits before
continuing the vcpu.

Further increase the dirty ring size to current maximum to make sure
we torture more on the no-ring-full case, which should be the major
scenario when the hypervisors like QEMU would like to use this feature.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20201001012239.6159-1-peterx@redhat.com>
[Use KVM_SET_SIGNAL_MASK+sigwait instead of a signal handler. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c | 64 ++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index a7da4d9471e1..85c5d07dd243 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -62,7 +62,9 @@
 # define test_and_clear_bit_le	test_and_clear_bit
 #endif
 
-#define TEST_DIRTY_RING_COUNT		1024
+#define TEST_DIRTY_RING_COUNT		65536
+
+#define SIG_IPI SIGUSR1
 
 /*
  * Guest/Host shared variables. Ensure addr_gva2hva() and/or
@@ -138,6 +140,12 @@ static uint64_t host_track_next_count;
 /* Whether dirty ring reset is requested, or finished */
 static sem_t dirty_ring_vcpu_stop;
 static sem_t dirty_ring_vcpu_cont;
+/*
+ * This is updated by the vcpu thread to tell the host whether it's a
+ * ring-full event.  It should only be read until a sem_wait() of
+ * dirty_ring_vcpu_stop and before vcpu continues to run.
+ */
+static bool dirty_ring_vcpu_ring_full;
 /*
  * This is only used for verifying the dirty pages.  Dirty ring has a very
  * tricky case when the ring just got full, kvm will do userspace exit due to
@@ -176,6 +184,11 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
 static enum log_mode_t host_log_mode;
 static pthread_t vcpu_thread;
 
+static void vcpu_kick(void)
+{
+	pthread_kill(vcpu_thread, SIG_IPI);
+}
+
 /*
  * In our test we do signal tricks, let's use a better version of
  * sem_wait to avoid signal interrupts
@@ -286,6 +299,8 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
 
 static void dirty_ring_wait_vcpu(void)
 {
+	/* This makes sure that hardware PML cache flushed */
+	vcpu_kick();
 	sem_wait_until(&dirty_ring_vcpu_stop);
 }
 
@@ -301,9 +316,19 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	/* We only have one vcpu */
 	static uint32_t fetch_index = 0;
 	uint32_t count = 0, cleared;
+	bool continued_vcpu = false;
 
 	dirty_ring_wait_vcpu();
 
+	if (!dirty_ring_vcpu_ring_full) {
+		/*
+		 * This is not a ring-full event, it's safe to allow
+		 * vcpu to continue
+		 */
+		dirty_ring_continue_vcpu();
+		continued_vcpu = true;
+	}
+
 	/* Only have one vcpu */
 	count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
 				       slot, bitmap, num_pages, &fetch_index);
@@ -314,7 +339,11 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
 	TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
 		    "with collected (%u)", cleared, count);
 
-	dirty_ring_continue_vcpu();
+	if (!continued_vcpu) {
+		TEST_ASSERT(dirty_ring_vcpu_ring_full,
+			    "Didn't continue vcpu even without ring full");
+		dirty_ring_continue_vcpu();
+	}
 
 	pr_info("Iteration %ld collected %u pages\n", iteration, count);
 }
@@ -327,10 +356,15 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
 	if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
 		/* We should allow this to continue */
 		;
-	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
+		   (ret == -1 && err == EINTR)) {
 		/* Update the flag first before pause */
+		WRITE_ONCE(dirty_ring_vcpu_ring_full,
+			   run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
 		sem_post(&dirty_ring_vcpu_stop);
-		pr_info("vcpu stops because dirty ring is full...\n");
+		pr_info("vcpu stops because %s...\n",
+			dirty_ring_vcpu_ring_full ?
+			"dirty ring is full" : "vcpu is kicked out");
 		sem_wait_until(&dirty_ring_vcpu_cont);
 		pr_info("vcpu continues now.\n");
 	} else {
@@ -458,9 +492,26 @@ static void *vcpu_worker(void *data)
 	struct kvm_vm *vm = data;
 	uint64_t *guest_array;
 	uint64_t pages_count = 0;
+	struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+						 + sizeof(sigset_t));
+	sigset_t *sigset = (sigset_t *) &sigmask->sigset;
 
 	vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
 
+	/*
+	 * SIG_IPI is unblocked atomically while in KVM_RUN.  It causes the
+	 * ioctl to return with -EINTR, but it is still pending and we need
+	 * to accept it with the sigwait.
+	 */
+	sigmask->len = 8;
+	pthread_sigmask(0, NULL, sigset);
+	vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
+	sigaddset(sigset, SIG_IPI);
+	pthread_sigmask(SIG_BLOCK, sigset, NULL);
+
+	sigemptyset(sigset);
+	sigaddset(sigset, SIG_IPI);
+
 	guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
 
 	while (!READ_ONCE(host_quit)) {
@@ -469,6 +520,11 @@ static void *vcpu_worker(void *data)
 		pages_count += TEST_PAGES_PER_LOOP;
 		/* Let the guest dirty the random pages */
 		ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+		if (ret == -1 && errno == EINTR) {
+			int sig = -1;
+			sigwait(sigset, &sig);
+			assert(sig == SIG_IPI);
+		}
 		log_mode_after_vcpu_run(vm, ret, errno);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From edd3de6fc3d57deddb5cc7c7f1d8316ad26ac4e4 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 30 Sep 2020 21:22:41 -0400
Subject: KVM: selftests: Add "-c" parameter to dirty log test

It's only used to override the existing dirty ring size/count.  If
with a bigger ring count, we test async of dirty ring.  If with a
smaller ring count, we test ring full code path.  Async is default.

It has no use for non-dirty-ring tests.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20201001012241.6208-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 85c5d07dd243..b1f8731721b9 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -183,6 +183,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
 /* Logging mode for current run */
 static enum log_mode_t host_log_mode;
 static pthread_t vcpu_thread;
+static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
 
 static void vcpu_kick(void)
 {
@@ -257,7 +258,7 @@ static void dirty_ring_create_vm_done(struct kvm_vm *vm)
 	 * Switch to dirty ring mode after VM creation but before any
 	 * of the vcpu creation.
 	 */
-	vm_enable_dirty_ring(vm, TEST_DIRTY_RING_COUNT *
+	vm_enable_dirty_ring(vm, test_dirty_ring_count *
 			     sizeof(struct kvm_dirty_gfn));
 }
 
@@ -279,7 +280,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
 	uint32_t count = 0;
 
 	while (true) {
-		cur = &dirty_gfns[*fetch_index % TEST_DIRTY_RING_COUNT];
+		cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
 		if (!dirty_gfn_is_dirtied(cur))
 			break;
 		TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
@@ -803,6 +804,9 @@ static void help(char *name)
 	printf("usage: %s [-h] [-i iterations] [-I interval] "
 	       "[-p offset] [-m mode]\n", name);
 	puts("");
+	printf(" -c: specify dirty ring size, in number of entries\n");
+	printf("     (only useful for dirty-ring test; default: %"PRIu32")\n",
+	       TEST_DIRTY_RING_COUNT);
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
 	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
@@ -858,8 +862,11 @@ int main(int argc, char *argv[])
 	guest_mode_init(VM_MODE_P40V48_4K, true, true);
 #endif
 
-	while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
+	while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
 		switch (opt) {
+		case 'c':
+			test_dirty_ring_count = strtol(optarg, NULL, 10);
+			break;
 		case 'i':
 			iterations = strtol(optarg, NULL, 10);
 			break;
-- 
cgit v1.2.3-70-g09d2


From 8aa426e854c475504033c176a66d038259bf64ea Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 6 Nov 2020 07:39:26 -0500
Subject: selftests: kvm: keep .gitignore add to date

Add tsc_msrs_test, remove clear_dirty_log_test and alphabetize
everything.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 7a2c242b7152..5468db7dd674 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -14,17 +14,17 @@
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
-/x86_64/user_msr_test
-/x86_64/vmx_preemption_timer_test
 /x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
+/x86_64/tsc_msrs_test
+/x86_64/user_msr_test
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
 /x86_64/vmx_dirty_log_test
+/x86_64/vmx_preemption_timer_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
 /x86_64/xss_msr_test
-/clear_dirty_log_test
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
-- 
cgit v1.2.3-70-g09d2


From f63f0b68c864edea801de678bed279a3d7674f1a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 13 Nov 2020 11:36:49 -0500
Subject: KVM: selftests: always use manual clear in dirty_log_perf_test

Nothing sets USE_CLEAR_DIRTY_LOG anymore, so anything it surrounds
is dead code.

However, it is the recommended way to use the dirty page bitmap
for new enough kernel, so use it whenever KVM has the
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 capability.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 55 +++++++++--------------
 1 file changed, 22 insertions(+), 33 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 85c9b8f73142..9c6a7be31e03 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -27,6 +27,7 @@
 #define TEST_HOST_LOOP_N		2UL
 
 /* Host variables */
+static u64 dirty_log_manual_caps;
 static bool host_quit;
 static uint64_t iteration;
 static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
@@ -88,10 +89,6 @@ static void *vcpu_worker(void *data)
 	return NULL;
 }
 
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
-
 static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		     uint64_t phys_offset, int wr_fract)
 {
@@ -106,10 +103,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	struct timespec get_dirty_log_total = (struct timespec){0};
 	struct timespec vcpu_dirty_total = (struct timespec){0};
 	struct timespec avg;
-#ifdef USE_CLEAR_DIRTY_LOG
 	struct kvm_enable_cap cap = {};
 	struct timespec clear_dirty_log_total = (struct timespec){0};
-#endif
 
 	vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
 
@@ -120,11 +115,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
 	bmap = bitmap_alloc(host_num_pages);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-	cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
-	cap.args[0] = dirty_log_manual_caps;
-	vm_enable_cap(vm, &cap);
-#endif
+	if (dirty_log_manual_caps) {
+		cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+		cap.args[0] = dirty_log_manual_caps;
+		vm_enable_cap(vm, &cap);
+	}
 
 	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
@@ -190,17 +185,17 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
 			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-		clock_gettime(CLOCK_MONOTONIC, &start);
-		kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
-				       host_num_pages);
+		if (dirty_log_manual_caps) {
+			clock_gettime(CLOCK_MONOTONIC, &start);
+			kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+					       host_num_pages);
 
-		ts_diff = timespec_diff_now(start);
-		clear_dirty_log_total = timespec_add(clear_dirty_log_total,
-						     ts_diff);
-		pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
-			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
-#endif
+			ts_diff = timespec_diff_now(start);
+			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+							     ts_diff);
+			pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+				iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+		}
 	}
 
 	/* Tell the vcpu thread to quit */
@@ -220,12 +215,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 		iterations, get_dirty_log_total.tv_sec,
 		get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 
-#ifdef USE_CLEAR_DIRTY_LOG
-	avg = timespec_div(clear_dirty_log_total, iterations);
-	pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
-		iterations, clear_dirty_log_total.tv_sec,
-		clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-#endif
+	if (dirty_log_manual_caps) {
+		avg = timespec_div(clear_dirty_log_total, iterations);
+		pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+			iterations, clear_dirty_log_total.tv_sec,
+			clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+	}
 
 	free(bmap);
 	free(vcpu_threads);
@@ -284,16 +279,10 @@ int main(int argc, char *argv[])
 	int opt, i;
 	int wr_fract = 1;
 
-#ifdef USE_CLEAR_DIRTY_LOG
 	dirty_log_manual_caps =
 		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
-	if (!dirty_log_manual_caps) {
-		print_skip("KVM_CLEAR_DIRTY_LOG not available");
-		exit(KSFT_SKIP);
-	}
 	dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
 				  KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
 
 #ifdef __x86_64__
 	guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-- 
cgit v1.2.3-70-g09d2


From ec2f18bb4783648041498b06d4bff222821efed1 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 11 Nov 2020 13:26:29 +0100
Subject: KVM: selftests: Make vm_create_default common

The code is almost 100% the same anyway. Just move it to common
and add a few arch-specific macros.

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201111122636.73346-5-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util.h     | 23 +++++++++++++---
 .../testing/selftests/kvm/lib/aarch64/processor.c  | 17 ------------
 tools/testing/selftests/kvm/lib/kvm_util.c         | 26 ++++++++++++++++++
 tools/testing/selftests/kvm/lib/s390x/processor.c  | 22 ---------------
 tools/testing/selftests/kvm/lib/x86_64/processor.c | 32 ----------------------
 5 files changed, 45 insertions(+), 75 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 710009cc17fd..8154785196cb 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -45,13 +45,28 @@ enum vm_guest_mode {
 };
 
 #if defined(__aarch64__)
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+
+#define VM_MODE_DEFAULT			VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT			12U
+#define ptes_per_page(page_size)	((page_size) / 8)
+
 #elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#else
-#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+
+#define VM_MODE_DEFAULT			VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT			12U
+#define ptes_per_page(page_size)	((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT			VM_MODE_P52V48_4K
+#define MIN_PAGE_SHIFT			12U
+#define ptes_per_page(page_size)	((page_size) / 16)
+
 #endif
 
+#define MIN_PAGE_SIZE		(1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE	ptes_per_page(MIN_PAGE_SIZE)
+
 #define vm_guest_mode_string(m) vm_guest_mode_string[m]
 extern const char * const vm_guest_mode_string[];
 
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index d6c32c328e9a..cee92d477dc0 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -5,8 +5,6 @@
  * Copyright (C) 2018, Red Hat, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include <linux/compiler.h>
 
 #include "kvm_util.h"
@@ -219,21 +217,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	}
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-				 void *guest_code)
-{
-	uint64_t ptrs_per_4k_pte = 512;
-	uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
-	struct kvm_vm *vm;
-
-	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-	vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-	return vm;
-}
-
 void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
 {
 	struct kvm_vcpu_init default_init = { .target = -1, };
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a04302666b02..70676a305523 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2018, Google LLC.
  */
 
+#define _GNU_SOURCE /* for program_invocation_name */
 #include "test_util.h"
 #include "kvm_util.h"
 #include "kvm_util_internal.h"
@@ -281,6 +282,31 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	/* The maximum page table size for a memory region will be when the
+	 * smallest pages are used. Considering each page contains x page
+	 * table descriptors, the total extra size for page tables (for extra
+	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
+	 * than N/x*2.
+	 */
+	uint64_t extra_pg_pages = (extra_mem_pages / PTES_PER_MIN_PAGE) * 2;
+	struct kvm_vm *vm;
+
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+#ifdef __x86_64__
+	vm_create_irqchip(vm);
+#endif
+
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
 /*
  * VM Restart
  *
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 7349bb2e1a24..0152f356c099 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -5,8 +5,6 @@
  * Copyright (C) 2019, Red Hat, Inc.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "processor.h"
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -160,26 +158,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	virt_dump_region(stream, vm, indent, vm->pgd);
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-				 void *guest_code)
-{
-	/*
-	 * The additional amount of pages required for the page tables is:
-	 * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
-	 * which is definitely smaller than (n / 256) * 2.
-	 */
-	uint64_t extra_pg_pages = extra_mem_pages / 256 * 2;
-	struct kvm_vm *vm;
-
-	vm = vm_create(VM_MODE_DEFAULT,
-		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-	vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-	return vm;
-}
-
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 {
 	size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index d10c5c05bdf0..95e1a757c629 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -5,8 +5,6 @@
  * Copyright (C) 2018, Google LLC.
  */
 
-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "test_util.h"
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -731,36 +729,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
 
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-				 void *guest_code)
-{
-	struct kvm_vm *vm;
-	/*
-	 * For x86 the maximum page table size for a memory region
-	 * will be when only 4K pages are used.  In that case the
-	 * total extra size for page tables (for extra N pages) will
-	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
-	 * than N/512*2.
-	 */
-	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
-
-	/* Create VM */
-	vm = vm_create(VM_MODE_DEFAULT,
-		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
-		       O_RDWR);
-
-	/* Setup guest code */
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-
-	/* Setup IRQ Chip */
-	vm_create_irqchip(vm);
-
-	/* Add the first vCPU. */
-	vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-	return vm;
-}
-
 /*
  * VCPU Get MSR
  *
-- 
cgit v1.2.3-70-g09d2


From 0aa9ec45d42779af711c7a209b5780ff7391b5bd Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 11 Nov 2020 13:26:30 +0100
Subject: KVM: selftests: Introduce vm_create_[default_]_with_vcpus

Introduce new vm_create variants that also takes a number of vcpus,
an amount of per-vcpu pages, and optionally a list of vcpuids. These
variants will create default VMs with enough additional pages to
cover the vcpu stacks, per-vcpu pages, and pagetable pages for all.
The new 'default' variant uses VM_MODE_DEFAULT, whereas the other
new variant accepts the mode as a parameter.

Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201111122636.73346-6-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util.h | 10 ++++++++
 tools/testing/selftests/kvm/lib/kvm_util.c     | 35 ++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 8154785196cb..dfa9d369e8fc 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -266,6 +266,16 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 				 void *guest_code);
 
+/* Same as vm_create_default, but can be used for more than one vcpu */
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+					    uint32_t num_percpu_pages, void *guest_code,
+					    uint32_t vcpuids[]);
+
+/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+				    uint64_t extra_mem_pages, uint32_t num_percpu_pages,
+				    void *guest_code, uint32_t vcpuids[]);
+
 /*
  * Adds a vCPU with reasonable defaults (e.g. a stack)
  *
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 70676a305523..df2035ac54a6 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -282,8 +282,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
-				 void *guest_code)
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+				    uint64_t extra_mem_pages, uint32_t num_percpu_pages,
+				    void *guest_code, uint32_t vcpuids[])
 {
 	/* The maximum page table size for a memory region will be when the
 	 * smallest pages are used. Considering each page contains x page
@@ -291,10 +292,18 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
 	 * than N/x*2.
 	 */
-	uint64_t extra_pg_pages = (extra_mem_pages / PTES_PER_MIN_PAGE) * 2;
+	uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
+	uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
+	uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
 	struct kvm_vm *vm;
+	int i;
+
+	TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
+		    "nr_vcpus = %d too large for host, max-vcpus = %d",
+		    nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
 
-	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	pages = vm_adjust_num_guest_pages(mode, pages);
+	vm = vm_create(mode, pages, O_RDWR);
 
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 
@@ -302,11 +311,27 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	vm_create_irqchip(vm);
 #endif
 
-	vm_vcpu_add_default(vm, vcpuid, guest_code);
+	for (i = 0; i < nr_vcpus; ++i)
+		vm_vcpu_add_default(vm, vcpuids ? vcpuids[i] : i, guest_code);
 
 	return vm;
 }
 
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+					    uint32_t num_percpu_pages, void *guest_code,
+					    uint32_t vcpuids[])
+{
+	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
+				    num_percpu_pages, guest_code, vcpuids);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code,
+					    (uint32_t []){ vcpuid });
+}
+
 /*
  * VM Restart
  *
-- 
cgit v1.2.3-70-g09d2


From 87c5f35e5c958278174979f13a9e40d3c9962c0f Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 11 Nov 2020 13:26:34 +0100
Subject: KVM: selftests: Also build dirty_log_perf_test on AArch64

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201111122636.73346-10-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 3d14ef77755e..4febf4d5ead9 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -70,6 +70,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
-- 
cgit v1.2.3-70-g09d2


From 024cd2cbd1ca2d29e6df538855d52c4e5990cab7 Mon Sep 17 00:00:00 2001
From: Santucci Pierpaolo <santucci@epigenesys.com>
Date: Mon, 16 Nov 2020 11:30:37 +0100
Subject: selftest/bpf: Fix IPV6FR handling in flow dissector

From second fragment on, IPV6FR program must stop the dissection of IPV6
fragmented packet. This is the same approach used for IPV4 fragmentation.
This fixes the flow keys calculation for the upper-layer protocols.
Note that according to RFC8200, the first fragment packet must include
the upper-layer header.

Signed-off-by: Santucci Pierpaolo <santucci@epigenesys.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/X7JUzUj34ceE2wBm@santucci.pierpaolo
---
 tools/testing/selftests/bpf/progs/bpf_flow.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 5a65f6b51377..95a5a0778ed7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -368,6 +368,8 @@ PROG(IPV6FR)(struct __sk_buff *skb)
 		 */
 		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
 			return export_flow_keys(keys, BPF_OK);
+	} else {
+		return export_flow_keys(keys, BPF_OK);
 	}
 
 	return parse_ipv6_proto(skb, fragh->nexthdr);
-- 
cgit v1.2.3-70-g09d2


From 601366678c93618f37a685332c0ba07e5556798c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 30 Oct 2020 14:47:42 +0900
Subject: perf data: Allow to use stdio functions for pipe mode

When perf data is in a pipe, it reads each event separately using
read(2) syscall.  This is a huge performance bottleneck when
processing large data like in perf inject.  Also perf inject needs to
use write(2) syscall for the output.

So convert it to use buffer I/O functions in stdio library for pipe
data.  This makes inject-build-id bench time drops from 20ms to 8ms.

  $ perf bench internals inject-build-id
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.074 msec (+- 0.013 msec)
    Average time per event: 0.792 usec (+- 0.001 usec)
    Average memory usage: 8328 KB (+- 0 KB)
    Average build-id-all injection took: 5.490 msec (+- 0.008 msec)
    Average time per event: 0.538 usec (+- 0.001 usec)
    Average memory usage: 7563 KB (+- 0 KB)

This patch enables it just for perf inject when used with pipe (it's a
default behavior).  Maybe we could do it for perf record and/or report
later..

Committer testing:

Before:

  $ perf stat -r 5 perf bench internals inject-build-id
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.605 msec (+- 0.064 msec)
    Average time per event: 1.334 usec (+- 0.006 usec)
    Average memory usage: 12220 KB (+- 7 KB)
    Average build-id-all injection took: 11.458 msec (+- 0.058 msec)
    Average time per event: 1.123 usec (+- 0.006 usec)
    Average memory usage: 11546 KB (+- 8 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.673 msec (+- 0.057 msec)
    Average time per event: 1.341 usec (+- 0.006 usec)
    Average memory usage: 12508 KB (+- 8 KB)
    Average build-id-all injection took: 11.437 msec (+- 0.046 msec)
    Average time per event: 1.121 usec (+- 0.004 usec)
    Average memory usage: 11812 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.641 msec (+- 0.069 msec)
    Average time per event: 1.337 usec (+- 0.007 usec)
    Average memory usage: 12302 KB (+- 8 KB)
    Average build-id-all injection took: 10.820 msec (+- 0.106 msec)
    Average time per event: 1.061 usec (+- 0.010 usec)
    Average memory usage: 11616 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.379 msec (+- 0.074 msec)
    Average time per event: 1.312 usec (+- 0.007 usec)
    Average memory usage: 12334 KB (+- 8 KB)
    Average build-id-all injection took: 11.288 msec (+- 0.071 msec)
    Average time per event: 1.107 usec (+- 0.007 usec)
    Average memory usage: 11657 KB (+- 8 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 13.534 msec (+- 0.058 msec)
    Average time per event: 1.327 usec (+- 0.006 usec)
    Average memory usage: 12264 KB (+- 8 KB)
    Average build-id-all injection took: 11.557 msec (+- 0.076 msec)
    Average time per event: 1.133 usec (+- 0.007 usec)
    Average memory usage: 11593 KB (+- 8 KB)

   Performance counter stats for 'perf bench internals inject-build-id' (5 runs):

            4,060.05 msec task-clock:u              #    1.566 CPUs utilized            ( +-  0.65% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             101,888      page-faults:u             #    0.025 M/sec                    ( +-  0.12% )
       3,745,833,163      cycles:u                  #    0.923 GHz                      ( +-  0.10% )  (83.22%)
         194,346,613      stalled-cycles-frontend:u #    5.19% frontend cycles idle     ( +-  0.57% )  (83.30%)
         708,495,034      stalled-cycles-backend:u  #   18.91% backend cycles idle      ( +-  0.48% )  (83.48%)
       5,629,328,628      instructions:u            #    1.50  insn per cycle
                                                    #    0.13  stalled cycles per insn  ( +-  0.21% )  (83.57%)
       1,236,697,927      branches:u                #  304.602 M/sec                    ( +-  0.16% )  (83.44%)
          17,564,877      branch-misses:u           #    1.42% of all branches          ( +-  0.23% )  (82.99%)

              2.5934 +- 0.0128 seconds time elapsed  ( +-  0.49% )

  $

After:

  $ perf stat -r 5 perf bench internals inject-build-id
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.560 msec (+- 0.125 msec)
    Average time per event: 0.839 usec (+- 0.012 usec)
    Average memory usage: 12520 KB (+- 8 KB)
    Average build-id-all injection took: 5.789 msec (+- 0.054 msec)
    Average time per event: 0.568 usec (+- 0.005 usec)
    Average memory usage: 11919 KB (+- 9 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.639 msec (+- 0.111 msec)
    Average time per event: 0.847 usec (+- 0.011 usec)
    Average memory usage: 12732 KB (+- 8 KB)
    Average build-id-all injection took: 5.647 msec (+- 0.069 msec)
    Average time per event: 0.554 usec (+- 0.007 usec)
    Average memory usage: 12093 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.551 msec (+- 0.096 msec)
    Average time per event: 0.838 usec (+- 0.009 usec)
    Average memory usage: 12739 KB (+- 8 KB)
    Average build-id-all injection took: 5.617 msec (+- 0.061 msec)
    Average time per event: 0.551 usec (+- 0.006 usec)
    Average memory usage: 12105 KB (+- 7 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.403 msec (+- 0.097 msec)
    Average time per event: 0.824 usec (+- 0.010 usec)
    Average memory usage: 12770 KB (+- 8 KB)
    Average build-id-all injection took: 5.611 msec (+- 0.085 msec)
    Average time per event: 0.550 usec (+- 0.008 usec)
    Average memory usage: 12134 KB (+- 8 KB)
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.518 msec (+- 0.102 msec)
    Average time per event: 0.835 usec (+- 0.010 usec)
    Average memory usage: 12518 KB (+- 10 KB)
    Average build-id-all injection took: 5.503 msec (+- 0.073 msec)
    Average time per event: 0.540 usec (+- 0.007 usec)
    Average memory usage: 11882 KB (+- 8 KB)

   Performance counter stats for 'perf bench internals inject-build-id' (5 runs):

            2,394.88 msec task-clock:u              #    1.577 CPUs utilized            ( +-  0.83% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             103,181      page-faults:u             #    0.043 M/sec                    ( +-  0.11% )
       3,548,172,030      cycles:u                  #    1.482 GHz                      ( +-  0.30% )  (83.26%)
          81,537,700      stalled-cycles-frontend:u #    2.30% frontend cycles idle     ( +-  1.54% )  (83.24%)
         876,631,544      stalled-cycles-backend:u  #   24.71% backend cycles idle      ( +-  1.14% )  (83.45%)
       5,960,361,707      instructions:u            #    1.68  insn per cycle
                                                    #    0.15  stalled cycles per insn  ( +-  0.27% )  (83.26%)
       1,269,413,491      branches:u                #  530.054 M/sec                    ( +-  0.10% )  (83.48%)
          11,372,453      branch-misses:u           #    0.90% of all branches          ( +-  0.52% )  (83.31%)

             1.51874 +- 0.00642 seconds time elapsed  ( +-  0.42% )

  $

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201030054742.87740-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-inject.c |  2 ++
 tools/perf/util/data.c      | 41 ++++++++++++++++++++++++++++++++++++++---
 tools/perf/util/data.h      | 11 ++++++++++-
 tools/perf/util/header.c    |  8 ++++----
 tools/perf/util/session.c   |  7 ++++---
 5 files changed, 58 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 452a75fe68e5..14d6c88fed76 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -853,10 +853,12 @@ int cmd_inject(int argc, const char **argv)
 		.output = {
 			.path = "-",
 			.mode = PERF_DATA_MODE_WRITE,
+			.use_stdio = true,
 		},
 	};
 	struct perf_data data = {
 		.mode = PERF_DATA_MODE_READ,
+		.use_stdio = true,
 	};
 	int ret;
 
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index c47aa34fdc0a..05bbcb663c41 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -174,8 +174,21 @@ static bool check_pipe(struct perf_data *data)
 			is_pipe = true;
 	}
 
-	if (is_pipe)
-		data->file.fd = fd;
+	if (is_pipe) {
+		if (data->use_stdio) {
+			const char *mode;
+
+			mode = perf_data__is_read(data) ? "r" : "w";
+			data->file.fptr = fdopen(fd, mode);
+
+			if (data->file.fptr == NULL) {
+				data->file.fd = fd;
+				data->use_stdio = false;
+			}
+		} else {
+			data->file.fd = fd;
+		}
+	}
 
 	return data->is_pipe = is_pipe;
 }
@@ -334,6 +347,9 @@ int perf_data__open(struct perf_data *data)
 	if (check_pipe(data))
 		return 0;
 
+	/* currently it allows stdio for pipe only */
+	data->use_stdio = false;
+
 	if (!data->path)
 		data->path = "perf.data";
 
@@ -353,7 +369,21 @@ void perf_data__close(struct perf_data *data)
 		perf_data__close_dir(data);
 
 	zfree(&data->file.path);
-	close(data->file.fd);
+
+	if (data->use_stdio)
+		fclose(data->file.fptr);
+	else
+		close(data->file.fd);
+}
+
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size)
+{
+	if (data->use_stdio) {
+		if (fread(buf, size, 1, data->file.fptr) == 1)
+			return size;
+		return feof(data->file.fptr) ? 0 : -1;
+	}
+	return readn(data->file.fd, buf, size);
 }
 
 ssize_t perf_data_file__write(struct perf_data_file *file,
@@ -365,6 +395,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
 ssize_t perf_data__write(struct perf_data *data,
 			      void *buf, size_t size)
 {
+	if (data->use_stdio) {
+		if (fwrite(buf, size, 1, data->file.fptr) == 1)
+			return size;
+		return -1;
+	}
 	return perf_data_file__write(&data->file, buf, size);
 }
 
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 75947ef6bc17..c563fcbb0288 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -2,6 +2,7 @@
 #ifndef __PERF_DATA_H
 #define __PERF_DATA_H
 
+#include <stdio.h>
 #include <stdbool.h>
 
 enum perf_data_mode {
@@ -16,7 +17,10 @@ enum perf_dir_version {
 
 struct perf_data_file {
 	char		*path;
-	int		 fd;
+	union {
+		int	 fd;
+		FILE	*fptr;
+	};
 	unsigned long	 size;
 };
 
@@ -26,6 +30,7 @@ struct perf_data {
 	bool			 is_pipe;
 	bool			 is_dir;
 	bool			 force;
+	bool			 use_stdio;
 	enum perf_data_mode	 mode;
 
 	struct {
@@ -62,11 +67,15 @@ static inline bool perf_data__is_single_file(struct perf_data *data)
 
 static inline int perf_data__fd(struct perf_data *data)
 {
+	if (data->use_stdio)
+		return fileno(data->file.fptr);
+
 	return data->file.fd;
 }
 
 int perf_data__open(struct perf_data *data);
 void perf_data__close(struct perf_data *data);
+ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
 ssize_t perf_data__write(struct perf_data *data,
 			      void *buf, size_t size);
 ssize_t perf_data_file__write(struct perf_data_file *file,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 598285a21dad..b9171bd11fe6 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3647,7 +3647,8 @@ static int perf_file_section__process(struct perf_file_section *section,
 }
 
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
-				       struct perf_header *ph, int fd,
+				       struct perf_header *ph,
+				       struct perf_data* data,
 				       bool repipe)
 {
 	struct feat_fd ff = {
@@ -3656,7 +3657,7 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
 	};
 	ssize_t ret;
 
-	ret = readn(fd, header, sizeof(*header));
+	ret = perf_data__read(data, header, sizeof(*header));
 	if (ret <= 0)
 		return -1;
 
@@ -3679,8 +3680,7 @@ static int perf_header__read_pipe(struct perf_session *session)
 	struct perf_header *header = &session->header;
 	struct perf_pipe_file_header f_header;
 
-	if (perf_file_header__read_pipe(&f_header, header,
-					perf_data__fd(session->data),
+	if (perf_file_header__read_pipe(&f_header, header, session->data,
 					session->repipe) < 0) {
 		pr_debug("incompatible file format\n");
 		return -EINVAL;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 098080287c68..5cc722b6fe7c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1937,7 +1937,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
 {
 	struct ordered_events *oe = &session->ordered_events;
 	struct perf_tool *tool = session->tool;
-	int fd = perf_data__fd(session->data);
 	union perf_event *event;
 	uint32_t size, cur_size = 0;
 	void *buf = NULL;
@@ -1957,7 +1956,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
 	ordered_events__set_copy_on_queue(oe, true);
 more:
 	event = buf;
-	err = readn(fd, event, sizeof(struct perf_event_header));
+	err = perf_data__read(session->data, event,
+			      sizeof(struct perf_event_header));
 	if (err <= 0) {
 		if (err == 0)
 			goto done;
@@ -1989,7 +1989,8 @@ more:
 	p += sizeof(struct perf_event_header);
 
 	if (size - sizeof(struct perf_event_header)) {
-		err = readn(fd, p, size - sizeof(struct perf_event_header));
+		err = perf_data__read(session->data, p,
+				      size - sizeof(struct perf_event_header));
 		if (err <= 0) {
 			if (err == 0) {
 				pr_err("unexpected end of event stream\n");
-- 
cgit v1.2.3-70-g09d2


From 8113ab20e850491b4144a1a64246f07a2d737a49 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 15 Oct 2020 12:28:58 +0200
Subject: tools/power/cpupower: Read energy_perf_bias from sysfs

... instead of poking at the MSR. For that, move the accessor functions
to misc.c and add a sysfs-writing function too.

There should be no functional changes resulting from this.

Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Thomas Renninger <trenn@suse.com>
Link: https://lkml.kernel.org/r/20201029190259.3476-2-bp@alien8.de
---
 tools/power/cpupower/lib/cpupower.c          | 23 ++++++++++++-
 tools/power/cpupower/lib/cpupower_intern.h   |  5 +++
 tools/power/cpupower/utils/cpupower-info.c   |  2 +-
 tools/power/cpupower/utils/cpupower-set.c    |  2 +-
 tools/power/cpupower/utils/helpers/helpers.h |  8 ++---
 tools/power/cpupower/utils/helpers/misc.c    | 48 ++++++++++++++++++++++++++++
 tools/power/cpupower/utils/helpers/msr.c     | 28 ----------------
 7 files changed, 81 insertions(+), 35 deletions(-)

(limited to 'tools')

diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 3656e697537e..3f7d0c0c5067 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -16,8 +16,8 @@
 
 unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
 {
-	int fd;
 	ssize_t numread;
+	int fd;
 
 	fd = open(path, O_RDONLY);
 	if (fd == -1)
@@ -35,6 +35,27 @@ unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
 	return (unsigned int) numread;
 }
 
+unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen)
+{
+	ssize_t numwritten;
+	int fd;
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwritten = write(fd, buf, buflen - 1);
+	if (numwritten < 1) {
+		perror(path);
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwritten;
+}
+
 /*
  * Detect whether a CPU is online
  *
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
index 4887c76d23f8..ac1112b956ec 100644
--- a/tools/power/cpupower/lib/cpupower_intern.h
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #define PATH_TO_CPU "/sys/devices/system/cpu/"
+
+#ifndef MAX_LINE_LEN
 #define MAX_LINE_LEN 4096
+#endif
+
 #define SYSFS_PATH_MAX 255
 
 unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
+unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen);
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index 0ba61a2c4d81..06345b543786 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -101,7 +101,7 @@ int cmd_info(int argc, char **argv)
 		}
 
 		if (params.perf_bias) {
-			ret = msr_intel_get_perf_bias(cpu);
+			ret = cpupower_intel_get_perf_bias(cpu);
 			if (ret < 0) {
 				fprintf(stderr,
 			_("Could not read perf-bias value[%d]\n"), ret);
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 052044d7e012..180d5ba877e6 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -95,7 +95,7 @@ int cmd_set(int argc, char **argv)
 		}
 
 		if (params.perf_bias) {
-			ret = msr_intel_set_perf_bias(cpu, perf_bias);
+			ret = cpupower_intel_set_perf_bias(cpu, perf_bias);
 			if (ret) {
 				fprintf(stderr, _("Error setting perf-bias "
 						  "value on CPU %d\n"), cpu);
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index c258eeccd05f..37dac161f3fe 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -105,8 +105,8 @@ extern struct cpupower_cpu_info cpupower_cpu_info;
 extern int read_msr(int cpu, unsigned int idx, unsigned long long *val);
 extern int write_msr(int cpu, unsigned int idx, unsigned long long val);
 
-extern int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val);
-extern int msr_intel_get_perf_bias(unsigned int cpu);
+extern int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val);
+extern int cpupower_intel_get_perf_bias(unsigned int cpu);
 extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
 
 /* Read/Write msr ****************************/
@@ -150,9 +150,9 @@ static inline int read_msr(int cpu, unsigned int idx, unsigned long long *val)
 { return -1; };
 static inline int write_msr(int cpu, unsigned int idx, unsigned long long val)
 { return -1; };
-static inline int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
+static inline int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
 { return -1; };
-static inline int msr_intel_get_perf_bias(unsigned int cpu)
+static inline int cpupower_intel_get_perf_bias(unsigned int cpu)
 { return -1; };
 static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
 { return 0; };
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index f406adc40bad..e8f8f643a627 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -1,7 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
 #if defined(__i386__) || defined(__x86_64__)
 
 #include "helpers/helpers.h"
+#include "helpers/sysfs.h"
+
+#include "cpupower_intern.h"
 
 #define MSR_AMD_HWCR	0xc0010015
 
@@ -40,4 +48,44 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 		*support = *active = 1;
 	return 0;
 }
+
+int cpupower_intel_get_perf_bias(unsigned int cpu)
+{
+	char linebuf[MAX_LINE_LEN];
+	char path[SYSFS_PATH_MAX];
+	unsigned long val;
+	char *endp;
+
+	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+
+	if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0)
+		return -1;
+
+	val = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+
+	return val;
+}
+
+int cpupower_intel_set_perf_bias(unsigned int cpu, unsigned int val)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[3] = {};
+
+	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+	snprintf(linebuf, sizeof(linebuf), "%d", val);
+
+	if (cpupower_write_sysfs(path, linebuf, 3) <= 0)
+		return -1;
+
+	return 0;
+}
+
 #endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/msr.c b/tools/power/cpupower/utils/helpers/msr.c
index ab9950748838..8b0b6be74bb8 100644
--- a/tools/power/cpupower/utils/helpers/msr.c
+++ b/tools/power/cpupower/utils/helpers/msr.c
@@ -11,7 +11,6 @@
 /* Intel specific MSRs */
 #define MSR_IA32_PERF_STATUS		0x198
 #define MSR_IA32_MISC_ENABLES		0x1a0
-#define MSR_IA32_ENERGY_PERF_BIAS	0x1b0
 #define MSR_NEHALEM_TURBO_RATIO_LIMIT	0x1ad
 
 /*
@@ -73,33 +72,6 @@ int write_msr(int cpu, unsigned int idx, unsigned long long val)
 	return -1;
 }
 
-int msr_intel_get_perf_bias(unsigned int cpu)
-{
-	unsigned long long val;
-	int ret;
-
-	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
-		return -1;
-
-	ret = read_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &val);
-	if (ret)
-		return ret;
-	return val;
-}
-
-int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
-{
-	int ret;
-
-	if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
-		return -1;
-
-	ret = write_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, val);
-	if (ret)
-		return ret;
-	return 0;
-}
-
 unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
 {
 	unsigned long long val;
-- 
cgit v1.2.3-70-g09d2


From 6d6501d912a9a5e1b73d7fbf419b90a8ec11ed7a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 15 Oct 2020 14:50:16 +0200
Subject: tools/power/turbostat: Read energy_perf_bias from sysfs

... instead of poking at the MSR directly.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Len Brown <lenb@kernel.org>
Cc: linux-pm@vger.kernel.org
Link: https://lkml.kernel.org/r/20201029190259.3476-3-bp@alien8.de
---
 tools/power/x86/turbostat/turbostat.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 33b370865d16..0baec7ea1bbd 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1721,6 +1721,25 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 	return 0;
 }
 
+int get_epb(int cpu)
+{
+	char path[128 + PATH_BYTES];
+	int ret, epb = -1;
+	FILE *fp;
+
+	sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
+
+	fp = fopen_or_die(path, "r");
+
+	ret = fscanf(fp, "%d", &epb);
+	if (ret != 1)
+		err(1, "%s(%s)", __func__, path);
+
+	fclose(fp);
+
+	return epb;
+}
+
 void get_apic_id(struct thread_data *t)
 {
 	unsigned int eax, ebx, ecx, edx;
@@ -3631,9 +3650,8 @@ dump_sysfs_pstate_config(void)
  */
 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
-	unsigned long long msr;
 	char *epb_string;
-	int cpu;
+	int cpu, epb;
 
 	if (!has_epb)
 		return 0;
@@ -3649,10 +3667,11 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return -1;
 	}
 
-	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
+	epb = get_epb(cpu);
+	if (epb < 0)
 		return 0;
 
-	switch (msr & 0xF) {
+	switch (epb) {
 	case ENERGY_PERF_BIAS_PERFORMANCE:
 		epb_string = "performance";
 		break;
@@ -3666,7 +3685,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		epb_string = "custom";
 		break;
 	}
-	fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
+	fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From fe0a5788624c8b8f113a35bbe4636e37f9321241 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 15 Oct 2020 14:58:48 +0200
Subject: tools/power/x86_energy_perf_policy: Read energy_perf_bias from sysfs

... and stop poking at the MSR directly.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201029190259.3476-4-bp@alien8.de
---
 .../x86_energy_perf_policy.c                       | 109 +++++++++++++++++++--
 1 file changed, 99 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 3fe1eed900d4..ad6aed1fabf8 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -91,6 +91,9 @@ unsigned int has_hwp_request_pkg;	/* IA32_HWP_REQUEST_PKG */
 
 unsigned int bdx_highest_ratio;
 
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define SYSFS_PATH_MAX 255
+
 /*
  * maintain compatibility with original implementation, but don't document it:
  */
@@ -668,6 +671,48 @@ int put_msr(int cpu, int offset, unsigned long long new_msr)
 	return 0;
 }
 
+static unsigned int read_sysfs(const char *path, char *buf, size_t buflen)
+{
+	ssize_t numread;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	numread = read(fd, buf, buflen - 1);
+	if (numread < 1) {
+		close(fd);
+		return 0;
+	}
+
+	buf[numread] = '\0';
+	close(fd);
+
+	return (unsigned int) numread;
+}
+
+static unsigned int write_sysfs(const char *path, char *buf, size_t buflen)
+{
+	ssize_t numwritten;
+	int fd;
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwritten = write(fd, buf, buflen - 1);
+	if (numwritten < 1) {
+		perror("write failed\n");
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwritten;
+}
+
 void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str)
 {
 	if (cpu != -1)
@@ -745,17 +790,61 @@ void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int ms
 	put_msr(cpu, msr_offset, msr);
 }
 
+static int get_epb(int cpu)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[3];
+	char *endp;
+	long val;
+
+	if (!has_epb)
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+
+	if (!read_sysfs(path, linebuf, 3))
+		return -1;
+
+	val = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+
+	return (int)val;
+}
+
+static int set_epb(int cpu, int val)
+{
+	char path[SYSFS_PATH_MAX];
+	char linebuf[3];
+	char *endp;
+	int ret;
+
+	if (!has_epb)
+		return -1;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu);
+	snprintf(linebuf, sizeof(linebuf), "%d", val);
+
+	ret = write_sysfs(path, linebuf, 3);
+	if (ret <= 0)
+		return -1;
+
+	val = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+
+	return (int)val;
+}
+
 int print_cpu_msrs(int cpu)
 {
-	unsigned long long msr;
 	struct msr_hwp_request req;
 	struct msr_hwp_cap cap;
+	int epb;
 
-	if (has_epb) {
-		get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
-
-		printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr);
-	}
+	epb = get_epb(cpu);
+	if (epb >= 0)
+		printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb);
 
 	if (!has_hwp)
 		return 0;
@@ -1038,15 +1127,15 @@ int enable_hwp_on_cpu(int cpu)
 int update_cpu_msrs(int cpu)
 {
 	unsigned long long msr;
-
+	int epb;
 
 	if (update_epb) {
-		get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
-		put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb);
+		epb = get_epb(cpu);
+		set_epb(cpu, new_epb);
 
 		if (verbose)
 			printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
-				cpu, (unsigned int) msr, (unsigned int) new_epb);
+				cpu, epb, (unsigned int) new_epb);
 	}
 
 	if (update_turbo) {
-- 
cgit v1.2.3-70-g09d2


From 1c756cd429d8f3da33d31f2a970284b9d5260534 Mon Sep 17 00:00:00 2001
From: Al Grant <al.grant@foss.arm.com>
Date: Fri, 13 Nov 2020 20:38:26 +0000
Subject: perf inject: Fix file corruption due to event deletion

"perf inject" can create corrupt files when synthesizing sample events from AUX
data. This happens when in the input file, the first event (for the AUX data)
has a different sample_type from the second event (generally dummy).

Specifically, they differ in the bits that indicate the standard fields
appended to perf records in the mmap buffer. "perf inject" deletes the first
event and moves up the second event to first position.

The problem is with the synthetic PERF_RECORD_MMAP (etc.) events created
by "perf record".

Since these are synthetic versions of events which are normally produced
by the kernel, they have to have the standard fields appended as
described by sample_type.

"perf record" fills these in with zeroes, including the IDENTIFIER
field; perf readers interpret records with zero IDENTIFIER using the
descriptor for the first event in the file.

Since "perf inject" changes the first event, these synthetic records are
then processed with the wrong value of sample_type, and the perf reader
reads bad data, reports on incorrect length records etc.

Mismatching sample_types are seen with "perf record -e cs_etm//", where the AUX
event has TID|TIME|CPU|IDENTIFIER and the dummy event has TID|TIME|IDENTIFIER.

Perhaps they could be the same, but it isn't normally a problem if they aren't
- perf has no problems reading the file.

The sample_types have to agree on the position of IDENTIFIER, because
that's how perf finds the right event descriptor in the first place, but
they don't normally have to agree on other fields, and perf doesn't
check that they do.

The problem is specific to the way "perf inject" reorganizes the events
and the way synthetic MMAP events are recorded with a zero identifier. A
simple solution is to stop "perf inject" deleting the tracing event.

Committer testing

Removed the now unused 'evsel' variable, update the comment about the
evsel removal not being performed anymore, and apply the patch manually
as it failed with this warning:

  warning: Patch sent with format=flowed; space at the end of lines might be lost.

Testing it with:

  $ perf bench internals inject-build-id
  # Running 'internals/inject-build-id' benchmark:
    Average build-id injection took: 8.543 msec (+- 0.130 msec)
    Average time per event: 0.838 usec (+- 0.013 usec)
    Average memory usage: 12717 KB (+- 9 KB)
    Average build-id-all injection took: 5.710 msec (+- 0.058 msec)
    Average time per event: 0.560 usec (+- 0.006 usec)
    Average memory usage: 12079 KB (+- 7 KB)
  $

Signed-off-by: Al Grant <al.grant@arm.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LPU-Reference: b9cf5611-daae-2390-3439-6617f8f0a34b@foss.arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-inject.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 452a75fe68e5..0462dc8db2e3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -779,25 +779,15 @@ static int __cmd_inject(struct perf_inject *inject)
 			dsos__hit_all(session);
 		/*
 		 * The AUX areas have been removed and replaced with
-		 * synthesized hardware events, so clear the feature flag and
-		 * remove the evsel.
+		 * synthesized hardware events, so clear the feature flag.
 		 */
 		if (inject->itrace_synth_opts.set) {
-			struct evsel *evsel;
-
 			perf_header__clear_feat(&session->header,
 						HEADER_AUXTRACE);
 			if (inject->itrace_synth_opts.last_branch ||
 			    inject->itrace_synth_opts.add_last_branch)
 				perf_header__set_feat(&session->header,
 						      HEADER_BRANCH_STACK);
-			evsel = perf_evlist__id2evsel_strict(session->evlist,
-							     inject->aux_id);
-			if (evsel) {
-				pr_debug("Deleting %s\n", evsel__name(evsel));
-				evlist__remove(session->evlist, evsel);
-				evsel__delete(evsel);
-			}
 		}
 		session->header.data_offset = output_data_offset;
 		session->header.data_size = inject->bytes_written;
-- 
cgit v1.2.3-70-g09d2


From 3d05181a085c7a070746c838ea25aebf25f17d52 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Mon, 2 Nov 2020 16:00:25 +0800
Subject: perf vendor events: Update Skylake client events to v50

- Update Skylake events to v50.
- Update Skylake JSON metrics from TMAM 4.0.
- Fix the issue in DRAM_Parallel_Reads
- Fix the perf test warning

Before:

  root@kbl-ppc:~# perf stat -M DRAM_Parallel_Reads -- sleep 1
  event syntax error: '{arb/event=0x80,umask=0x2/,arb/event=0x80,umask=0x2,thresh=1/}:W'
                       \___ unknown term 'thresh' for pmu 'uncore_arb'

  valid terms: event,edge,inv,umask,cmask,config,config1,config2,name,period,percore

  Initial error:
  event syntax error: '..umask=0x2/,arb/event=0x80,umask=0x2,thresh=1/}:W'
                                    \___ Cannot find PMU `arb'. Missing kernel support?

  root@kbl-ppc:~# perf test metrics
  10: PMU events                                 :
  10.3: Parsing of PMU event table metrics               : Skip (some metrics failed)
  10.4: Parsing of PMU event table metrics with fake PMUs: Ok
  67: Parse and process metrics                  : Ok

After:

  root@kbl-ppc:~# perf stat -M MEM_Parallel_Reads -- sleep 1

   Performance counter stats for 'system wide':

           4,951,646      arb/event=0x80,umask=0x2/ #    26.30 MEM_Parallel_Reads       (50.04%)
             188,251      arb/event=0x80,umask=0x2,cmask=1/                                     (49.96%)

         1.000867010 seconds time elapsed

  root@kbl-ppc:~# perf test metrics
  10: PMU events                                 :
  10.3: Parsing of PMU event table metrics               : Ok
  10.4: Parsing of PMU event table metrics with fake PMUs: Ok
  67: Parse and process metrics                  : Ok

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Tested-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/lkml/93fae76f-ce2b-ab0b-3ae9-cc9a2b4cbaec@linux.intel.com/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/arch/x86/skylake/cache.json  | 4100 ++++++++++----------
 .../arch/x86/skylake/floating-point.json           |   76 +-
 .../perf/pmu-events/arch/x86/skylake/frontend.json |  644 +--
 tools/perf/pmu-events/arch/x86/skylake/memory.json | 2279 +++++------
 tools/perf/pmu-events/arch/x86/skylake/other.json  |   60 +-
 .../perf/pmu-events/arch/x86/skylake/pipeline.json | 1266 +++---
 .../pmu-events/arch/x86/skylake/skl-metrics.json   |  271 +-
 .../arch/x86/skylake/virtual-memory.json           |  374 +-
 8 files changed, 4560 insertions(+), 4510 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json
index 720458139049..27ea2b00ad00 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json
@@ -1,2928 +1,2926 @@
 [
     {
-        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
-        "EventCode": "0x24",
-        "Counter": "0,1,2,3",
-        "UMask": "0x21",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand Data Read miss L2, no rejects",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x22",
-        "EventName": "L2_RQSTS.RFO_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "RFO requests that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x24",
-        "EventName": "L2_RQSTS.CODE_RD_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 cache misses when fetching instructions",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400108000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Demand requests that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x27",
-        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand requests that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x38",
-        "EventName": "L2_RQSTS.PF_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0080001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "All requests that miss L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
         "Counter": "0,1,2,3",
-        "UMask": "0x3f",
-        "EventName": "L2_RQSTS.MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "All requests that miss L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand code readshave any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0xc1",
-        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand Data Read requests that hit L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code readshave any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0xc2",
-        "EventName": "L2_RQSTS.RFO_HIT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "RFO requests that hit L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
-        "EventCode": "0x24",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0xc4",
-        "EventName": "L2_RQSTS.CODE_RD_HIT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x24",
-        "Counter": "0,1,2,3",
-        "UMask": "0xd8",
-        "EventName": "L2_RQSTS.PF_HIT",
+        "EventName": "L2_RQSTS.PF_MISS",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x38"
     },
     {
-        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0xe1",
-        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand Data Read requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C0004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0xe2",
-        "EventName": "L2_RQSTS.ALL_RFO",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "RFO requests to L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100040001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the total number of L2 code requests.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0xe4",
-        "EventName": "L2_RQSTS.ALL_CODE_RD",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 code requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040100001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Demand requests to L2 cache.",
-        "EventCode": "0x24",
+        "BriefDescription": "Demand requests that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0xe7",
-        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Demand requests to L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x24",
-        "Counter": "0,1,2,3",
-        "UMask": "0xf8",
-        "EventName": "L2_RQSTS.ALL_PF",
+        "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+        "PublicDescription": "Demand requests that miss L2 cache.",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x27"
     },
     {
-        "PublicDescription": "All L2 requests.",
-        "EventCode": "0x24",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0xff",
-        "EventName": "L2_RQSTS.REFERENCES",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "All L2 requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400100002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
-        "EventCode": "0x2E",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x41",
-        "Errata": "SKL057",
-        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Core-originated cacheable demand requests missed L3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
-        "EventCode": "0x2E",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4f",
-        "Errata": "SKL057",
-        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
-        "EventCode": "0x48",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L1D_PEND_MISS.PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "L1D miss outstandings duration in cycles",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
-        "EventCode": "0x48",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with L1D load Misses outstanding.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x48",
+        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
-        "EventCode": "0x48",
+        "BriefDescription": "L2 writebacks that access L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "L1D_PEND_MISS.FB_FULL",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF0",
+        "EventName": "L2_TRANS.L2_WB",
+        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
-        "EventCode": "0x51",
+        "BriefDescription": "L2 cache lines filling L2",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L1D.REPLACEMENT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "L1D data line replacements",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF1",
+        "EventName": "L2_LINES_IN.ALL",
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1f"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C0002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
-        "CounterMask": "6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400040004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Demand Data Read requests sent to uncore",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080080002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080100001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C0004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Demand Data Read requests sent to uncore",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C0002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Cacheable and noncachaeble code read requests",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080100004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Demand and prefetch data reads",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040020001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Any memory transaction that reached the SQ.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
-        "EventCode": "0xB2",
+        "BriefDescription": "All retired store instructions.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x82"
     },
     {
-        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
-        "EventCode": "0xB7, 0xBB",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080020004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions that miss the STLB.",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
         "Counter": "0,1,2,3",
-        "UMask": "0x11",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.SILENT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired store instructions that miss the STLB.",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x12",
-        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "L1_Hit_Indication": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040048000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x21",
-        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000088000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x41",
-        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x42",
-        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C0002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "L1_Hit_Indication": "1"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD0",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x81",
-        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "All retired load instructions. (Precise Event)",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080020001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "All retired store instructions.",
-        "EventCode": "0xD0",
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x82",
-        "EventName": "MEM_INST_RETIRED.ALL_STORES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "All retired store instructions. (Precise Event)",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1",
-        "L1_Hit_Indication": "1"
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C0004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
-        "SampleAfterValue": "50021",
-        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
-        "SampleAfterValue": "50021",
-        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C0001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
-        "EventCode": "0xD1",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C8000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD2",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000080001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
-        "EventCode": "0xD2",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200020001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
-        "EventCode": "0xD2",
+        "BriefDescription": "Core-originated cacheable demand requests missed L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL057",
+        "EventCode": "0x2E",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x41"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
-        "EventCode": "0xD2",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
-        "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_PF",
+        "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf8"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xD4",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
         "CounterHTOff": "0,1,2,3",
-        "Data_LA": "1"
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts L2 writebacks that access L2 cache.",
-        "EventCode": "0xF0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "L2_TRANS.L2_WB",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "L2 writebacks that access L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
-        "EventCode": "0xF1",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1f",
-        "EventName": "L2_LINES_IN.ALL",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "L2 cache lines filling L2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xF2",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "L2_LINES_OUT.SILENT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xF2",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "L2_LINES_OUT.NON_SILENT",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
-        "EventCode": "0xF2",
+        "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "L2_LINES_OUT.USELESS_PREF",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "EventCode": "0xF2",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "L2_LINES_OUT.USELESS_HWPF",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
-        "EventCode": "0xF4",
+        "BriefDescription": "RFO requests that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "SQ_MISC.SPLIT_LOCK",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Number of cache line split locks sent to uncore.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x22"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000408000",
+        "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.NON_SILENT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040028000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400408000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200408000",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100408000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080408000",
+        "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_HWPF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C8000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)have any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)have any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C8000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C8000",
+        "BriefDescription": "All requests that miss L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.MISS",
+        "PublicDescription": "All requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3f"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C8000",
+        "BriefDescription": "L2 code requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_CODE_RD",
+        "PublicDescription": "Counts the total number of L2 code requests.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe4"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0108000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000108000",
+        "BriefDescription": "RFO requests that hit L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400108000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200108000",
+        "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100108000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080108000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040108000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000040002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0028000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0088000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000088000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400088000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200088000",
+        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100088000",
+        "BriefDescription": "L2 cache misses when fetching instructions",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_MISS",
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x24"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080088000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040088000",
+        "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000048000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400048000",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200048000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080048000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0028000",
+        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000028000",
+        "BriefDescription": "Demand requests to L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+        "PublicDescription": "Demand requests to L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe7"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400028000",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100048000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200028000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100028000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080028000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040028000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000018000",
+        "AnyThread": "1",
+        "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests have any response type.",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x02001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100400004",
+        "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL057",
+        "EventCode": "0x2E",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts core-originated cacheable requests to the  L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2.  It does not include all accesses to the L3.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x4f"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C0004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C0004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C0004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C0004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C0004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C0004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C0004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0100004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000100004",
+        "BriefDescription": "Retired load instructions that miss the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+        "PEBS": "1",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x11"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400100004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200100004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100100004",
+        "BriefDescription": "Counts demand data readshave any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000010001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data readshave any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080100004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040100004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0080004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000080004",
+        "BriefDescription": "L1D data line replacements",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x51",
+        "EventName": "L1D.REPLACEMENT",
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400080004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200080004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100080004",
+        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD4",
+        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080080004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040080004",
+        "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0040004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000040004",
+        "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF2",
+        "EventName": "L2_LINES_OUT.USELESS_PREF",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400040004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200040004",
+        "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.PF_HIT",
+        "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xd8"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100040004",
+        "BriefDescription": "Demand Data Read miss L2, no rejects",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+        "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x21"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080040004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040040004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0020004",
+        "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000020004",
+        "BriefDescription": "All retired load instructions.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x81"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400020004",
+        "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "20011",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200020004",
+        "BriefDescription": "Demand Data Read requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+        "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100020004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080020004",
+        "BriefDescription": "All L2 requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "PublicDescription": "All L2 requests.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xff"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040020004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000010004",
+        "BriefDescription": "Cycles with L1D load Misses outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000400002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400400002",
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080400002",
+        "BriefDescription": "Number of cache line split locks sent to uncore.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xF4",
+        "EventName": "SQ_MISC.SPLIT_LOCK",
+        "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040400002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC01C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C0002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C0002",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C0002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C0002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C0002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C0002",
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C0002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0100002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000100002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400100002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200100002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100100002",
+        "BriefDescription": "L1D miss outstandings duration in cycles",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.PENDING",
+        "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080100002",
+        "BriefDescription": "Demand Data Read requests that hit L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+        "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc1"
+    },
+    {
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040100002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x04001C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0080002",
+        "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000080002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400080002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200080002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100080002",
+        "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x42"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080080002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040080002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0040002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000040002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400040002",
+        "BriefDescription": "Any memory transaction that reached the SQ.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+        "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200040002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100040002",
+        "BriefDescription": "Counts any other requestshave any response type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0000018000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requestshave any response type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080040002",
+        "BriefDescription": "Cacheable and noncachaeble code read requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040040002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0020002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000020002",
+        "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400020002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200020002",
+        "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.CODE_RD_HIT",
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100020002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080020002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040020002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs) have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000010002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs) have any response type.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040088000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000400001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200020002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100400001",
+        "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.FB_FULL",
+        "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0040108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC01C0001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x10001C0001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x04001C0001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x02001C0001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100080004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x01001C0001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00801C0001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00401C0001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0100001",
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000100001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1000408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400100001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400020004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200100001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100100001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080100001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040100001",
+        "BriefDescription": "Retired load instructions with locked access.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "UMask": "0x21"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0080001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00801C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000080001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400080001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200080001",
+        "BriefDescription": "Demand and prefetch data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100080001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080080001",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE",
+        "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040080001",
+        "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x41"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0040001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0100080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000040001",
+        "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB2",
+        "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+        "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400040001",
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200040001",
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100040001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200080002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080040001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080100002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040040001",
+        "BriefDescription": "Retired store instructions that miss the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD0",
+        "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+        "L1_Hit_Indication": "1",
+        "PEBS": "1",
         "SampleAfterValue": "100003",
+        "UMask": "0x12"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC0020001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x01001C8000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1000020001",
+        "BriefDescription": "RFO requests to L2 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.ALL_RFO",
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe2"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0400020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0200400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0200020001",
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xD1",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0100020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0400020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0080080001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0080020001",
+        "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+        "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0040020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC0080002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads have any response type.",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0000010001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads have any response type.",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00401C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
index 213dd6230cf2..834e1cd841fc 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
@@ -1,67 +1,67 @@
 [
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
         "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired.  Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired.  Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Cycles with any input/output SSE or FP assist",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired.  Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xCA",
+        "EventName": "FP_ASSIST.ANY",
+        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1e"
     },
     {
-        "EventCode": "0xC7",
+        "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired.  Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
-        "EventCode": "0xCA",
+        "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1e",
-        "EventName": "FP_ASSIST.ANY",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles with any input/output SSE or FP assist",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC7",
+        "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     }
 ]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
index 7fa95a35e3ca..e84504d6adea 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
@@ -1,482 +1,516 @@
 [
     {
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "IDQ.MITE_UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x80",
+        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "IDQ.MITE_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x14",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "IDQ.DSB_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x408006",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
         "EventName": "IDQ.DSB_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "IDQ.MS_DSB_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
         "Counter": "0,1,2,3",
-        "UMask": "0x18",
-        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xE6",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x18",
-        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x11",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "IDQ.MS_MITE_UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "EventCode": "0x79",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x24",
-        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles MITE is delivering 4 Uops",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x401006",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
-        "EventCode": "0x79",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
-        "UMask": "0x24",
-        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles MITE is delivering any Uop",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
         "Counter": "0,1,2,3",
-        "UMask": "0x30",
-        "EventName": "IDQ.MS_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
-        "EventCode": "0x79",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x30",
-        "EdgeDetect": "1",
-        "EventName": "IDQ.MS_SWITCHES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
         "EventCode": "0x79",
-        "Counter": "0,1,2,3",
-        "UMask": "0x30",
-        "EventName": "IDQ.MS_UOPS",
+        "EventName": "IDQ.MS_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x30"
     },
     {
-        "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
-        "EventCode": "0x80",
+        "BriefDescription": "Cycles MITE is delivering any Uop",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "ICACHE_16B.IFDATA_STALL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x24"
     },
     {
-        "EventCode": "0x83",
+        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
         "EventName": "ICACHE_64B.IFTAG_HIT",
         "SampleAfterValue": "200003",
-        "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x83",
+        "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "ICACHE_64B.IFTAG_MISS",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_SWITCHES",
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
     },
     {
-        "EventCode": "0x83",
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "ICACHE_64B.IFTAG_STALL",
-        "SampleAfterValue": "200003",
-        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x13",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MITE_CYCLES",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x404006",
+        "PEBS": "1",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+        "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4  x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions).  c. Instruction Decode Queue (IDQ) delivers four uops.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
-        "CounterMask": "3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_MITE_UOPS",
+        "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
-        "EventCode": "0x9C",
+        "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_STALL",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     },
     {
-        "EventCode": "0x9C",
-        "Invert": "1",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.",
-        "EventCode": "0xAB",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x18"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x11",
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x15",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x12",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.L1I_MISS",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.DSB_UOPS",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x13",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x420006",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x14",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400806",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x15",
+        "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400106",
+        "PEBS": "2",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x400206",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400206",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x200206",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x400406",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x400406",
+        "BriefDescription": "Cycles MITE is delivering 4 Uops",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x24"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x400806",
+        "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_DSB_CYCLES",
+        "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x401006",
+        "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x79",
+        "EventName": "IDQ.MS_UOPS",
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x30"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end  after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x402006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x410006",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x404006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x200206",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x408006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x300206",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x410006",
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x100206",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x420006",
+        "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
-        "MSRIndex": "0x3F7",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0x79",
+        "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+        "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x18"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
-        "EventCode": "0xC6",
-        "MSRValue": "0x100206",
+        "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAB",
+        "EventName": "DSB2MITE_SWITCHES.COUNT",
+        "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x402006",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "EventCode": "0xC6",
-        "MSRValue": "0x300206",
+        "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+        "Invert": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x83",
+        "EventName": "ICACHE_64B.IFTAG_MISS",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC6",
+        "EventName": "FRONTEND_RETIRED.L1I_MISS",
         "MSRIndex": "0x3F7",
+        "MSRValue": "0x12",
+        "PEBS": "1",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
         "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x9C",
+        "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+        "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json
index f197b4c7695b..7bd3ae338343 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json
@@ -1,1604 +1,1611 @@
 [
     {
-        "PublicDescription": "Number of times a TSX line had a cache conflict.",
-        "EventCode": "0x54",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "TX_MEM.ABORT_CONFLICT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "EventCode": "0x54",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "TX_MEM.ABORT_CAPACITY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000080004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000100002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084008000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
-        "EventCode": "0x54",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "TX_EXEC.MISC1",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204000004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "TX_EXEC.MISC2",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000088000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "TX_EXEC.MISC3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "RTM region detected inside HLE.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "TX_EXEC.MISC4",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
-        "EventCode": "0x5d",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "TX_EXEC.MISC5",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_TIMER",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x60",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
-        "CounterMask": "6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204000002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEM",
+        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
         "Counter": "0,1,2,3",
-        "UMask": "0x6",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
-        "CounterMask": "6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
-        "EventCode": "0xB0",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000080002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Demand Data Read requests who miss L3 cache",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
-        "EventCode": "0xC3",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "Errata": "SKL089",
-        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004008000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
-        "EventCode": "0xC8",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "HLE_RETIRED.START",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution started.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times HLE commit succeeded.",
-        "EventCode": "0xC8",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "HLE_RETIRED.COMMIT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution successfully committed",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PEBS": "1",
-        "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
-        "EventCode": "0xC8",
+        "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "HLE_RETIRED.ABORTED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "HLE_RETIRED.ABORTED_MEM",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000028000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "HLE_RETIRED.ABORTED_TIMER",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC408000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x20",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "100007",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC8",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000020004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "RTM_RETIRED.START",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution started.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044000002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times RTM commit succeeded.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "RTM_RETIRED.COMMIT",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution successfully committed",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204008000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
-        "EventCode": "0xC9",
+        "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "RTM_RETIRED.ABORTED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "RTM_RETIRED.ABORTED_MEM",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C400001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC9",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CONFLICT",
+        "PublicDescription": "Number of times a TSX line had a cache conflict.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000080001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
-        "EventCode": "0xC9",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x40",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "2003",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x4",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
-        "MSRIndex": "0x3F6",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0204000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x8",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "50021",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000020002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x10",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "20011",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x20",
+        "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC5",
+        "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x40",
+        "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "2003",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC4",
+        "PublicDescription": "RTM region detected inside HLE.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x80",
+        "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "1009",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC3",
+        "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x100",
+        "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "503",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC2",
+        "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
-        "EventCode": "0xCD",
-        "MSRValue": "0x200",
+        "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
-        "MSRIndex": "0x3F6",
-        "SampleAfterValue": "101",
-        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
-        "TakenAlone": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5d",
+        "EventName": "TX_EXEC.MISC1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C408000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C408000",
+        "BriefDescription": "Number of times an RTM execution successfully committed",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.COMMIT",
+        "PublicDescription": "Number of times RTM commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C408000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000100001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C408000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C408000",
+        "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC408000",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000048000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C408000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C400004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004008000",
+        "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_TIMER",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204008000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004000004",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand code reads",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104008000",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084008000",
+        "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044008000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000408000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C8000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC400002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts any other requests",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000108000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000040004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000088000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000048000",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts any other requests",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000028000",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts any other requests",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C0004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC400004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x103C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C400004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0104008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C400004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC400004",
+        "BriefDescription": "Number of times an HLE execution successfully committed",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.COMMIT",
+        "PublicDescription": "Number of times HLE commit succeeded.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C400004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000020001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4000004",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004000004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004000004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000040002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404000004",
+        "BriefDescription": "Demand Data Read requests who miss L3 cache",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB0",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204000004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104000004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084000004",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044000004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000400004",
+        "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+        "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C0004",
+        "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEM",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000100004",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x100",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "503",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000080004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000040004",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000020004",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C400002",
+        "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times RTM abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C400002",
+        "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED",
+        "PEBS": "1",
+        "PublicDescription": "Number of times HLE abort was triggered.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C400002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x203C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C400002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C400002",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x10",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "20011",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC400002",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004008000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C400002",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4000002",
+        "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x60",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004000002",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004000002",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x200",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "101",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404000002",
+        "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_CAPACITY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204000002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0084000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104000002",
+        "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+        "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084000002",
+        "BriefDescription": "Number of times an RTM execution started.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.START",
+        "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044000002",
+        "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL089",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000400002",
+        "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+        "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C0002",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C0002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000100002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000080002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
         "BriefDescription": "Counts all demand data writes (RFOs)",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x1004000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000040002",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all demand data writes (RFOs)",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000020002",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts all demand data writes (RFOs)",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C0001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FFC400001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4000002",
+        "Offcore": "1",
+        "PublicDescription": "Counts all demand data writes (RFOs)",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x007C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x203C400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000100004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x103C400001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x00BC400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x043C400001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x043C400001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
-    },
-    {
         "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x023C400001",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x013C400001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x00BC400001",
+        "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC9",
+        "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+        "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x007C400001",
+        "BriefDescription": "Number of times an HLE execution started.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.START",
+        "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x3FC4000001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FFC400004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2004000001",
+        "BriefDescription": "Counts demand data reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
         "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2004000001",
+        "Offcore": "1",
+        "PublicDescription": "Counts demand data reads",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3FC4000001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x1004000001",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x80",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "1009",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0404000001",
+        "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x54",
+        "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+        "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0204000001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000108000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0104000001",
+        "BriefDescription": "Counts all demand code reads",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0044000004",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand code reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0084000001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x013C400002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x0044000001",
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "6",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x6"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000400001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x023C408000",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts any other requests",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x20001C0001",
+        "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC8",
+        "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000100001",
+        "BriefDescription": "Counts any other requests",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x20001C8000",
+        "Offcore": "1",
+        "PublicDescription": "Counts any other requests",
         "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x2000040001",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts demand data reads",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000080001",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x4",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000040001",
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
-        "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x8",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "50021",
+        "TakenAlone": "1",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data reads",
-        "EventCode": "0xB7, 0xBB",
-        "MSRValue": "0x2000020001",
+        "BriefDescription": "Counts all demand data writes (RFOs)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
-        "MSRIndex": "0x1a6, 0x1a7",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts demand data reads",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xB7, 0xBB",
+        "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x0404000002",
         "Offcore": "1",
-        "CounterHTOff": "0,1,2,3"
+        "PublicDescription": "Counts all demand data writes (RFOs)",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/other.json b/tools/perf/pmu-events/arch/x86/skylake/other.json
index 84a316d380ac..1a3683f1de91 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/other.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/other.json
@@ -1,48 +1,56 @@
 [
     {
-        "EventCode": "0x32",
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0x32",
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
         "EventName": "SW_PREFETCH_ACCESS.T0",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0x32",
+        "BriefDescription": "Number of hardware interrupts received by the processor.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCB",
+        "EventName": "HW_INTERRUPTS.RECEIVED",
+        "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+        "SampleAfterValue": "203",
+        "UMask": "0x1"
     },
     {
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x32",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of PREFETCHW instructions executed.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
-        "EventCode": "0xCB",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "HW_INTERRUPTS.RECEIVED",
-        "SampleAfterValue": "203",
-        "BriefDescription": "Number of hardware interrupts received by the processor.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x09",
+        "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
index 4a891fbbc4bb..f46e93a57fb4 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
@@ -1,967 +1,969 @@
 [
     {
-        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
-        "Counter": "Fixed counter 0",
-        "UMask": "0x1",
-        "EventName": "INST_RETIRED.ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Instructions retired from execution.",
-        "CounterHTOff": "Fixed counter 0"
-    },
-    {
-        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
-        "Counter": "Fixed counter 1",
-        "UMask": "0x2",
-        "EventName": "CPU_CLK_UNHALTED.THREAD",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles when the thread is not in halt state",
-        "CounterHTOff": "Fixed counter 1"
+        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+        "SampleAfterValue": "2000003"
     },
     {
-        "Counter": "Fixed counter 1",
-        "UMask": "0x2",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
-        "CounterHTOff": "Fixed counter 1"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
-        "Counter": "Fixed counter 2",
-        "UMask": "0x3",
-        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Reference cycles when the core is not in halt state.",
-        "CounterHTOff": "Fixed counter 2"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
-        "EventCode": "0x03",
+        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "LD_BLOCKS.STORE_FORWARD",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x14",
+        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
-        "EventCode": "0x03",
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "LD_BLOCKS.NO_SR",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x07",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
         "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+        "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
-        "EventCode": "0x0D",
+        "BriefDescription": "Far branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "INT_MISC.RECOVERY_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "PublicDescription": "This event counts far branch instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40"
     },
     {
-        "EventCode": "0x0D",
+        "BriefDescription": "Counts the number of x87 uops dispatched.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.X87",
+        "PublicDescription": "Counts the number of x87 uops executed.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "EventCode": "0x0D",
+        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
-        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x4C",
+        "EventName": "LOAD_HIT_PRE.SW_PF",
+        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
-        "EventCode": "0x0E",
+        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_ISSUED.ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
-        "EventCode": "0x0E",
-        "Invert": "1",
+        "BriefDescription": "Total execution stalls.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.",
-        "EventCode": "0x0E",
+        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x0E",
-        "Counter": "0,1,2,3",
-        "UMask": "0x20",
         "EventName": "UOPS_ISSUED.SLOW_LEA",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "EventCode": "0x14",
+        "BriefDescription": "Cycles with less than 10 actually retired uops.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ARITH.DIVIDER_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "10",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
-        "EventCode": "0x3C",
+        "BriefDescription": "Thread cycles when thread is not in halt state",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
         "EventName": "CPU_CLK_UNHALTED.THREAD_P",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Thread cycles when thread is not in halt state",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+        "SampleAfterValue": "2000003"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
-        "EventCode": "0x3C",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "EdgeDetect": "1",
-        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3C",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
         "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
-        "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+        "Counter": "Fixed counter 1",
+        "CounterHTOff": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "AnyThread": "1",
-        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.THREAD",
+        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0x3C",
+        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
-        "SampleAfterValue": "2503",
-        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "Invert": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
-        "EventCode": "0x4C",
+        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LOAD_HIT_PRE.SW_PF",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
-        "EventCode": "0x59",
+        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "8",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
-        "EventCode": "0x5E",
+        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_ACTIVE",
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
-        "EventCode": "0x5E",
-        "Invert": "1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EdgeDetect": "1",
-        "EventName": "RS_EVENTS.EMPTY_END",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES",
+        "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
-        "EventCode": "0x87",
+        "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ILD_STALL.LCP",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 0",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 0",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 1",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 2",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 3",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 4",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 5",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 5",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 6",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 6",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x40"
     },
     {
-        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
-        "EventCode": "0xA1",
+        "BriefDescription": "Cycles per thread when uops are executed in port 7",
         "Counter": "0,1,2,3",
-        "UMask": "0x80",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA1",
         "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+        "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles per thread when uops are executed in port 7",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x80"
     },
     {
-        "PublicDescription": "Counts resource-related stall cycles.",
-        "EventCode": "0xa2",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "RESOURCE_STALLS.ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Resource-related stall cycles",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
-        "EventCode": "0xA2",
-        "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "RESOURCE_STALLS.SB",
+        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+        "Counter": "1",
+        "CounterHTOff": "1",
+        "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
+        "EventName": "INST_RETIRED.PREC_DIST",
+        "PEBS": "2",
+        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xA8",
+        "EventName": "LSD.CYCLES_4_UOPS",
+        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Total execution stalls.",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
         "Counter": "0,1,2,3",
-        "UMask": "0x5",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
-        "CounterMask": "5",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xA3",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
-        "CounterMask": "8",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.",
         "Counter": "0,1,2,3",
-        "UMask": "0xc",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x59",
+        "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD",
+        "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
-        "CounterMask": "12",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
-        "CounterMask": "16",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xA3",
+        "BriefDescription": "Not taken branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x14",
-        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
-        "CounterMask": "20",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "3",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
-        "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+        "BriefDescription": "Reference cycles when the core is not in halt state.",
+        "Counter": "Fixed counter 2",
+        "CounterHTOff": "Fixed counter 2",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x3"
     },
     {
-        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
-        "EventCode": "0xA6",
+        "BriefDescription": "All mispredicted macro branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+        "SampleAfterValue": "400009"
     },
     {
-        "EventCode": "0xA6",
+        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC1",
+        "EventName": "OTHER_ASSISTS.ANY",
+        "SampleAfterValue": "100003",
+        "UMask": "0x3f"
     },
     {
-        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
-        "EventCode": "0xA8",
+        "BriefDescription": "Cycles without actually retired uops.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LSD.UOPS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xC2",
+        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "Invert": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of Uops delivered by the LSD.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
-        "EventCode": "0xA8",
+        "BriefDescription": "Number of Uops delivered by the LSD.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LSD.CYCLES_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA8",
+        "EventName": "LSD.UOPS",
+        "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
-        "EventCode": "0xA8",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "LSD.CYCLES_4_UOPS",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Number of uops to be executed per-thread each cycle.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.THREAD",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x87",
+        "EventName": "ILD_STALL.LCP",
+        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
-        "EventCode": "0xB1",
-        "Invert": "1",
+        "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.STALL_CYCLES",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "EdgeDetect": "1",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_END",
+        "Invert": "1",
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "16",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Taken branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts taken branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
-        "CounterMask": "3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Number of uops executed from any thread.",
-        "EventCode": "0xB1",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE",
+        "BriefDescription": "Core cycles when the thread is not in halt state",
+        "Counter": "Fixed counter 1",
+        "CounterHTOff": "Fixed counter 1",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of uops executed on the core.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xB1",
+        "AnyThread": "1",
+        "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+        "SampleAfterValue": "25003",
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xB1",
+        "BriefDescription": "Direct and indirect near call instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
-        "CounterMask": "2",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts both direct and indirect near call instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x2"
     },
     {
-        "EventCode": "0xB1",
+        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
-        "CounterMask": "3",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x40"
     },
     {
-        "EventCode": "0xB1",
+        "BriefDescription": "Resource-related stall cycles",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xa2",
+        "EventName": "RESOURCE_STALLS.ANY",
+        "PublicDescription": "Counts resource-related stall cycles.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
-        "CounterMask": "4",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xB1",
-        "Invert": "1",
+        "BriefDescription": "Self-modifying code (SMC) detected.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "5",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x5"
     },
     {
-        "PublicDescription": "Counts the number of x87 uops executed.",
-        "EventCode": "0xB1",
+        "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "UOPS_EXECUTED.X87",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+        "SampleAfterValue": "25003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "4",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts the number of x87 uops dispatched.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
-        "EventCode": "0xC0",
+        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "Errata": "SKL091, SKL044",
-        "EventName": "INST_RETIRED.ANY_P",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3",
+        "CounterMask": "20",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of instructions retired. General Counter - architectural event",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x14"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
-        "EventCode": "0xC0",
-        "Counter": "1",
-        "UMask": "0x1",
-        "Errata": "SKL091, SKL044",
-        "EventName": "INST_RETIRED.PREC_DIST",
+        "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+        "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
-        "CounterHTOff": "1"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
-        "EventCode": "0xC0",
-        "Invert": "1",
+        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
         "Counter": "0,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,2,3",
+        "CounterMask": "10",
         "Errata": "SKL091, SKL044",
+        "EventCode": "0xC0",
         "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+        "Invert": "1",
+        "PEBS": "2",
+        "PublicDescription": "Number of cycles using an always true condition applied to  PEBS instructions retired event. (inst_ret< 16)",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of cycles using always true condition applied to  PEBS instructions retired event.",
-        "CounterMask": "10",
-        "CounterHTOff": "0,2,3"
+        "UMask": "0x1"
     },
     {
-        "EventCode": "0xC1",
+        "BriefDescription": "Retirement slots used.",
         "Counter": "0,1,2,3",
-        "UMask": "0x3f",
-        "EventName": "OTHER_ASSISTS.ANY",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts the retirement slots used.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0xC2",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
         "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+        "PublicDescription": "Counts the retirement slots used.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Retirement slots used.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "This event counts cycles without actually retired uops.",
-        "EventCode": "0xC2",
-        "Invert": "1",
+        "AnyThread": "1",
+        "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_RETIRED.STALL_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0E",
+        "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+        "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles without actually retired uops.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
-        "EventCode": "0xC2",
-        "Invert": "1",
+        "BriefDescription": "Number of macro-fused uops retired. (non precise)",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MACRO_FUSED",
+        "PublicDescription": "Counts the number of macro-fused uops retired. (non precise)",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Cycles with less than 10 actually retired uops.",
-        "CounterMask": "10",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Number of machine clears (nukes) of any type.",
-        "EventCode": "0xC3",
+        "BriefDescription": "Increments whenever there is an update to the LBR array.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EdgeDetect": "1",
-        "EventName": "MACHINE_CLEARS.COUNT",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Number of machine clears (nukes) of any type.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xCC",
+        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
-        "EventCode": "0xC3",
+        "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "MACHINE_CLEARS.SMC",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Self-modifying code (SMC) detected.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x5E",
+        "EventName": "RS_EVENTS.EMPTY_CYCLES",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts all (macro) branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Instructions retired from execution.",
+        "Counter": "Fixed counter 0",
+        "CounterHTOff": "Fixed counter 0",
+        "EventName": "INST_RETIRED.ANY",
+        "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "All (macro) branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "2",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.CONDITIONAL",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Conditional branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA2",
+        "EventName": "RESOURCE_STALLS.SB",
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.NEAR_CALL",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Direct and indirect near call instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0x3C",
+        "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+        "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+        "SampleAfterValue": "100007"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "All (macro) branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
+        "CounterHTOff": "0,1,2,3",
         "Errata": "SKL091",
+        "EventCode": "0xC4",
         "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "All (macro) branch instructions retired.",
-        "CounterHTOff": "0,1,2,3"
+        "UMask": "0x4"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Mispredicted macro branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
+        "CounterHTOff": "0,1,2,3",
+        "EventCode": "0xC5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+        "PEBS": "2",
+        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+        "SampleAfterValue": "400009",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Return instructions retired.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "Errata": "SKL091",
+        "EventCode": "0xC4",
         "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "PublicDescription": "This event counts return instructions retired.",
         "SampleAfterValue": "100007",
-        "BriefDescription": "Return instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Counts all not taken macro branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Not taken branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+        "PublicDescription": "This event counts not taken branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "Taken branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
-        "EventCode": "0xC4",
+        "BriefDescription": "Conditional branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "Errata": "SKL091",
-        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Counts the number of far branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
-        "EventCode": "0xC5",
-        "Counter": "0,1,2,3",
-        "UMask": "0x0",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts conditional branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "All mispredicted macro branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Mispredicted conditional branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xC5",
         "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+        "PEBS": "1",
+        "PublicDescription": "This event counts mispredicted conditional branch instructions retired.",
         "SampleAfterValue": "400009",
-        "BriefDescription": "Mispredicted conditional branch instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Number of uops executed on the core.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "BR_MISP_RETIRED.NEAR_CALL",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CORE",
+        "PublicDescription": "Number of uops executed from any thread.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PEBS": "2",
-        "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Mispredicted macro branch instructions retired.",
-        "CounterHTOff": "0,1,2,3"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "12",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0xc"
     },
     {
-        "PEBS": "1",
-        "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
-        "EventCode": "0xC5",
+        "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
-        "SampleAfterValue": "400009",
-        "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xA6",
+        "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
-        "EventCode": "0xCC",
+        "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x0D",
+        "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Increments whenever there is an update to the LBR array.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x80"
     },
     {
-        "EventCode": "0xCC",
+        "BriefDescription": "All (macro) branch instructions retired.",
         "Counter": "0,1,2,3",
-        "UMask": "0x40",
-        "EventName": "ROB_MISC_EVENTS.PAUSE_INST",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "Errata": "SKL091",
+        "EventCode": "0xC4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PublicDescription": "Counts all (macro) branch instructions retired.",
+        "SampleAfterValue": "400009"
+    },
+    {
+        "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xB1",
+        "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
-        "EventCode": "0xE6",
+        "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "BACLEARS.ANY",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0xA3",
+        "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
     }
 ]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index 8704efeb8d31..4cd246782dde 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -1,370 +1,371 @@
 [
     {
-        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
         "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.",
+        "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
         "MetricGroup": "TopdownL1",
-        "MetricName": "Frontend_Bound",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+        "MetricName": "Frontend_Bound"
     },
     {
+        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Frontend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Frontend_Bound_SMT"
     },
     {
-        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
         "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.",
+        "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
         "MetricGroup": "TopdownL1",
-        "MetricName": "Bad_Speculation",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
+        "MetricName": "Bad_Speculation"
     },
     {
+        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Bad_Speculation_SMT",
-        "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Bad_Speculation_SMT"
     },
     {
+        "MetricConstraint": "NO_NMI_WATCHDOG",
+        "MetricGroup": "TopdownL1",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.",
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
         "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )",
-        "MetricGroup": "TopdownL1",
-        "MetricName": "Backend_Bound",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
+        "MetricName": "Backend_Bound"
     },
     {
+        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
+        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Backend_Bound_SMT",
-        "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Backend_Bound_SMT"
     },
     {
-        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. ",
+        "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
         "MetricGroup": "TopdownL1",
-        "MetricName": "Retiring",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. "
+        "MetricName": "Retiring"
     },
     {
+        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved.  Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU.",
         "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
-        "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
         "MetricGroup": "TopdownL1_SMT",
-        "MetricName": "Retiring_SMT",
-        "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category.  Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved.  Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance.  For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
+        "MetricName": "Retiring_SMT"
     },
     {
-        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
         "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
-        "MetricGroup": "TopDownL1",
+        "BriefDescription": "Instructions Per Cycle (per Logical Processor)",
+        "MetricGroup": "Summary",
         "MetricName": "IPC"
     },
     {
-        "BriefDescription": "Uops Per Instruction",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+        "BriefDescription": "Uops Per Instruction",
         "MetricGroup": "Pipeline;Retire",
         "MetricName": "UPI"
     },
     {
-        "BriefDescription": "Instruction per taken branch",
         "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Instruction per taken branch",
         "MetricGroup": "Branches;Fetch_BW;PGO",
         "MetricName": "IpTB"
     },
     {
-        "BriefDescription": "Branch instructions per taken branch. ",
-        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
-        "MetricGroup": "Branches;PGO",
-        "MetricName": "BpTB"
-    },
-    {
-        "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions",
-        "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )",
-        "MetricGroup": "PGO;IcMiss",
-        "MetricName": "IFetch_Line_Utilization"
-    },
-    {
-        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
-        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
-        "MetricGroup": "DSB;Fetch_BW",
-        "MetricName": "DSB_Coverage"
-    },
-    {
-        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
         "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
-        "MetricGroup": "Pipeline;Summary",
+        "BriefDescription": "Cycles Per Instruction (per Logical Processor)",
+        "MetricGroup": "Pipeline",
         "MetricName": "CPI"
     },
     {
-        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+        "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricGroup": "Summary",
         "MetricName": "CLKS"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
         "MetricExpr": "4 * cycles",
+        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
         "MetricGroup": "TopDownL1",
         "MetricName": "SLOTS"
     },
     {
-        "BriefDescription": "Total issue-pipeline slots (per-Physical Core)",
-        "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+        "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
         "MetricGroup": "TopDownL1_SMT",
         "MetricName": "SLOTS_SMT"
     },
     {
-        "BriefDescription": "Instructions per Load (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
-        "MetricGroup": "Instruction_Type",
-        "MetricName": "IpL"
-    },
-    {
-        "BriefDescription": "Instructions per Store (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
-        "MetricGroup": "Instruction_Type",
-        "MetricName": "IpS"
-    },
-    {
-        "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Branches;Instruction_Type",
-        "MetricName": "IpB"
-    },
-    {
-        "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
-        "MetricGroup": "Branches",
-        "MetricName": "IpCall"
-    },
-    {
-        "BriefDescription": "Total number of retired Instructions",
-        "MetricExpr": "INST_RETIRED.ANY",
-        "MetricGroup": "Summary",
-        "MetricName": "Instructions"
-    },
-    {
-        "BriefDescription": "Instructions Per Cycle (per physical core)",
         "MetricExpr": "INST_RETIRED.ANY / cycles",
-        "MetricGroup": "SMT",
+        "BriefDescription": "Instructions Per Cycle (per physical core)",
+        "MetricGroup": "SMT;TopDownL1",
         "MetricName": "CoreIPC"
     },
     {
+        "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
         "BriefDescription": "Instructions Per Cycle (per physical core)",
-        "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
-        "MetricGroup": "SMT",
+        "MetricGroup": "SMT;TopDownL1",
         "MetricName": "CoreIPC_SMT"
     },
     {
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / cycles",
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / cycles",
         "MetricGroup": "FLOPS",
         "MetricName": "FLOPc"
     },
     {
+        "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
         "MetricGroup": "FLOPS_SMT",
         "MetricName": "FLOPc_SMT"
     },
     {
+        "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )",
         "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
-        "MetricGroup": "Pipeline",
+        "MetricGroup": "Pipeline;Ports_Utilization",
         "MetricName": "ILP"
     },
     {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
         "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricGroup": "BrMispredicts",
         "MetricName": "Branch_Misprediction_Cost"
     },
     {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)",
-        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricGroup": "BrMispredicts_SMT",
         "MetricName": "Branch_Misprediction_Cost_SMT"
     },
     {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
         "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
         "MetricGroup": "BrMispredicts",
         "MetricName": "IpMispredict"
     },
     {
-        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
         "MetricExpr": "( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+        "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
         "MetricGroup": "SMT",
         "MetricName": "CORE_CLKS"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS",
+        "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)",
+        "MetricGroup": "Instruction_Type",
+        "MetricName": "IpLoad"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
+        "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
+        "MetricGroup": "Instruction_Type",
+        "MetricName": "IpStore"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+        "MetricGroup": "Branches;Instruction_Type",
+        "MetricName": "IpBranch"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
+        "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
+        "MetricGroup": "Branches",
+        "MetricName": "IpCall"
+    },
+    {
+        "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
+        "BriefDescription": "Branch instructions per taken branch. ",
+        "MetricGroup": "Branches;PGO",
+        "MetricName": "BpTkBranch"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
+        "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
+        "MetricGroup": "FLOPS;FP_Arith;Instruction_Type",
+        "MetricName": "IpFLOP"
+    },
+    {
+        "MetricExpr": "INST_RETIRED.ANY",
+        "BriefDescription": "Total number of retired Instructions",
+        "MetricGroup": "Summary;TopDownL1",
+        "MetricName": "Instructions"
+    },
+    {
+        "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
+        "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
+        "MetricGroup": "DSB;Fetch_BW",
+        "MetricName": "DSB_Coverage"
+    },
+    {
         "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
         "MetricGroup": "Memory_Bound;Memory_Lat",
         "MetricName": "Load_Miss_Real_Latency"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
         "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
         "MetricGroup": "Memory_Bound;Memory_BW",
         "MetricName": "MLP"
     },
     {
-        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricConstraint": "NO_NMI_WATCHDOG",
         "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricGroup": "TLB",
-        "MetricName": "Page_Walks_Utilization",
-        "MetricConstraint": "NO_NMI_WATCHDOG"
+        "MetricName": "Page_Walks_Utilization"
     },
     {
+        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
-        "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )",
         "MetricGroup": "TLB_SMT",
         "MetricName": "Page_Walks_Utilization_SMT"
     },
     {
-        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
         "MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
+        "BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
         "MetricGroup": "Memory_BW",
         "MetricName": "L1D_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
         "MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
+        "BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
         "MetricGroup": "Memory_BW",
         "MetricName": "L2_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricGroup": "Memory_BW",
         "MetricName": "L3_Cache_Fill_BW"
     },
     {
-        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
         "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
-        "MetricGroup": "Memory_BW",
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricGroup": "Memory_BW;Offcore",
         "MetricName": "L3_Cache_Access_BW"
     },
     {
-        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
+        "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L1MPKI"
     },
     {
-        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
+        "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L2MPKI"
     },
     {
-        "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Cache_Misses",
+        "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
+        "MetricGroup": "Cache_Misses;Offcore",
         "MetricName": "L2MPKI_All"
     },
     {
-        "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
+        "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L2HPKI_All"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricGroup": "Cache_Misses",
         "MetricName": "L3MPKI"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
-        "MetricGroup": "Summary",
+        "BriefDescription": "Average CPU Utilization",
+        "MetricGroup": "HPC;Summary",
         "MetricName": "CPU_Utilization"
     },
     {
+        "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 ) / duration_time",
-        "MetricGroup": "FLOPS;Summary",
+        "MetricGroup": "FLOPS;HPC",
         "MetricName": "GFLOPs"
     },
     {
-        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
         "MetricGroup": "Power",
         "MetricName": "Turbo_Utilization"
     },
     {
+        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )",
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
-        "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
-        "MetricGroup": "SMT;Summary",
+        "MetricGroup": "SMT",
         "MetricName": "SMT_2T_Utilization"
     },
     {
-        "BriefDescription": "Fraction of cycles spent in Kernel mode",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
-        "MetricGroup": "Summary",
+        "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode",
+        "MetricGroup": "OS",
         "MetricName": "Kernel_Utilization"
     },
     {
-        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * ( arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@ ) / 1000000 / duration_time / 1000",
-        "MetricGroup": "Memory_BW",
+        "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+        "MetricGroup": "HPC;Memory_BW;SoC",
         "MetricName": "DRAM_BW_Use"
     },
     {
+        "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,cmask\\=1@",
         "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
-        "MetricExpr": "arb@event\\=0x80\\,umask\\=0x2@ / arb@event\\=0x80\\,umask\\=0x2\\,thresh\\=1@",
-        "MetricGroup": "Memory_BW",
-        "MetricName": "DRAM_Parallel_Reads"
+        "MetricGroup": "Memory_BW;SoC",
+        "MetricName": "MEM_Parallel_Reads"
     },
     {
-        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )",
         "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )",
-        "MetricGroup": "",
+        "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
+        "MetricGroup": "Branches;OS",
         "MetricName": "IpFarBranch"
     },
     {
-        "BriefDescription": "C3 residency percent per core",
         "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C3 residency percent per core",
         "MetricGroup": "Power",
         "MetricName": "C3_Core_Residency"
     },
     {
-        "BriefDescription": "C6 residency percent per core",
         "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C6 residency percent per core",
         "MetricGroup": "Power",
         "MetricName": "C6_Core_Residency"
     },
     {
-        "BriefDescription": "C7 residency percent per core",
         "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C7 residency percent per core",
         "MetricGroup": "Power",
         "MetricName": "C7_Core_Residency"
     },
     {
-        "BriefDescription": "C2 residency percent per package",
         "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C2 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C2_Pkg_Residency"
     },
     {
-        "BriefDescription": "C3 residency percent per package",
         "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C3 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C3_Pkg_Residency"
     },
     {
-        "BriefDescription": "C6 residency percent per package",
         "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C6 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C6_Pkg_Residency"
     },
     {
-        "BriefDescription": "C7 residency percent per package",
         "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "BriefDescription": "C7 residency percent per package",
         "MetricGroup": "Power",
         "MetricName": "C7_Pkg_Residency"
     }
diff --git a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json
index 2bcba7daca14..432530d15c26 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json
@@ -1,284 +1,284 @@
 [
     {
-        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
-        "EventCode": "0x08",
+        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
-    },
-    {
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x08",
-        "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x08",
+        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x08",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
-        "EventCode": "0x08",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
         "Counter": "0,1,2,3",
-        "UMask": "0xe",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
-        "EventCode": "0x08",
+        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xAE",
+        "EventName": "ITLB.ITLB_FLUSH",
+        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
-        "EventCode": "0x08",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
-        "EventCode": "0x08",
+        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
         "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
         "SampleAfterValue": "2000003",
-        "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+        "Counter": "0,1,2,3",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
         "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Store misses in all DTLB levels that cause page walks",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xBD",
+        "EventName": "TLB_FLUSH.DTLB_THREAD",
+        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages.  The page walks can end with or without a page fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Misses at all ITLB levels that cause page walks",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
-        "EventCode": "0x49",
+        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0xe",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
-        "EventCode": "0x49",
+        "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
     },
     {
-        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
-        "EventCode": "0x49",
+        "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
-        "EventCode": "0x49",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks (1G page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x8"
     },
     {
-        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
-        "EventCode": "0x4F",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "EPT.WALK_PENDING",
-        "SampleAfterValue": "2000003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
     },
     {
-        "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
-        "EventCode": "0x85",
+        "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Misses at all ITLB levels that cause page walks",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     },
     {
-        "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
         "Counter": "0,1,2,3",
-        "UMask": "0x2",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.STLB_HIT",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
         "Counter": "0,1,2,3",
-        "UMask": "0x4",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Load misses in all DTLB levels that cause page walks",
         "Counter": "0,1,2,3",
-        "UMask": "0x8",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+        "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x1"
     },
     {
-        "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
-        "EventCode": "0x85",
+        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
         "Counter": "0,1,2,3",
-        "UMask": "0xe",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x4f",
+        "EventName": "EPT.WALK_PENDING",
+        "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
-        "EventCode": "0x85",
+        "BriefDescription": "STLB flush attempts",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "ITLB_MISSES.WALK_PENDING",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xBD",
+        "EventName": "TLB_FLUSH.STLB_ANY",
+        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x20"
     },
     {
-        "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
-        "EventCode": "0x85",
+        "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
         "Counter": "0,1,2,3",
-        "UMask": "0x10",
-        "EventName": "ITLB_MISSES.WALK_ACTIVE",
-        "SampleAfterValue": "100003",
-        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
-        "CounterMask": "1",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
     },
     {
-        "EventCode": "0x85",
+        "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "ITLB_MISSES.STLB_HIT",
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "CounterMask": "1",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
         "SampleAfterValue": "100003",
-        "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "UMask": "0x10"
     },
     {
-        "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
-        "EventCode": "0xAE",
+        "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "ITLB.ITLB_FLUSH",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4"
     },
     {
-        "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
-        "EventCode": "0xBD",
+        "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
         "Counter": "0,1,2,3",
-        "UMask": "0x1",
-        "EventName": "TLB_FLUSH.DTLB_THREAD",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "DTLB flush attempts of the thread-specific entries",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0xe"
     },
     {
-        "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
-        "EventCode": "0xBD",
+        "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
         "Counter": "0,1,2,3",
-        "UMask": "0x20",
-        "EventName": "TLB_FLUSH.STLB_ANY",
-        "SampleAfterValue": "100007",
-        "BriefDescription": "STLB flush attempts",
-        "CounterHTOff": "0,1,2,3,4,5,6,7"
+        "CounterHTOff": "0,1,2,3,4,5,6,7",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8"
     }
 ]
\ No newline at end of file
-- 
cgit v1.2.3-70-g09d2


From 29396cd573da08ae9ab0b75925c2f6b3cabb9dfa Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 26 Aug 2020 08:30:55 -0700
Subject: perf expr: Force encapsulation on expr_id_data

This patch resolves some undefined behavior where variables in
expr_id_data were accessed (for debugging) without being defined. To
better enforce the tagged union behavior, the struct is moved into
expr.c and accessors provided. Tag values (kinds) are explicitly
identified.

Signed-off-by: Ian Rogers <irogers@google.com>
Reviewed-By: Kajol Jain<kjain@linux.ibm.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/r/20200826153055.2067780-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/expr.c        | 68 +++++++++++++++++++++++++++++++++++++------
 tools/perf/util/expr.h        | 17 +++--------
 tools/perf/util/expr.y        |  2 +-
 tools/perf/util/metricgroup.c |  4 +--
 4 files changed, 66 insertions(+), 25 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 53482ef53c41..a850fd0be3ee 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -17,6 +17,29 @@
 extern int expr_debug;
 #endif
 
+struct expr_id_data {
+	union {
+		double val;
+		struct {
+			double val;
+			const char *metric_name;
+			const char *metric_expr;
+		} ref;
+		struct expr_id	*parent;
+	};
+
+	enum {
+		/* Holding a double value. */
+		EXPR_ID_DATA__VALUE,
+		/* Reference to another metric. */
+		EXPR_ID_DATA__REF,
+		/* A reference but the value has been computed. */
+		EXPR_ID_DATA__REF_VALUE,
+		/* A parent is remembered for the recursion check. */
+		EXPR_ID_DATA__PARENT,
+	} kind;
+};
+
 static size_t key_hash(const void *key, void *ctx __maybe_unused)
 {
 	const char *str = (const char *)key;
@@ -48,6 +71,7 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
 		return -ENOMEM;
 
 	data_ptr->parent = ctx->parent;
+	data_ptr->kind = EXPR_ID_DATA__PARENT;
 
 	ret = hashmap__set(&ctx->ids, id, data_ptr,
 			   (const void **)&old_key, (void **)&old_data);
@@ -69,7 +93,7 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
 	if (!data_ptr)
 		return -ENOMEM;
 	data_ptr->val = val;
-	data_ptr->is_ref = false;
+	data_ptr->kind = EXPR_ID_DATA__VALUE;
 
 	ret = hashmap__set(&ctx->ids, id, data_ptr,
 			   (const void **)&old_key, (void **)&old_data);
@@ -114,8 +138,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
 	 */
 	data_ptr->ref.metric_name = ref->metric_name;
 	data_ptr->ref.metric_expr = ref->metric_expr;
-	data_ptr->ref.counted = false;
-	data_ptr->is_ref = true;
+	data_ptr->kind = EXPR_ID_DATA__REF;
 
 	ret = hashmap__set(&ctx->ids, name, data_ptr,
 			   (const void **)&old_key, (void **)&old_data);
@@ -148,17 +171,30 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
 
 	data = *datap;
 
-	pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n",
-		  data->is_ref, data->ref.counted, data->val, id);
-
-	if (data->is_ref && !data->ref.counted) {
-		data->ref.counted = true;
+	switch (data->kind) {
+	case EXPR_ID_DATA__VALUE:
+		pr_debug2("lookup(%s): val %f\n", id, data->val);
+		break;
+	case EXPR_ID_DATA__PARENT:
+		pr_debug2("lookup(%s): parent %s\n", id, data->parent->id);
+		break;
+	case EXPR_ID_DATA__REF:
+		pr_debug2("lookup(%s): ref metric name %s\n", id,
+			data->ref.metric_name);
 		pr_debug("processing metric: %s ENTRY\n", id);
-		if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) {
+		data->kind = EXPR_ID_DATA__REF_VALUE;
+		if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) {
 			pr_debug("%s failed to count\n", id);
 			return -1;
 		}
 		pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+		break;
+	case EXPR_ID_DATA__REF_VALUE:
+		pr_debug2("lookup(%s): ref val %f metric name %s\n", id,
+			data->ref.val, data->ref.metric_name);
+		break;
+	default:
+		assert(0);  /* Unreachable. */
 	}
 
 	return 0;
@@ -241,3 +277,17 @@ int expr__find_other(const char *expr, const char *one,
 
 	return ret;
 }
+
+double expr_id_data__value(const struct expr_id_data *data)
+{
+	if (data->kind == EXPR_ID_DATA__VALUE)
+		return data->val;
+	assert(data->kind == EXPR_ID_DATA__REF_VALUE);
+	return data->ref.val;
+}
+
+struct expr_id *expr_id_data__parent(struct expr_id_data *data)
+{
+	assert(data->kind == EXPR_ID_DATA__PARENT);
+	return data->parent;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index fc2b5e824a66..dcf8d19b83c8 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -23,19 +23,7 @@ struct expr_parse_ctx {
 	struct expr_id	*parent;
 };
 
-struct expr_id_data {
-	union {
-		double val;
-		struct {
-			const char *metric_name;
-			const char *metric_expr;
-			bool counted;
-		} ref;
-		struct expr_id	*parent;
-	};
-
-	bool is_ref;
-};
+struct expr_id_data;
 
 struct expr_scanner_ctx {
 	int start_token;
@@ -57,4 +45,7 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
 int expr__find_other(const char *expr, const char *one,
 		struct expr_parse_ctx *ids, int runtime);
 
+double expr_id_data__value(const struct expr_id_data *data);
+struct expr_id *expr_id_data__parent(struct expr_id_data *data);
+
 #endif
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index d34b370391c6..b2ada8f8309a 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -93,7 +93,7 @@ expr:	  NUMBER
 						YYABORT;
 					}
 
-					$$ = data->val;
+					$$ = expr_id_data__value(data);
 					free($1);
 				}
 	| expr '|' expr		{ $$ = (long)$1 | (long)$3; }
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 060454a17293..81d201c8b833 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -833,7 +833,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa
 	if (ret)
 		return ret;
 
-	p = data->parent;
+	p = expr_id_data__parent(data);
 
 	while (p->parent) {
 		if (!strcmp(p->id, id)) {
@@ -854,7 +854,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa
 	}
 
 	p->id     = strdup(id);
-	p->parent = data->parent;
+	p->parent = expr_id_data__parent(data);
 	*parent   = p;
 
 	return p->id ? 0 : -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From 568beb27959b0515d325ea1c6cf211eed2d66740 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Fri, 13 Nov 2020 10:20:53 -0800
Subject: perf test: Avoid an msan warning in a copied stack.

This fix is for a failure that occurred in the DWARF unwind perf test.

Stack unwinders may probe memory when looking for frames.

Memory sanitizer will poison and track uninitialized memory on the
stack, and on the heap if the value is copied to the heap.

This can lead to false memory sanitizer failures for the use of an
uninitialized value.

Avoid this problem by removing the poison on the copied stack.

The full msan failure with track origins looks like:

==2168==WARNING: MemorySanitizer: use-of-uninitialized-value
    #0 0x559ceb10755b in handle_cfi elfutils/libdwfl/frame_unwind.c:648:8
    #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4
    #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7
    #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10
    #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17
    #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17
    #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14
    #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10
    #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8
    #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8
    #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26
    #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0)
    #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2
    #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9
    #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9
    #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8
    #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9
    #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9
    #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4
    #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9
    #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11
    #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8
    #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2
    #23 0x559cea95fbce in main tools/perf/perf.c:539:3

  Uninitialized value was stored to memory at
    #0 0x559ceb106acf in __libdwfl_frame_reg_set elfutils/libdwfl/frame_unwind.c:77:22
    #1 0x559ceb106acf in handle_cfi elfutils/libdwfl/frame_unwind.c:627:13
    #2 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4
    #3 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7
    #4 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10
    #5 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17
    #6 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17
    #7 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14
    #8 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10
    #9 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8
    #10 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8
    #11 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26
    #12 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0)
    #13 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2
    #14 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9
    #15 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9
    #16 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8
    #17 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9
    #18 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9
    #19 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4
    #20 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9
    #21 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11
    #22 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8
    #23 0x559cea95fbce in run_argv tools/perf/perf.c:409:2
    #24 0x559cea95fbce in main tools/perf/perf.c:539:3

  Uninitialized value was stored to memory at
    #0 0x559ceb106a54 in handle_cfi elfutils/libdwfl/frame_unwind.c:613:9
    #1 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4
    #2 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7
    #3 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10
    #4 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17
    #5 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17
    #6 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14
    #7 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10
    #8 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8
    #9 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8
    #10 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26
    #11 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0)
    #12 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2
    #13 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9
    #14 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9
    #15 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8
    #16 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9
    #17 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9
    #18 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4
    #19 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9
    #20 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11
    #21 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8
    #22 0x559cea95fbce in run_argv tools/perf/perf.c:409:2
    #23 0x559cea95fbce in main tools/perf/perf.c:539:3

  Uninitialized value was stored to memory at
    #0 0x559ceaff8800 in memory_read tools/perf/util/unwind-libdw.c:156:10
    #1 0x559ceb10f053 in expr_eval elfutils/libdwfl/frame_unwind.c:501:13
    #2 0x559ceb1060cc in handle_cfi elfutils/libdwfl/frame_unwind.c:603:18
    #3 0x559ceb105448 in __libdwfl_frame_unwind elfutils/libdwfl/frame_unwind.c:741:4
    #4 0x559ceb0ece90 in dwfl_thread_getframes elfutils/libdwfl/dwfl_frame.c:435:7
    #5 0x559ceb0ec6b7 in get_one_thread_frames_cb elfutils/libdwfl/dwfl_frame.c:379:10
    #6 0x559ceb0ec6b7 in get_one_thread_cb elfutils/libdwfl/dwfl_frame.c:308:17
    #7 0x559ceb0ec6b7 in dwfl_getthreads elfutils/libdwfl/dwfl_frame.c:283:17
    #8 0x559ceb0ec6b7 in getthread elfutils/libdwfl/dwfl_frame.c:354:14
    #9 0x559ceb0ec6b7 in dwfl_getthread_frames elfutils/libdwfl/dwfl_frame.c:388:10
    #10 0x559ceaff6ae6 in unwind__get_entries tools/perf/util/unwind-libdw.c:236:8
    #11 0x559ceabc9dbc in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:111:8
    #12 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26
    #13 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0)
    #14 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2
    #15 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9
    #16 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9
    #17 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8
    #18 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9
    #19 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9
    #20 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4
    #21 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9
    #22 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11
    #23 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8
    #24 0x559cea95fbce in run_argv tools/perf/perf.c:409:2
    #25 0x559cea95fbce in main tools/perf/perf.c:539:3

  Uninitialized value was stored to memory at
    #0 0x559cea9027d9 in __msan_memcpy llvm/llvm-project/compiler-rt/lib/msan/msan_interceptors.cpp:1558:3
    #1 0x559cea9d2185 in sample_ustack tools/perf/arch/x86/tests/dwarf-unwind.c:41:2
    #2 0x559cea9d202c in test__arch_unwind_sample tools/perf/arch/x86/tests/dwarf-unwind.c:72:9
    #3 0x559ceabc9cbd in test_dwarf_unwind__thread tools/perf/tests/dwarf-unwind.c:106:6
    #4 0x559ceabca5cf in test_dwarf_unwind__compare tools/perf/tests/dwarf-unwind.c:138:26
    #5 0x7f812a6865b0 in bsearch (libc.so.6+0x4e5b0)
    #6 0x559ceabca871 in test_dwarf_unwind__krava_3 tools/perf/tests/dwarf-unwind.c:162:2
    #7 0x559ceabca926 in test_dwarf_unwind__krava_2 tools/perf/tests/dwarf-unwind.c:169:9
    #8 0x559ceabca946 in test_dwarf_unwind__krava_1 tools/perf/tests/dwarf-unwind.c:174:9
    #9 0x559ceabcae12 in test__dwarf_unwind tools/perf/tests/dwarf-unwind.c:211:8
    #10 0x559ceabbc4ab in run_test tools/perf/tests/builtin-test.c:418:9
    #11 0x559ceabbc4ab in test_and_print tools/perf/tests/builtin-test.c:448:9
    #12 0x559ceabbac70 in __cmd_test tools/perf/tests/builtin-test.c:669:4
    #13 0x559ceabbac70 in cmd_test tools/perf/tests/builtin-test.c:815:9
    #14 0x559cea960e30 in run_builtin tools/perf/perf.c:313:11
    #15 0x559cea95fbce in handle_internal_command tools/perf/perf.c:365:8
    #16 0x559cea95fbce in run_argv tools/perf/perf.c:409:2
    #17 0x559cea95fbce in main tools/perf/perf.c:539:3

  Uninitialized value was created by an allocation of 'bf' in the stack frame of function 'perf_event__synthesize_mmap_events'
    #0 0x559ceafc5f60 in perf_event__synthesize_mmap_events tools/perf/util/synthetic-events.c:445

SUMMARY: MemorySanitizer: use-of-uninitialized-value elfutils/libdwfl/frame_unwind.c:648:8 in handle_cfi
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: clang-built-linux@googlegroups.com
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandeep Dasgupta <sdasgup@google.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201113182053.754625-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/tests/dwarf-unwind.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 4e40402a4f81..478078fb0f22 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -38,6 +38,13 @@ static int sample_ustack(struct perf_sample *sample,
 	stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
 
 	memcpy(buf, (void *) sp, stack_size);
+#ifdef MEMORY_SANITIZER
+	/*
+	 * Copying the stack may copy msan poison, avoid false positives in the
+	 * unwinder by removing the poison here.
+	 */
+	__msan_unpoison(buf, stack_size);
+#endif
 	stack->data = (char *) buf;
 	stack->size = stack_size;
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 08d3e27718bd45ea3284b1b99a2082a233b8667c Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 11 Nov 2020 13:26:36 +0100
Subject: KVM: selftests: Make test skipping consistent

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201111122636.73346-12-drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c           |  4 ++--
 tools/testing/selftests/kvm/x86_64/user_msr_test.c         |  6 +++++-
 .../selftests/kvm/x86_64/vmx_preemption_timer_test.c       | 14 +++++++-------
 3 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index b10a27485bad..732b244d6956 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -211,8 +211,8 @@ int main(void)
 	struct kvm_vm *vm;
 
 	if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
-		pr_info("will skip kvm paravirt restriction tests.\n");
-		return 0;
+		print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported");
+		exit(KSFT_SKIP);
 	}
 
 	vm = vm_create_default(VCPU_ID, 0, guest_main);
diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
index cbe1b08890ff..f2a667ca49c1 100644
--- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
@@ -209,7 +209,11 @@ int main(int argc, char *argv[])
 	run = vcpu_state(vm, VCPU_ID);
 
 	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	if (!rc) {
+		print_skip("KVM_CAP_X86_USER_SPACE_MSR not supported");
+		exit(KSFT_SKIP);
+	}
+
 	vm_enable_cap(vm, &cap);
 
 	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index a7737af1224f..25b783070d21 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -169,6 +169,11 @@ int main(int argc, char *argv[])
 	 */
 	nested_vmx_check_supported();
 
+	if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		print_skip("KVM_CAP_NESTED_STATE not supported");
+		exit(KSFT_SKIP);
+	}
+
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
 	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
@@ -176,13 +181,8 @@ int main(int argc, char *argv[])
 
 	vcpu_regs_get(vm, VCPU_ID, &regs1);
 
-	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-		vcpu_alloc_vmx(vm, &vmx_pages_gva);
-		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-	} else {
-		pr_info("will skip vmx preemption timer checks\n");
-		goto done;
-	}
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
 	for (stage = 1;; stage++) {
 		_vcpu_run(vm, VCPU_ID);
-- 
cgit v1.2.3-70-g09d2


From 22f232d134e142022f5e4cf2de4587a34d5b7d65 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 11 Nov 2020 13:26:35 +0100
Subject: KVM: selftests: x86: Set supported CPUIDs on default VM

Almost all tests do this anyway and the ones that don't don't
appear to care. Only vmx_set_nested_state_test assumes that
a feature (VMX) is disabled until later setting the supported
CPUIDs. It's better to disable that explicitly anyway.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Message-Id: <20201111122636.73346-11-drjones@redhat.com>
[Restore CPUID_VMX, or vmx_set_nested_state breaks. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c        |  3 ---
 .../testing/selftests/kvm/include/perf_test_util.h  |  4 ----
 tools/testing/selftests/kvm/lib/kvm_util.c          | 11 +++++++++--
 .../testing/selftests/kvm/set_memory_region_test.c  |  2 --
 .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c      |  1 -
 tools/testing/selftests/kvm/x86_64/debug_regs.c     |  1 -
 tools/testing/selftests/kvm/x86_64/evmcs_test.c     |  2 --
 tools/testing/selftests/kvm/x86_64/smm_test.c       |  2 --
 tools/testing/selftests/kvm/x86_64/state_test.c     |  1 -
 .../testing/selftests/kvm/x86_64/svm_vmcall_test.c  |  1 -
 tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c  |  1 -
 tools/testing/selftests/kvm/x86_64/user_msr_test.c  |  1 -
 .../selftests/kvm/x86_64/vmx_apic_access_test.c     |  1 -
 .../kvm/x86_64/vmx_close_while_nested_test.c        |  1 -
 .../selftests/kvm/x86_64/vmx_dirty_log_test.c       |  1 -
 .../kvm/x86_64/vmx_preemption_timer_test.c          |  1 -
 .../kvm/x86_64/vmx_set_nested_state_test.c          | 21 +++++++++++++++++++++
 .../selftests/kvm/x86_64/vmx_tsc_adjust_test.c      |  1 -
 18 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index b1f8731721b9..471baecb7772 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -740,9 +740,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
 	/* Cache the HVA pointer of the region */
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
 
-#ifdef __x86_64__
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-#endif
 	ucall_init(vm, NULL);
 
 	/* Export the shared variables to the guest */
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 2618052057b1..239421e4f6b8 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -179,10 +179,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
 
 		vm_vcpu_add_default(vm, vcpu_id, guest_code);
 
-#ifdef __x86_64__
-		vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
-#endif
-
 		vcpu_args->vcpu_id = vcpu_id;
 		vcpu_args->gva = guest_test_virt_mem +
 				 (vcpu_id * vcpu_memory_bytes);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index df2035ac54a6..b2c426adb87f 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -311,8 +311,15 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 	vm_create_irqchip(vm);
 #endif
 
-	for (i = 0; i < nr_vcpus; ++i)
-		vm_vcpu_add_default(vm, vcpuids ? vcpuids[i] : i, guest_code);
+	for (i = 0; i < nr_vcpus; ++i) {
+		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
+
+		vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+#ifdef __x86_64__
+		vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+#endif
+	}
 
 	return vm;
 }
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index b3ece55a2da6..5fa5823661da 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -121,8 +121,6 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
 
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
 				    MEM_REGION_GPA, MEM_REGION_SLOT,
 				    MEM_REGION_SIZE / getpagesize(), 0);
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 140e91901582..f40fd097cb35 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -81,7 +81,6 @@ int main(int argc, char *argv[])
 
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 	run = vcpu_state(vm, VCPU_ID);
 
 	while (1) {
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 2fc6b3af81a1..6097a8283377 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -85,7 +85,6 @@ int main(void)
 	}
 
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 	run = vcpu_state(vm, VCPU_ID);
 
 	/* Test software BPs - int3 */
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 757928199f19..37b8a78f6b74 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -92,8 +92,6 @@ int main(int argc, char *argv[])
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
 	if (!nested_vmx_supported() ||
 	    !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
 	    !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index ae39a220609f..613c42c5a9b8 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -102,8 +102,6 @@ int main(int argc, char *argv[])
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
 
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
 	run = vcpu_state(vm, VCPU_ID);
 
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index f6c8b9042f8a..32854c1462ad 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -165,7 +165,6 @@ int main(int argc, char *argv[])
 
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 	run = vcpu_state(vm, VCPU_ID);
 
 	vcpu_regs_get(vm, VCPU_ID, &regs1);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index 0e1adb4e3199..be2ca157485b 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -44,7 +44,6 @@ int main(int argc, char *argv[])
 	nested_svm_check_supported();
 
 	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
 	vcpu_alloc_svm(vm, &svm_gva);
 	vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index f8e761149daa..e357d8e222d4 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -107,7 +107,6 @@ int main(void)
 	uint64_t val;
 
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
 	val = 0;
 	ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
index f2a667ca49c1..fe88b98908ee 100644
--- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
@@ -205,7 +205,6 @@ int main(int argc, char *argv[])
 
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 	run = vcpu_state(vm, VCPU_ID);
 
 	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index 1f65342d6cb7..d14888b34adb 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -87,7 +87,6 @@ int main(int argc, char *argv[])
 	nested_vmx_check_supported();
 
 	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
 	kvm_get_cpu_address_width(&paddr_width, &vaddr_width);
 	high_gpa = (1ul << paddr_width) - getpagesize();
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index fe40ade06a49..2835a17f1b7a 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -57,7 +57,6 @@ int main(int argc, char *argv[])
 	nested_vmx_check_supported();
 
 	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
 	/* Allocate VMX pages and shared descriptors (vmx_pages). */
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index e894a638a155..537de1068554 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -82,7 +82,6 @@ int main(int argc, char *argv[])
 
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
 	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 	run = vcpu_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index 25b783070d21..a07480aed397 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -176,7 +176,6 @@ int main(int argc, char *argv[])
 
 	/* Create VM */
 	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 	run = vcpu_state(vm, VCPU_ID);
 
 	vcpu_regs_get(vm, VCPU_ID, &regs1);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index d59f3eb67c8f..5827b9bae468 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -244,6 +244,22 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 	free(state);
 }
 
+void disable_vmx(struct kvm_vm *vm)
+{
+	struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid();
+	int i;
+
+	for (i = 0; i < cpuid->nent; ++i)
+		if (cpuid->entries[i].function == 1 &&
+		    cpuid->entries[i].index == 0)
+			break;
+	TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found");
+
+	cpuid->entries[i].ecx &= ~CPUID_VMX;
+	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+	cpuid->entries[i].ecx |= CPUID_VMX;
+}
+
 int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
@@ -264,6 +280,11 @@ int main(int argc, char *argv[])
 
 	vm = vm_create_default(VCPU_ID, 0, 0);
 
+	/*
+	 * First run tests with VMX disabled to check error handling.
+	 */
+	disable_vmx(vm);
+
 	/* Passing a NULL kvm_nested_state causes a EFAULT. */
 	test_nested_state_expect_efault(vm, NULL);
 
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index fbe8417cbc2c..7e33a350b053 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -132,7 +132,6 @@ int main(int argc, char *argv[])
 	nested_vmx_check_supported();
 
 	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
 	/* Allocate VMX pages and shared descriptors (vmx_pages). */
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-- 
cgit v1.2.3-70-g09d2


From 2acc3c1bc8e98bc66b1badec42e9ea205b4fcdaa Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Mon, 16 Nov 2020 18:16:33 +0800
Subject: selftests/bpf: Fix error return code in run_getsockopt_test()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 65b4414a05eb ("selftests/bpf: add sockopt test that exercises BPF_F_ALLOW_MULTI")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201116101633.64627-1-wanghai38@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 29188d6f5c8d..51fac975b316 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
 	 */
 
 	buf = 0x40;
-	if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
 		log_err("Failed to call setsockopt(IP_TOS)");
 		goto detach;
 	}
-- 
cgit v1.2.3-70-g09d2


From de91e631bdc7e6411989e1a9ab65501a31527e0b Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Sun, 15 Nov 2020 10:46:35 +0000
Subject: libbpf: bpf__find_by_name[_kind] should use btf__get_nr_types()

When operating on split BTF, btf__find_by_name[_kind] will not
iterate over all types since they use btf->nr_types to show
the number of types to iterate over. For split BTF this is
the number of types _on top of base BTF_, so it will
underestimate the number of types to iterate over, especially
for vmlinux + module BTF, where the latter is much smaller.

Use btf__get_nr_types() instead.

Fixes: ba451366bf44 ("libbpf: Implement basic split BTF support")
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1605437195-2175-1-git-send-email-alan.maguire@oracle.com
---
 tools/lib/bpf/btf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2d0d064c6d31..8ff46cd30ca1 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -674,12 +674,12 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
 
 __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 {
-	__u32 i;
+	__u32 i, nr_types = btf__get_nr_types(btf);
 
 	if (!strcmp(type_name, "void"))
 		return 0;
 
-	for (i = 1; i <= btf->nr_types; i++) {
+	for (i = 1; i <= nr_types; i++) {
 		const struct btf_type *t = btf__type_by_id(btf, i);
 		const char *name = btf__name_by_offset(btf, t->name_off);
 
@@ -693,12 +693,12 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
 __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
 			     __u32 kind)
 {
-	__u32 i;
+	__u32 i, nr_types = btf__get_nr_types(btf);
 
 	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
 		return 0;
 
-	for (i = 1; i <= btf->nr_types; i++) {
+	for (i = 1; i <= nr_types; i++) {
 		const struct btf_type *t = btf__type_by_id(btf, i);
 		const char *name;
 
-- 
cgit v1.2.3-70-g09d2


From ee415d73dcc24caef7f6bbf292dcc365613d2188 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Sun, 15 Nov 2020 14:06:23 +0200
Subject: tools/testing/scatterlist: Fix test to compile and run

Add missing define of ALIGN_DOWN to make the test build and run.  In
addition, __sg_alloc_table_from_pages now support unaligned maximum
segment, so adapt the test result accordingly.

Fixes: 07da1223ec93 ("lib/scatterlist: Add support in dynamic allocation of SG table from pages")
Link: https://lore.kernel.org/r/20201115120623.139113-1-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 tools/testing/scatterlist/linux/mm.h | 1 +
 tools/testing/scatterlist/main.c     | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 6ae907f375d2..f9a12005fcea 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -33,6 +33,7 @@ typedef unsigned long dma_addr_t;
 #define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
 #define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
 #define ALIGN(x, a)			__ALIGN_KERNEL((x), (a))
+#define ALIGN_DOWN(x, a)		__ALIGN_KERNEL((x) - ((a) - 1), (a))
 
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
 
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index b2c7e9f7b8d3..f561aed7c657 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -52,9 +52,9 @@ int main(void)
 {
 	const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
 	struct test *test, tests[] = {
-		{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
-		{ -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
+		{ 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
+		{ 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
 		{ 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
 		{ 0, 1, pfn(0), 1, sgmax, 1 },
 		{ 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
-- 
cgit v1.2.3-70-g09d2


From 3f6719c7b62f0327c9091e26d0da10e65668229e Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 17 Nov 2020 23:29:28 +0000
Subject: bpf: Add bpf_bprm_opts_set helper

The helper allows modification of certain bits on the linux_binprm
struct starting with the secureexec bit which can be updated using the
BPF_F_BPRM_SECUREEXEC flag.

secureexec can be set by the LSM for privilege gaining executions to set
the AT_SECURE auxv for glibc.  When set, the dynamic linker disables the
use of certain environment variables (like LD_PRELOAD).

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117232929.2156341-1-kpsingh@chromium.org
---
 include/uapi/linux/bpf.h       | 16 ++++++++++++++++
 kernel/bpf/bpf_lsm.c           | 26 ++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++
 4 files changed, 60 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 162999b12790..a52299b80b9d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3787,6 +3787,16 @@ union bpf_attr {
  *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
  *	Return
  *		Pointer to the current task.
+ *
+ * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags)
+ *	Description
+ *		Set or clear certain options on *bprm*:
+ *
+ *		**BPF_F_BPRM_SECUREEXEC** Set the secureexec bit
+ *		which sets the **AT_SECURE** auxv for glibc. The bit
+ *		is cleared if the flag is not specified.
+ *	Return
+ *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3948,6 +3958,7 @@ union bpf_attr {
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
+	FN(bprm_opts_set),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_IP,
 };
 
+/* Flags for bpf_bprm_opts_set helper */
+enum {
+	BPF_F_BPRM_SECUREEXEC	= (1ULL << 0),
+};
+
 #define __bpf_md_ptr(type, name)	\
 union {					\
 	type name;			\
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 553107f4706a..b4f27a874092 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -7,6 +7,7 @@
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
+#include <linux/binfmts.h>
 #include <linux/lsm_hooks.h>
 #include <linux/bpf_lsm.h>
 #include <linux/kallsyms.h>
@@ -51,6 +52,29 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 	return 0;
 }
 
+/* Mask for all the currently supported BPRM option flags */
+#define BPF_F_BRPM_OPTS_MASK	BPF_F_BPRM_SECUREEXEC
+
+BPF_CALL_2(bpf_bprm_opts_set, struct linux_binprm *, bprm, u64, flags)
+{
+	if (flags & ~BPF_F_BRPM_OPTS_MASK)
+		return -EINVAL;
+
+	bprm->secureexec = (flags & BPF_F_BPRM_SECUREEXEC);
+	return 0;
+}
+
+BTF_ID_LIST_SINGLE(bpf_bprm_opts_set_btf_ids, struct, linux_binprm)
+
+const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
+	.func		= bpf_bprm_opts_set,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_bprm_opts_set_btf_ids[0],
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -71,6 +95,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_task_storage_get_proto;
 	case BPF_FUNC_task_storage_delete:
 		return &bpf_task_storage_delete_proto;
+	case BPF_FUNC_bprm_opts_set:
+		return &bpf_bprm_opts_set_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 31484377b8b1..c5bc947a70ad 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -418,6 +418,7 @@ class PrinterHelpers(Printer):
             'struct bpf_tcp_sock',
             'struct bpf_tunnel_key',
             'struct bpf_xfrm_state',
+            'struct linux_binprm',
             'struct pt_regs',
             'struct sk_reuseport_md',
             'struct sockaddr',
@@ -465,6 +466,7 @@ class PrinterHelpers(Printer):
             'struct bpf_tcp_sock',
             'struct bpf_tunnel_key',
             'struct bpf_xfrm_state',
+            'struct linux_binprm',
             'struct pt_regs',
             'struct sk_reuseport_md',
             'struct sockaddr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 162999b12790..a52299b80b9d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3787,6 +3787,16 @@ union bpf_attr {
  *		*ARG_PTR_TO_BTF_ID* of type *task_struct*.
  *	Return
  *		Pointer to the current task.
+ *
+ * long bpf_bprm_opts_set(struct linux_binprm *bprm, u64 flags)
+ *	Description
+ *		Set or clear certain options on *bprm*:
+ *
+ *		**BPF_F_BPRM_SECUREEXEC** Set the secureexec bit
+ *		which sets the **AT_SECURE** auxv for glibc. The bit
+ *		is cleared if the flag is not specified.
+ *	Return
+ *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3948,6 +3958,7 @@ union bpf_attr {
 	FN(task_storage_get),		\
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
+	FN(bprm_opts_set),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4119,6 +4130,11 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_IP,
 };
 
+/* Flags for bpf_bprm_opts_set helper */
+enum {
+	BPF_F_BPRM_SECUREEXEC	= (1ULL << 0),
+};
+
 #define __bpf_md_ptr(type, name)	\
 union {					\
 	type name;			\
-- 
cgit v1.2.3-70-g09d2


From ea87ae85c9b31303a2e9d4c769d9f3ee8a3a60d1 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 17 Nov 2020 23:29:29 +0000
Subject: bpf: Add tests for bpf_bprm_opts_set helper

The test forks a child process, updates the local storage to set/unset
the securexec bit.

The BPF program in the test attaches to bprm_creds_for_exec which checks
the local storage of the current task to set the secureexec bit on the
binary parameters (bprm).

The child then execs a bash command with the environment variable
TMPDIR set in the envp.  The bash command returns a different exit code
based on its observed value of the TMPDIR variable.

Since TMPDIR is one of the variables that is ignored by the dynamic
loader when the secureexec bit is set, one should expect the
child execution to not see this value when the secureexec bit is set.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117232929.2156341-2-kpsingh@chromium.org
---
 .../selftests/bpf/prog_tests/test_bprm_opts.c      | 116 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bprm_opts.c      |  34 ++++++
 2 files changed, 150 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
 create mode 100644 tools/testing/selftests/bpf/progs/bprm_opts.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
new file mode 100644
index 000000000000..2559bb775762
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "bprm_opts.skel.h"
+#include "network_helpers.h"
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open 434
+#endif
+
+static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL };
+
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+static int update_storage(int map_fd, int secureexec)
+{
+	int task_fd, ret = 0;
+
+	task_fd = sys_pidfd_open(getpid(), 0);
+	if (task_fd < 0)
+		return errno;
+
+	ret = bpf_map_update_elem(map_fd, &task_fd, &secureexec, BPF_NOEXIST);
+	if (ret)
+		ret = errno;
+
+	close(task_fd);
+	return ret;
+}
+
+static int run_set_secureexec(int map_fd, int secureexec)
+{
+	int child_pid, child_status, ret, null_fd;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		null_fd = open("/dev/null", O_WRONLY);
+		if (null_fd == -1)
+			exit(errno);
+		dup2(null_fd, STDOUT_FILENO);
+		dup2(null_fd, STDERR_FILENO);
+		close(null_fd);
+
+		/* Ensure that all executions from hereon are
+		 * secure by setting a local storage which is read by
+		 * the bprm_creds_for_exec hook and sets bprm->secureexec.
+		 */
+		ret = update_storage(map_fd, secureexec);
+		if (ret)
+			exit(ret);
+
+		/* If the binary is executed with securexec=1, the dynamic
+		 * loader ingores and unsets certain variables like LD_PRELOAD,
+		 * TMPDIR etc. TMPDIR is used here to simplify the example, as
+		 * LD_PRELOAD requires a real .so file.
+		 *
+		 * If the value of TMPDIR is set, the bash command returns 10
+		 * and if the value is unset, it returns 20.
+		 */
+		execle("/bin/bash", "bash", "-c",
+		       "[[ -z \"${TMPDIR}\" ]] || exit 10 && exit 20", NULL,
+		       bash_envp);
+		exit(errno);
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		ret = WEXITSTATUS(child_status);
+
+		/* If a secureexec occurred, the exit status should be 20 */
+		if (secureexec && ret == 20)
+			return 0;
+
+		/* If normal execution happened, the exit code should be 10 */
+		if (!secureexec && ret == 10)
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+void test_test_bprm_opts(void)
+{
+	int err, duration = 0;
+	struct bprm_opts *skel = NULL;
+
+	skel = bprm_opts__open_and_load();
+	if (CHECK(!skel, "skel_load", "skeleton failed\n"))
+		goto close_prog;
+
+	err = bprm_opts__attach(skel);
+	if (CHECK(err, "attach", "attach failed: %d\n", err))
+		goto close_prog;
+
+	/* Run the test with the secureexec bit unset */
+	err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map),
+				 0 /* secureexec */);
+	if (CHECK(err, "run_set_secureexec:0", "err = %d\n", err))
+		goto close_prog;
+
+	/* Run the test with the secureexec bit set */
+	err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map),
+				 1 /* secureexec */);
+	if (CHECK(err, "run_set_secureexec:1", "err = %d\n", err))
+		goto close_prog;
+
+close_prog:
+	bprm_opts__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c
new file mode 100644
index 000000000000..5bfef2887e70
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bprm_opts.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} secure_exec_task_map SEC(".maps");
+
+SEC("lsm/bprm_creds_for_exec")
+int BPF_PROG(secure_exec, struct linux_binprm *bprm)
+{
+	int *secureexec;
+
+	secureexec = bpf_task_storage_get(&secure_exec_task_map,
+				   bpf_get_current_task_btf(), 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+	if (secureexec && *secureexec)
+		bpf_bprm_opts_set(bprm, BPF_F_BPRM_SECUREEXEC);
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 2adcba79e69d4a4c0ac3bb86f466d8b5df301608 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Fri, 13 Nov 2020 00:01:31 +0200
Subject: selftests/x86: Add a selftest for SGX

Add a selftest for SGX. It is a trivial test where a simple enclave
copies one 64-bit word of memory between two memory locations,
but ensures that all SGX hardware and software infrastructure is
functioning.

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Jethro Beekman <jethro@fortanix.com>
Cc: linux-kselftest@vger.kernel.org
Link: https://lkml.kernel.org/r/20201112220135.165028-21-jarkko@kernel.org
---
 tools/testing/selftests/Makefile                  |   1 +
 tools/testing/selftests/sgx/.gitignore            |   2 +
 tools/testing/selftests/sgx/Makefile              |  53 +++
 tools/testing/selftests/sgx/call.S                |  44 +++
 tools/testing/selftests/sgx/defines.h             |  21 ++
 tools/testing/selftests/sgx/load.c                | 277 +++++++++++++++
 tools/testing/selftests/sgx/main.c                | 246 ++++++++++++++
 tools/testing/selftests/sgx/main.h                |  38 +++
 tools/testing/selftests/sgx/sigstruct.c           | 391 ++++++++++++++++++++++
 tools/testing/selftests/sgx/test_encl.c           |  20 ++
 tools/testing/selftests/sgx/test_encl.lds         |  40 +++
 tools/testing/selftests/sgx/test_encl_bootstrap.S |  89 +++++
 12 files changed, 1222 insertions(+)
 create mode 100644 tools/testing/selftests/sgx/.gitignore
 create mode 100644 tools/testing/selftests/sgx/Makefile
 create mode 100644 tools/testing/selftests/sgx/call.S
 create mode 100644 tools/testing/selftests/sgx/defines.h
 create mode 100644 tools/testing/selftests/sgx/load.c
 create mode 100644 tools/testing/selftests/sgx/main.c
 create mode 100644 tools/testing/selftests/sgx/main.h
 create mode 100644 tools/testing/selftests/sgx/sigstruct.c
 create mode 100644 tools/testing/selftests/sgx/test_encl.c
 create mode 100644 tools/testing/selftests/sgx/test_encl.lds
 create mode 100644 tools/testing/selftests/sgx/test_encl_bootstrap.S

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..2e20e30a6faa 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -50,6 +50,7 @@ TARGETS += openat2
 TARGETS += rseq
 TARGETS += rtc
 TARGETS += seccomp
+TARGETS += sgx
 TARGETS += sigaltstack
 TARGETS += size
 TARGETS += sparc64
diff --git a/tools/testing/selftests/sgx/.gitignore b/tools/testing/selftests/sgx/.gitignore
new file mode 100644
index 000000000000..fbaf0bda9a92
--- /dev/null
+++ b/tools/testing/selftests/sgx/.gitignore
@@ -0,0 +1,2 @@
+test_sgx
+test_encl.elf
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
new file mode 100644
index 000000000000..d51c90663943
--- /dev/null
+++ b/tools/testing/selftests/sgx/Makefile
@@ -0,0 +1,53 @@
+top_srcdir = ../../../..
+
+include ../lib.mk
+
+.PHONY: all clean
+
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+			    ../x86/trivial_64bit_program.c)
+
+ifndef OBJCOPY
+OBJCOPY := $(CROSS_COMPILE)objcopy
+endif
+
+INCLUDES := -I$(top_srcdir)/tools/include
+HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack
+ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \
+	       -fno-stack-protector -mrdrnd $(INCLUDES)
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx
+
+ifeq ($(CAN_BUILD_X86_64), 1)
+all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf
+endif
+
+$(OUTPUT)/test_sgx: $(OUTPUT)/main.o \
+		    $(OUTPUT)/load.o \
+		    $(OUTPUT)/sigstruct.o \
+		    $(OUTPUT)/call.o
+	$(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto
+
+$(OUTPUT)/main.o: main.c
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/load.o: load.c
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/sigstruct.o: sigstruct.c
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/call.o: call.S
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S
+	$(CC) $(ENCL_CFLAGS) -T $^ -o $@
+
+EXTRA_CLEAN := \
+	$(OUTPUT)/test_encl.elf \
+	$(OUTPUT)/load.o \
+	$(OUTPUT)/call.o \
+	$(OUTPUT)/main.o \
+	$(OUTPUT)/sigstruct.o \
+	$(OUTPUT)/test_sgx \
+	$(OUTPUT)/test_sgx.o \
diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S
new file mode 100644
index 000000000000..4ecadc7490f4
--- /dev/null
+++ b/tools/testing/selftests/sgx/call.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+* Copyright(c) 2016-20 Intel Corporation.
+*/
+
+	.text
+
+	.global sgx_call_vdso
+sgx_call_vdso:
+	.cfi_startproc
+	push	%r15
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r15, 0
+	push	%r14
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r14, 0
+	push	%r13
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r13, 0
+	push	%r12
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%r12, 0
+	push	%rbx
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%rbx, 0
+	push	$0
+	.cfi_adjust_cfa_offset	8
+	push	0x38(%rsp)
+	.cfi_adjust_cfa_offset	8
+	call	*eenter(%rip)
+	add	$0x10, %rsp
+	.cfi_adjust_cfa_offset	-0x10
+	pop	%rbx
+	.cfi_adjust_cfa_offset	-8
+	pop	%r12
+	.cfi_adjust_cfa_offset	-8
+	pop	%r13
+	.cfi_adjust_cfa_offset	-8
+	pop	%r14
+	.cfi_adjust_cfa_offset	-8
+	pop	%r15
+	.cfi_adjust_cfa_offset	-8
+	ret
+	.cfi_endproc
diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h
new file mode 100644
index 000000000000..592c1ccf4576
--- /dev/null
+++ b/tools/testing/selftests/sgx/defines.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+#ifndef DEFINES_H
+#define DEFINES_H
+
+#include <stdint.h>
+
+#define PAGE_SIZE 4096
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+#define __aligned(x) __attribute__((__aligned__(x)))
+#define __packed __attribute__((packed))
+
+#include "../../../../arch/x86/kernel/cpu/sgx/arch.h"
+#include "../../../../arch/x86/include/asm/enclu.h"
+#include "../../../../arch/x86/include/uapi/asm/sgx.h"
+
+#endif /* DEFINES_H */
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
new file mode 100644
index 000000000000..9d43b75aaa55
--- /dev/null
+++ b/tools/testing/selftests/sgx/load.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <assert.h>
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "defines.h"
+#include "main.h"
+
+void encl_delete(struct encl *encl)
+{
+	if (encl->encl_base)
+		munmap((void *)encl->encl_base, encl->encl_size);
+
+	if (encl->bin)
+		munmap(encl->bin, encl->bin_size);
+
+	if (encl->fd)
+		close(encl->fd);
+
+	if (encl->segment_tbl)
+		free(encl->segment_tbl);
+
+	memset(encl, 0, sizeof(*encl));
+}
+
+static bool encl_map_bin(const char *path, struct encl *encl)
+{
+	struct stat sb;
+	void *bin;
+	int ret;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)  {
+		perror("open()");
+		return false;
+	}
+
+	ret = stat(path, &sb);
+	if (ret) {
+		perror("stat()");
+		goto err;
+	}
+
+	bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (bin == MAP_FAILED) {
+		perror("mmap()");
+		goto err;
+	}
+
+	encl->bin = bin;
+	encl->bin_size = sb.st_size;
+
+	close(fd);
+	return true;
+
+err:
+	close(fd);
+	return false;
+}
+
+static bool encl_ioc_create(struct encl *encl)
+{
+	struct sgx_secs *secs = &encl->secs;
+	struct sgx_enclave_create ioc;
+	int rc;
+
+	assert(encl->encl_base != 0);
+
+	memset(secs, 0, sizeof(*secs));
+	secs->ssa_frame_size = 1;
+	secs->attributes = SGX_ATTR_MODE64BIT;
+	secs->xfrm = 3;
+	secs->base = encl->encl_base;
+	secs->size = encl->encl_size;
+
+	ioc.src = (unsigned long)secs;
+	rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc);
+	if (rc) {
+		fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n",
+			errno);
+		munmap((void *)secs->base, encl->encl_size);
+		return false;
+	}
+
+	return true;
+}
+
+static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
+{
+	struct sgx_enclave_add_pages ioc;
+	struct sgx_secinfo secinfo;
+	int rc;
+
+	memset(&secinfo, 0, sizeof(secinfo));
+	secinfo.flags = seg->flags;
+
+	ioc.src = (uint64_t)encl->src + seg->offset;
+	ioc.offset = seg->offset;
+	ioc.length = seg->size;
+	ioc.secinfo = (unsigned long)&secinfo;
+	ioc.flags = SGX_PAGE_MEASURE;
+
+	rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
+	if (rc < 0) {
+		fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n",
+			errno);
+		return false;
+	}
+
+	return true;
+}
+
+bool encl_load(const char *path, struct encl *encl)
+{
+	Elf64_Phdr *phdr_tbl;
+	off_t src_offset;
+	Elf64_Ehdr *ehdr;
+	int i, j;
+	int ret;
+
+	memset(encl, 0, sizeof(*encl));
+
+	ret = open("/dev/sgx_enclave", O_RDWR);
+	if (ret < 0) {
+		fprintf(stderr, "Unable to open /dev/sgx_enclave\n");
+		goto err;
+	}
+
+	encl->fd = ret;
+
+	if (!encl_map_bin(path, encl))
+		goto err;
+
+	ehdr = encl->bin;
+	phdr_tbl = encl->bin + ehdr->e_phoff;
+
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		Elf64_Phdr *phdr = &phdr_tbl[i];
+
+		if (phdr->p_type == PT_LOAD)
+			encl->nr_segments++;
+	}
+
+	encl->segment_tbl = calloc(encl->nr_segments,
+				   sizeof(struct encl_segment));
+	if (!encl->segment_tbl)
+		goto err;
+
+	for (i = 0, j = 0; i < ehdr->e_phnum; i++) {
+		Elf64_Phdr *phdr = &phdr_tbl[i];
+		unsigned int flags = phdr->p_flags;
+		struct encl_segment *seg;
+
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		seg = &encl->segment_tbl[j];
+
+		if (!!(flags & ~(PF_R | PF_W | PF_X))) {
+			fprintf(stderr,
+				"%d has invalid segment flags 0x%02x.\n", i,
+				phdr->p_flags);
+			goto err;
+		}
+
+		if (j == 0 && flags != (PF_R | PF_W)) {
+			fprintf(stderr,
+				"TCS has invalid segment flags 0x%02x.\n",
+				phdr->p_flags);
+			goto err;
+		}
+
+		if (j == 0) {
+			src_offset = phdr->p_offset & PAGE_MASK;
+
+			seg->prot = PROT_READ | PROT_WRITE;
+			seg->flags = SGX_PAGE_TYPE_TCS << 8;
+		} else  {
+			seg->prot = (phdr->p_flags & PF_R) ? PROT_READ : 0;
+			seg->prot |= (phdr->p_flags & PF_W) ? PROT_WRITE : 0;
+			seg->prot |= (phdr->p_flags & PF_X) ? PROT_EXEC : 0;
+			seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot;
+		}
+
+		seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset;
+		seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK;
+
+		printf("0x%016lx 0x%016lx 0x%02x\n", seg->offset, seg->size,
+		       seg->prot);
+
+		j++;
+	}
+
+	assert(j == encl->nr_segments);
+
+	encl->src = encl->bin + src_offset;
+	encl->src_size = encl->segment_tbl[j - 1].offset +
+			 encl->segment_tbl[j - 1].size;
+
+	for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
+		encl->encl_size <<= 1;
+
+	return true;
+
+err:
+	encl_delete(encl);
+	return false;
+}
+
+static bool encl_map_area(struct encl *encl)
+{
+	size_t encl_size = encl->encl_size;
+	void *area;
+
+	area = mmap(NULL, encl_size * 2, PROT_NONE,
+		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (area == MAP_FAILED) {
+		perror("mmap");
+		return false;
+	}
+
+	encl->encl_base = ((uint64_t)area + encl_size - 1) & ~(encl_size - 1);
+
+	munmap(area, encl->encl_base - (uint64_t)area);
+	munmap((void *)(encl->encl_base + encl_size),
+	       (uint64_t)area + encl_size - encl->encl_base);
+
+	return true;
+}
+
+bool encl_build(struct encl *encl)
+{
+	struct sgx_enclave_init ioc;
+	int ret;
+	int i;
+
+	if (!encl_map_area(encl))
+		return false;
+
+	if (!encl_ioc_create(encl))
+		return false;
+
+	/*
+	 * Pages must be added before mapping VMAs because their permissions
+	 * cap the VMA permissions.
+	 */
+	for (i = 0; i < encl->nr_segments; i++) {
+		struct encl_segment *seg = &encl->segment_tbl[i];
+
+		if (!encl_ioc_add_pages(encl, seg))
+			return false;
+	}
+
+	ioc.sigstruct = (uint64_t)&encl->sigstruct;
+	ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc);
+	if (ret) {
+		fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n",
+			errno);
+		return false;
+	}
+
+	return true;
+}
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
new file mode 100644
index 000000000000..724cec700926
--- /dev/null
+++ b/tools/testing/selftests/sgx/main.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "defines.h"
+#include "main.h"
+#include "../kselftest.h"
+
+static const uint64_t MAGIC = 0x1122334455667788ULL;
+vdso_sgx_enter_enclave_t eenter;
+
+struct vdso_symtab {
+	Elf64_Sym *elf_symtab;
+	const char *elf_symstrtab;
+	Elf64_Word *elf_hashtab;
+};
+
+static void *vdso_get_base_addr(char *envp[])
+{
+	Elf64_auxv_t *auxv;
+	int i;
+
+	for (i = 0; envp[i]; i++)
+		;
+
+	auxv = (Elf64_auxv_t *)&envp[i + 1];
+
+	for (i = 0; auxv[i].a_type != AT_NULL; i++) {
+		if (auxv[i].a_type == AT_SYSINFO_EHDR)
+			return (void *)auxv[i].a_un.a_val;
+	}
+
+	return NULL;
+}
+
+static Elf64_Dyn *vdso_get_dyntab(void *addr)
+{
+	Elf64_Ehdr *ehdr = addr;
+	Elf64_Phdr *phdrtab = addr + ehdr->e_phoff;
+	int i;
+
+	for (i = 0; i < ehdr->e_phnum; i++)
+		if (phdrtab[i].p_type == PT_DYNAMIC)
+			return addr + phdrtab[i].p_offset;
+
+	return NULL;
+}
+
+static void *vdso_get_dyn(void *addr, Elf64_Dyn *dyntab, Elf64_Sxword tag)
+{
+	int i;
+
+	for (i = 0; dyntab[i].d_tag != DT_NULL; i++)
+		if (dyntab[i].d_tag == tag)
+			return addr + dyntab[i].d_un.d_ptr;
+
+	return NULL;
+}
+
+static bool vdso_get_symtab(void *addr, struct vdso_symtab *symtab)
+{
+	Elf64_Dyn *dyntab = vdso_get_dyntab(addr);
+
+	symtab->elf_symtab = vdso_get_dyn(addr, dyntab, DT_SYMTAB);
+	if (!symtab->elf_symtab)
+		return false;
+
+	symtab->elf_symstrtab = vdso_get_dyn(addr, dyntab, DT_STRTAB);
+	if (!symtab->elf_symstrtab)
+		return false;
+
+	symtab->elf_hashtab = vdso_get_dyn(addr, dyntab, DT_HASH);
+	if (!symtab->elf_hashtab)
+		return false;
+
+	return true;
+}
+
+static unsigned long elf_sym_hash(const char *name)
+{
+	unsigned long h = 0, high;
+
+	while (*name) {
+		h = (h << 4) + *name++;
+		high = h & 0xf0000000;
+
+		if (high)
+			h ^= high >> 24;
+
+		h &= ~high;
+	}
+
+	return h;
+}
+
+static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name)
+{
+	Elf64_Word bucketnum = symtab->elf_hashtab[0];
+	Elf64_Word *buckettab = &symtab->elf_hashtab[2];
+	Elf64_Word *chaintab = &symtab->elf_hashtab[2 + bucketnum];
+	Elf64_Sym *sym;
+	Elf64_Word i;
+
+	for (i = buckettab[elf_sym_hash(name) % bucketnum]; i != STN_UNDEF;
+	     i = chaintab[i]) {
+		sym = &symtab->elf_symtab[i];
+		if (!strcmp(name, &symtab->elf_symstrtab[sym->st_name]))
+			return sym;
+	}
+
+	return NULL;
+}
+
+bool report_results(struct sgx_enclave_run *run, int ret, uint64_t result,
+		  const char *test)
+{
+	bool valid = true;
+
+	if (ret) {
+		printf("FAIL: %s() returned: %d\n", test, ret);
+		valid = false;
+	}
+
+	if (run->function != EEXIT) {
+		printf("FAIL: %s() function, expected: %u, got: %u\n", test, EEXIT,
+		       run->function);
+		valid = false;
+	}
+
+	if (result != MAGIC) {
+		printf("FAIL: %s(), expected: 0x%lx, got: 0x%lx\n", test, MAGIC,
+		       result);
+		valid = false;
+	}
+
+	if (run->user_data) {
+		printf("FAIL: %s() user data, expected: 0x0, got: 0x%llx\n",
+		       test, run->user_data);
+		valid = false;
+	}
+
+	return valid;
+}
+
+static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r9,
+			struct sgx_enclave_run *run)
+{
+	run->user_data = 0;
+	return 0;
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+	struct sgx_enclave_run run;
+	struct vdso_symtab symtab;
+	Elf64_Sym *eenter_sym;
+	uint64_t result = 0;
+	struct encl encl;
+	unsigned int i;
+	void *addr;
+	int ret;
+
+	memset(&run, 0, sizeof(run));
+
+	if (!encl_load("test_encl.elf", &encl)) {
+		encl_delete(&encl);
+		ksft_exit_skip("cannot load enclaves\n");
+	}
+
+	if (!encl_measure(&encl))
+		goto err;
+
+	if (!encl_build(&encl))
+		goto err;
+
+	/*
+	 * An enclave consumer only must do this.
+	 */
+	for (i = 0; i < encl.nr_segments; i++) {
+		struct encl_segment *seg = &encl.segment_tbl[i];
+
+		addr = mmap((void *)encl.encl_base + seg->offset, seg->size,
+			    seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0);
+		if (addr == MAP_FAILED) {
+			fprintf(stderr, "mmap() failed, errno=%d.\n", errno);
+			exit(KSFT_FAIL);
+		}
+	}
+
+	memset(&run, 0, sizeof(run));
+	run.tcs = encl.encl_base;
+
+	addr = vdso_get_base_addr(envp);
+	if (!addr)
+		goto err;
+
+	if (!vdso_get_symtab(addr, &symtab))
+		goto err;
+
+	eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave");
+	if (!eenter_sym)
+		goto err;
+
+	eenter = addr + eenter_sym->st_value;
+
+	ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run);
+	if (!report_results(&run, ret, result, "sgx_call_vdso"))
+		goto err;
+
+
+	/* Invoke the vDSO directly. */
+	result = 0;
+	ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER,
+		     0, 0, &run);
+	if (!report_results(&run, ret, result, "eenter"))
+		goto err;
+
+	/* And with an exit handler. */
+	run.user_handler = (__u64)user_handler;
+	run.user_data = 0xdeadbeef;
+	ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER,
+		     0, 0, &run);
+	if (!report_results(&run, ret, result, "user_handler"))
+		goto err;
+
+	printf("SUCCESS\n");
+	encl_delete(&encl);
+	exit(KSFT_PASS);
+
+err:
+	encl_delete(&encl);
+	exit(KSFT_FAIL);
+}
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
new file mode 100644
index 000000000000..45e6ab65442a
--- /dev/null
+++ b/tools/testing/selftests/sgx/main.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+#ifndef MAIN_H
+#define MAIN_H
+
+struct encl_segment {
+	off_t offset;
+	size_t size;
+	unsigned int prot;
+	unsigned int flags;
+};
+
+struct encl {
+	int fd;
+	void *bin;
+	off_t bin_size;
+	void *src;
+	size_t src_size;
+	size_t encl_size;
+	off_t encl_base;
+	unsigned int nr_segments;
+	struct encl_segment *segment_tbl;
+	struct sgx_secs secs;
+	struct sgx_sigstruct sigstruct;
+};
+
+void encl_delete(struct encl *ctx);
+bool encl_load(const char *path, struct encl *encl);
+bool encl_measure(struct encl *encl);
+bool encl_build(struct encl *encl);
+
+int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9,
+		  struct sgx_enclave_run *run);
+
+#endif /* MAIN_H */
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
new file mode 100644
index 000000000000..cc06f108bae7
--- /dev/null
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <openssl/err.h>
+#include <openssl/pem.h>
+#include "defines.h"
+#include "main.h"
+
+struct q1q2_ctx {
+	BN_CTX *bn_ctx;
+	BIGNUM *m;
+	BIGNUM *s;
+	BIGNUM *q1;
+	BIGNUM *qr;
+	BIGNUM *q2;
+};
+
+static void free_q1q2_ctx(struct q1q2_ctx *ctx)
+{
+	BN_CTX_free(ctx->bn_ctx);
+	BN_free(ctx->m);
+	BN_free(ctx->s);
+	BN_free(ctx->q1);
+	BN_free(ctx->qr);
+	BN_free(ctx->q2);
+}
+
+static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m,
+			   struct q1q2_ctx *ctx)
+{
+	ctx->bn_ctx = BN_CTX_new();
+	ctx->s = BN_bin2bn(s, SGX_MODULUS_SIZE, NULL);
+	ctx->m = BN_bin2bn(m, SGX_MODULUS_SIZE, NULL);
+	ctx->q1 = BN_new();
+	ctx->qr = BN_new();
+	ctx->q2 = BN_new();
+
+	if (!ctx->bn_ctx || !ctx->s || !ctx->m || !ctx->q1 || !ctx->qr ||
+	    !ctx->q2) {
+		free_q1q2_ctx(ctx);
+		return false;
+	}
+
+	return true;
+}
+
+static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1,
+		      uint8_t *q2)
+{
+	struct q1q2_ctx ctx;
+
+	if (!alloc_q1q2_ctx(s, m, &ctx)) {
+		fprintf(stderr, "Not enough memory for Q1Q2 calculation\n");
+		return false;
+	}
+
+	if (!BN_mul(ctx.q1, ctx.s, ctx.s, ctx.bn_ctx))
+		goto out;
+
+	if (!BN_div(ctx.q1, ctx.qr, ctx.q1, ctx.m, ctx.bn_ctx))
+		goto out;
+
+	if (BN_num_bytes(ctx.q1) > SGX_MODULUS_SIZE) {
+		fprintf(stderr, "Too large Q1 %d bytes\n",
+			BN_num_bytes(ctx.q1));
+		goto out;
+	}
+
+	if (!BN_mul(ctx.q2, ctx.s, ctx.qr, ctx.bn_ctx))
+		goto out;
+
+	if (!BN_div(ctx.q2, NULL, ctx.q2, ctx.m, ctx.bn_ctx))
+		goto out;
+
+	if (BN_num_bytes(ctx.q2) > SGX_MODULUS_SIZE) {
+		fprintf(stderr, "Too large Q2 %d bytes\n",
+			BN_num_bytes(ctx.q2));
+		goto out;
+	}
+
+	BN_bn2bin(ctx.q1, q1);
+	BN_bn2bin(ctx.q2, q2);
+
+	free_q1q2_ctx(&ctx);
+	return true;
+out:
+	free_q1q2_ctx(&ctx);
+	return false;
+}
+
+struct sgx_sigstruct_payload {
+	struct sgx_sigstruct_header header;
+	struct sgx_sigstruct_body body;
+};
+
+static bool check_crypto_errors(void)
+{
+	int err;
+	bool had_errors = false;
+	const char *filename;
+	int line;
+	char str[256];
+
+	for ( ; ; ) {
+		if (ERR_peek_error() == 0)
+			break;
+
+		had_errors = true;
+		err = ERR_get_error_line(&filename, &line);
+		ERR_error_string_n(err, str, sizeof(str));
+		fprintf(stderr, "crypto: %s: %s:%d\n", str, filename, line);
+	}
+
+	return had_errors;
+}
+
+static inline const BIGNUM *get_modulus(RSA *key)
+{
+	const BIGNUM *n;
+
+	RSA_get0_key(key, &n, NULL, NULL);
+	return n;
+}
+
+static RSA *gen_sign_key(void)
+{
+	BIGNUM *e;
+	RSA *key;
+	int ret;
+
+	e = BN_new();
+	key = RSA_new();
+
+	if (!e || !key)
+		goto err;
+
+	ret = BN_set_word(e, RSA_3);
+	if (ret != 1)
+		goto err;
+
+	ret = RSA_generate_key_ex(key, 3072, e, NULL);
+	if (ret != 1)
+		goto err;
+
+	BN_free(e);
+
+	return key;
+
+err:
+	RSA_free(key);
+	BN_free(e);
+
+	return NULL;
+}
+
+static void reverse_bytes(void *data, int length)
+{
+	int i = 0;
+	int j = length - 1;
+	uint8_t temp;
+	uint8_t *ptr = data;
+
+	while (i < j) {
+		temp = ptr[i];
+		ptr[i] = ptr[j];
+		ptr[j] = temp;
+		i++;
+		j--;
+	}
+}
+
+enum mrtags {
+	MRECREATE = 0x0045544145524345,
+	MREADD = 0x0000000044444145,
+	MREEXTEND = 0x00444E4554584545,
+};
+
+static bool mrenclave_update(EVP_MD_CTX *ctx, const void *data)
+{
+	if (!EVP_DigestUpdate(ctx, data, 64)) {
+		fprintf(stderr, "digest update failed\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool mrenclave_commit(EVP_MD_CTX *ctx, uint8_t *mrenclave)
+{
+	unsigned int size;
+
+	if (!EVP_DigestFinal_ex(ctx, (unsigned char *)mrenclave, &size)) {
+		fprintf(stderr, "digest commit failed\n");
+		return false;
+	}
+
+	if (size != 32) {
+		fprintf(stderr, "invalid digest size = %u\n", size);
+		return false;
+	}
+
+	return true;
+}
+
+struct mrecreate {
+	uint64_t tag;
+	uint32_t ssaframesize;
+	uint64_t size;
+	uint8_t reserved[44];
+} __attribute__((__packed__));
+
+
+static bool mrenclave_ecreate(EVP_MD_CTX *ctx, uint64_t blob_size)
+{
+	struct mrecreate mrecreate;
+	uint64_t encl_size;
+
+	for (encl_size = 0x1000; encl_size < blob_size; )
+		encl_size <<= 1;
+
+	memset(&mrecreate, 0, sizeof(mrecreate));
+	mrecreate.tag = MRECREATE;
+	mrecreate.ssaframesize = 1;
+	mrecreate.size = encl_size;
+
+	if (!EVP_DigestInit_ex(ctx, EVP_sha256(), NULL))
+		return false;
+
+	return mrenclave_update(ctx, &mrecreate);
+}
+
+struct mreadd {
+	uint64_t tag;
+	uint64_t offset;
+	uint64_t flags; /* SECINFO flags */
+	uint8_t reserved[40];
+} __attribute__((__packed__));
+
+static bool mrenclave_eadd(EVP_MD_CTX *ctx, uint64_t offset, uint64_t flags)
+{
+	struct mreadd mreadd;
+
+	memset(&mreadd, 0, sizeof(mreadd));
+	mreadd.tag = MREADD;
+	mreadd.offset = offset;
+	mreadd.flags = flags;
+
+	return mrenclave_update(ctx, &mreadd);
+}
+
+struct mreextend {
+	uint64_t tag;
+	uint64_t offset;
+	uint8_t reserved[48];
+} __attribute__((__packed__));
+
+static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset,
+			      const uint8_t *data)
+{
+	struct mreextend mreextend;
+	int i;
+
+	for (i = 0; i < 0x1000; i += 0x100) {
+		memset(&mreextend, 0, sizeof(mreextend));
+		mreextend.tag = MREEXTEND;
+		mreextend.offset = offset + i;
+
+		if (!mrenclave_update(ctx, &mreextend))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0x00]))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0x40]))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0x80]))
+			return false;
+
+		if (!mrenclave_update(ctx, &data[i + 0xC0]))
+			return false;
+	}
+
+	return true;
+}
+
+static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl,
+			      struct encl_segment *seg)
+{
+	uint64_t end = seg->offset + seg->size;
+	uint64_t offset;
+
+	for (offset = seg->offset; offset < end; offset += PAGE_SIZE) {
+		if (!mrenclave_eadd(ctx, offset, seg->flags))
+			return false;
+
+		if (!mrenclave_eextend(ctx, offset, encl->src + offset))
+			return false;
+	}
+
+	return true;
+}
+
+bool encl_measure(struct encl *encl)
+{
+	uint64_t header1[2] = {0x000000E100000006, 0x0000000000010000};
+	uint64_t header2[2] = {0x0000006000000101, 0x0000000100000060};
+	struct sgx_sigstruct *sigstruct = &encl->sigstruct;
+	struct sgx_sigstruct_payload payload;
+	uint8_t digest[SHA256_DIGEST_LENGTH];
+	unsigned int siglen;
+	RSA *key = NULL;
+	EVP_MD_CTX *ctx;
+	int i;
+
+	memset(sigstruct, 0, sizeof(*sigstruct));
+
+	sigstruct->header.header1[0] = header1[0];
+	sigstruct->header.header1[1] = header1[1];
+	sigstruct->header.header2[0] = header2[0];
+	sigstruct->header.header2[1] = header2[1];
+	sigstruct->exponent = 3;
+	sigstruct->body.attributes = SGX_ATTR_MODE64BIT;
+	sigstruct->body.xfrm = 3;
+
+	/* sanity check */
+	if (check_crypto_errors())
+		goto err;
+
+	key = gen_sign_key();
+	if (!key)
+		goto err;
+
+	BN_bn2bin(get_modulus(key), sigstruct->modulus);
+
+	ctx = EVP_MD_CTX_create();
+	if (!ctx)
+		goto err;
+
+	if (!mrenclave_ecreate(ctx, encl->src_size))
+		goto err;
+
+	for (i = 0; i < encl->nr_segments; i++) {
+		struct encl_segment *seg = &encl->segment_tbl[i];
+
+		if (!mrenclave_segment(ctx, encl, seg))
+			goto err;
+	}
+
+	if (!mrenclave_commit(ctx, sigstruct->body.mrenclave))
+		goto err;
+
+	memcpy(&payload.header, &sigstruct->header, sizeof(sigstruct->header));
+	memcpy(&payload.body, &sigstruct->body, sizeof(sigstruct->body));
+
+	SHA256((unsigned char *)&payload, sizeof(payload), digest);
+
+	if (!RSA_sign(NID_sha256, digest, SHA256_DIGEST_LENGTH,
+		      sigstruct->signature, &siglen, key))
+		goto err;
+
+	if (!calc_q1q2(sigstruct->signature, sigstruct->modulus, sigstruct->q1,
+		       sigstruct->q2))
+		goto err;
+
+	/* BE -> LE */
+	reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE);
+	reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE);
+	reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE);
+	reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE);
+
+	EVP_MD_CTX_destroy(ctx);
+	RSA_free(key);
+	return true;
+
+err:
+	EVP_MD_CTX_destroy(ctx);
+	RSA_free(key);
+	return false;
+}
diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c
new file mode 100644
index 000000000000..cf25b5dc1e03
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <stddef.h>
+#include "defines.h"
+
+static void *memcpy(void *dest, const void *src, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		((char *)dest)[i] = ((char *)src)[i];
+
+	return dest;
+}
+
+void encl_body(void *rdi, void *rsi)
+{
+	memcpy(rsi, rdi, 8);
+}
diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds
new file mode 100644
index 000000000000..0fbbda7e665e
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl.lds
@@ -0,0 +1,40 @@
+OUTPUT_FORMAT(elf64-x86-64)
+
+PHDRS
+{
+	tcs PT_LOAD;
+	text PT_LOAD;
+	data PT_LOAD;
+}
+
+SECTIONS
+{
+	. = 0;
+	.tcs : {
+		*(.tcs*)
+	} : tcs
+
+	. = ALIGN(4096);
+	.text : {
+		*(.text*)
+		*(.rodata*)
+	} : text
+
+	. = ALIGN(4096);
+	.data : {
+		*(.data*)
+	} : data
+
+	/DISCARD/ : {
+		*(.comment*)
+		*(.note*)
+		*(.debug*)
+		*(.eh_frame*)
+	}
+}
+
+ASSERT(!DEFINED(.altinstructions), "ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.altinstr_replacement), "ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.discard.retpoline_safe), "RETPOLINE ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.discard.nospec), "RETPOLINE ALTERNATIVES are not supported in enclaves")
+ASSERT(!DEFINED(.got.plt), "Libcalls are not supported in enclaves")
diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S
new file mode 100644
index 000000000000..5d5680d4ea39
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+	.macro ENCLU
+	.byte 0x0f, 0x01, 0xd7
+	.endm
+
+	.section ".tcs", "aw"
+	.balign	4096
+
+	.fill	1, 8, 0			# STATE (set by CPU)
+	.fill	1, 8, 0			# FLAGS
+	.quad	encl_ssa		# OSSA
+	.fill	1, 4, 0			# CSSA (set by CPU)
+	.fill	1, 4, 1			# NSSA
+	.quad	encl_entry		# OENTRY
+	.fill	1, 8, 0			# AEP (set by EENTER and ERESUME)
+	.fill	1, 8, 0			# OFSBASE
+	.fill	1, 8, 0			# OGSBASE
+	.fill	1, 4, 0xFFFFFFFF 	# FSLIMIT
+	.fill	1, 4, 0xFFFFFFFF	# GSLIMIT
+	.fill	4024, 1, 0		# Reserved
+
+	# Identical to the previous TCS.
+	.fill	1, 8, 0			# STATE (set by CPU)
+	.fill	1, 8, 0			# FLAGS
+	.quad	encl_ssa		# OSSA
+	.fill	1, 4, 0			# CSSA (set by CPU)
+	.fill	1, 4, 1			# NSSA
+	.quad	encl_entry		# OENTRY
+	.fill	1, 8, 0			# AEP (set by EENTER and ERESUME)
+	.fill	1, 8, 0			# OFSBASE
+	.fill	1, 8, 0			# OGSBASE
+	.fill	1, 4, 0xFFFFFFFF 	# FSLIMIT
+	.fill	1, 4, 0xFFFFFFFF	# GSLIMIT
+	.fill	4024, 1, 0		# Reserved
+
+	.text
+
+encl_entry:
+	# RBX contains the base address for TCS, which is also the first address
+	# inside the enclave. By adding the value of le_stack_end to it, we get
+	# the absolute address for the stack.
+	lea	(encl_stack)(%rbx), %rax
+	xchg	%rsp, %rax
+	push	%rax
+
+	push	%rcx # push the address after EENTER
+	push	%rbx # push the enclave base address
+
+	call	encl_body
+
+	pop	%rbx # pop the enclave base address
+
+	/* Clear volatile GPRs, except RAX (EEXIT function). */
+	xor     %rcx, %rcx
+	xor     %rdx, %rdx
+	xor     %rdi, %rdi
+	xor     %rsi, %rsi
+	xor     %r8, %r8
+	xor     %r9, %r9
+	xor     %r10, %r10
+	xor     %r11, %r11
+
+	# Reset status flags.
+	add     %rdx, %rdx # OF = SF = AF = CF = 0; ZF = PF = 1
+
+	# Prepare EEXIT target by popping the address of the instruction after
+	# EENTER to RBX.
+	pop	%rbx
+
+	# Restore the caller stack.
+	pop	%rax
+	mov	%rax, %rsp
+
+	# EEXIT
+	mov	$4, %rax
+	enclu
+
+	.section ".data", "aw"
+
+encl_ssa:
+	.space 4096
+
+	.balign 4096
+	.space 8192
+encl_stack:
-- 
cgit v1.2.3-70-g09d2


From 0eaa8d153a1d573e53b8283c90db44057d1376f6 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Wed, 18 Nov 2020 19:06:40 +0200
Subject: selftests/sgx: Use a statically generated 3072-bit RSA key

Use a statically generated key for signing the enclave, because
generating keys on the fly can eat the kernel entropy pool. Another
good reason for doing this is predictable builds. The RSA has been
arbitrarily selected. It's contents do not matter.

This also makes the selftest execute a lot quicker instead of the delay
that it had before (because of slow key generation).

 [ bp: Disambiguate "static key" which means something else in the
   kernel, fix typos. ]

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: linux-kselftest@vger.kernel.org
Link: https://lkml.kernel.org/r/20201118170640.39629-1-jarkko@kernel.org
---
 tools/testing/selftests/sgx/Makefile     |  6 ++++-
 tools/testing/selftests/sgx/main.h       |  3 +++
 tools/testing/selftests/sgx/sign_key.S   | 12 ++++++++++
 tools/testing/selftests/sgx/sign_key.pem | 39 ++++++++++++++++++++++++++++++++
 tools/testing/selftests/sgx/sigstruct.c  | 34 ++++++++++------------------
 5 files changed, 71 insertions(+), 23 deletions(-)
 create mode 100644 tools/testing/selftests/sgx/sign_key.S
 create mode 100644 tools/testing/selftests/sgx/sign_key.pem

(limited to 'tools')

diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index d51c90663943..7f12d55b97f8 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -25,7 +25,8 @@ endif
 $(OUTPUT)/test_sgx: $(OUTPUT)/main.o \
 		    $(OUTPUT)/load.o \
 		    $(OUTPUT)/sigstruct.o \
-		    $(OUTPUT)/call.o
+		    $(OUTPUT)/call.o \
+		    $(OUTPUT)/sign_key.o
 	$(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto
 
 $(OUTPUT)/main.o: main.c
@@ -40,6 +41,9 @@ $(OUTPUT)/sigstruct.o: sigstruct.c
 $(OUTPUT)/call.o: call.S
 	$(CC) $(HOST_CFLAGS) -c $< -o $@
 
+$(OUTPUT)/sign_key.o: sign_key.S
+	$(CC) $(HOST_CFLAGS) -c $< -o $@
+
 $(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S
 	$(CC) $(ENCL_CFLAGS) -T $^ -o $@
 
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
index 45e6ab65442a..67211a708f04 100644
--- a/tools/testing/selftests/sgx/main.h
+++ b/tools/testing/selftests/sgx/main.h
@@ -27,6 +27,9 @@ struct encl {
 	struct sgx_sigstruct sigstruct;
 };
 
+extern unsigned char sign_key[];
+extern unsigned char sign_key_end[];
+
 void encl_delete(struct encl *ctx);
 bool encl_load(const char *path, struct encl *encl);
 bool encl_measure(struct encl *encl);
diff --git a/tools/testing/selftests/sgx/sign_key.S b/tools/testing/selftests/sgx/sign_key.S
new file mode 100644
index 000000000000..e4fbe948444a
--- /dev/null
+++ b/tools/testing/selftests/sgx/sign_key.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+* Copyright(c) 2016-20 Intel Corporation.
+*/
+
+    .section ".rodata", "a"
+
+sign_key:
+    .globl sign_key
+    .incbin "sign_key.pem"
+sign_key_end:
+    .globl sign_key_end
diff --git a/tools/testing/selftests/sgx/sign_key.pem b/tools/testing/selftests/sgx/sign_key.pem
new file mode 100644
index 000000000000..d76f21f19187
--- /dev/null
+++ b/tools/testing/selftests/sgx/sign_key.pem
@@ -0,0 +1,39 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIG4wIBAAKCAYEApalGbq7Q+usM91CPtksu3D+b0Prc8gAFL6grM3mg85A5Bx8V
+cfMXPgtrw8EYFwQxDAvzZWwl+9VfOX0ECrFRBkOHcOiG0SnADN8+FLj1UiNUQwbp
+S6OzhNWuRcSbGraSOyUlVlV0yMQSvewyzGklOaXBe30AJqzIBc8QfdSxKuP8rs0Z
+ga6k/Bl73osrYKByILJTUUeZqjLERsE6GebsdzbWgKn8qVqng4ZS4yMNg6LeRlH3
++9CIPgg4jwpSLHcp7dq2qTIB9a0tGe9ayp+5FbucpB6U7ePold0EeRN6RlJGDF9k
+L93v8P5ykz5G5gYZ2g0K1X2sHIWV4huxPgv5PXgdyQYbK+6olqj0d5rjYuwX57Ul
+k6SroPS1U6UbdCjG5txM+BNGU0VpD0ZhrIRw0leQdnNcCO9sTJuInZrgYacSVJ7u
+mtB+uCt+uzUesc+l+xPRYA+9e14lLkZp7AAmo9FvL816XDI09deehJ3i/LmHKCRN
+tuqC5TprRjFwUr6dAgEDAoIBgG5w2Z8fNfycs0+LCnmHdJLVEotR6KFVWMpwHMz7
+wKJgJgS/Y6FMuilc8oKAuroCy11dTO5IGVKOP3uorVx2NgQtBPXwWeDGgAiU1A3Q
+o4wXjYIEm4fCd63jyYPYZ2ckYXzDbjmOTdstYdPyzIhGGNEZK6eoqsRzMAPfYFPj
+IMdCqHSIu6vJw1K7p+myHOsVoWshjODaZnF3LYSA0WaZ8vokjwBxUxuRxQJZjJds
+s60XPtmL+qfgWtQFewoG4XL6GuD8FcXccynRRtzrLtFNPIl9BQfWfjBBhTC1/Te1
+0Z6XbZvpdUTD9OfLB7SbR2OUFNpKQgriO0iYVdbW3cr7uu38Zwp4W1TX73DPjoi6
+KNooP6SGWd4mRJW2+dUmSYS4QNG8eVVZswKcploEIXlAKRsOe4kzJJ1iETugIe85
+uX8nd1WYEp65xwoRUg8hqng0MeyveVbXqNKuJG6tzNDt9kgFYo+hmC/oouAW2Dtc
+T9jdRAwKJXqA2Eg6OkgXCEv+kwKBwQDYaQiFMlFhsmLlqI+EzCUh7c941/cL7m6U
+7j98+8ngl0HgCEcrc10iJVCKakQW3YbPzAx3XkKTaGjWazvvrFarXIGlOud64B8a
+iWyQ7VdlnmZnNEdk+C83tI91OQeaTKqRLDGzKh29Ry/jL8Pcbazt+kDgxa0H7qJp
+roADUanLQuNkYubpbhFBh3xpa2EExaVq6rF7nIVsD8W9TrbmPKA4LgH7z0iy544D
+kVCNYsTjYDdUWP+WiSor8kCnnpjnN9sCgcEAw/eNezUD1UDf6OYFC9+5JZJFn4Tg
+mZMyN93JKIb199ffwnjtHUSjcyiWeesXucpzwtGbTcwQnDisSW4oneYKLSEBlBaq
+scqiUugyGZZOthFSCbdXYXMViK2vHrKlkse7GxVlROKcEhM/pRBrmjaGO8eWR+D4
+FO2wCXzVs3KgV6j779frw0vC54oHOxc9+Lu1rSHp4i+600koyvL/zF6U/5tZXIvN
+YW2yoiQJnjCmVA1pwbwV6KAUTPDTMnBK+YjnAoHBAJBGBa4hi5Z27JkbCliIGMFJ
+NPs6pLKe9GNJf6in2+sPgUAFhMeiPhbDiwbxgrnpBIqICE+ULGJFmzmc0p/IOceT
+ARjR76dAFLxbnbXzj5kURETNhO36yiUjCk4mBRGIcbYddndxaSjaH+zKgpLzyJ6m
+1esuc1qfFvEfAAI2cTIsl5hB70ZJYNZaUvDyQK3ZGPHxy6e9rkgKg9OJz0QoatAe
+q/002yHvtAJg4F5B2JeVejg7VQ8GHB1MKxppu0TP5wKBwQCCpQj8zgKOKz/wmViy
+lSYZDC5qWJW7t3bP6TDFr06lOpUsUJ4TgxeiGw778g/RMaKB4RIz3WBoJcgw9BsT
+7rFza1ZiucchMcGMmswRDt8kC4wGejpA92Owc8oUdxkMhSdnY5jYlxK2t3/DYEe8
+JFl9L7mFQKVjSSAGUzkiTGrlG1Kf5UfXh9dFBq98uilQfSPIwUaWynyM23CHTKqI
+Pw3/vOY9sojrnncWwrEUIG7is5vWfWPwargzSzd29YdRBe8CgcEAuRVewK/YeNOX
+B7ZG6gKKsfsvrGtY7FPETzLZAHjoVXYNea4LVZ2kn4hBXXlvw/4HD+YqcTt4wmif
+5JQlDvjNobUiKJZpzy7hklVhF7wZFl4pCF7Yh43q9iQ7gKTaeUG7MiaK+G8Zz8aY
+HW9rsiihbdZkccMvnPfO9334XMxl3HtBRzLstjUlbLB7Sdh+7tZ3JQidCOFNs5pE
+XyWwnASPu4tKfDahH1UUTp1uJcq/6716CSWg080avYxFcn75qqsb
+-----END RSA PRIVATE KEY-----
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index cc06f108bae7..dee7a3d6c5a5 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -135,33 +135,21 @@ static inline const BIGNUM *get_modulus(RSA *key)
 
 static RSA *gen_sign_key(void)
 {
-	BIGNUM *e;
+	unsigned long sign_key_length;
+	BIO *bio;
 	RSA *key;
-	int ret;
 
-	e = BN_new();
-	key = RSA_new();
+	sign_key_length = (unsigned long)&sign_key_end -
+			  (unsigned long)&sign_key;
 
-	if (!e || !key)
-		goto err;
-
-	ret = BN_set_word(e, RSA_3);
-	if (ret != 1)
-		goto err;
-
-	ret = RSA_generate_key_ex(key, 3072, e, NULL);
-	if (ret != 1)
-		goto err;
+	bio = BIO_new_mem_buf(&sign_key, sign_key_length);
+	if (!bio)
+		return NULL;
 
-	BN_free(e);
+	key = PEM_read_bio_RSAPrivateKey(bio, NULL, NULL, NULL);
+	BIO_free(bio);
 
 	return key;
-
-err:
-	RSA_free(key);
-	BN_free(e);
-
-	return NULL;
 }
 
 static void reverse_bytes(void *data, int length)
@@ -339,8 +327,10 @@ bool encl_measure(struct encl *encl)
 		goto err;
 
 	key = gen_sign_key();
-	if (!key)
+	if (!key) {
+		ERR_print_errors_fp(stdout);
 		goto err;
+	}
 
 	BN_bn2bin(get_modulus(key), sigstruct->modulus);
 
-- 
cgit v1.2.3-70-g09d2


From d055126180564a57fe533728a4e93d0cb53d49b3 Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Tue, 17 Nov 2020 18:45:49 +0000
Subject: bpf: Add bpf_ktime_get_coarse_ns helper

The helper uses CLOCK_MONOTONIC_COARSE source of time that is less
accurate but more performant.

We have a BPF CGROUP_SKB firewall that supports event logging through
bpf_perf_event_output(). Each event has a timestamp and currently we use
bpf_ktime_get_ns() for it. Use of bpf_ktime_get_coarse_ns() saves ~15-20
ns in time required for event logging.

bpf_ktime_get_ns():
EgressLogByRemoteEndpoint                              113.82ns    8.79M

bpf_ktime_get_coarse_ns():
EgressLogByRemoteEndpoint                               95.40ns   10.48M

Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201117184549.257280-1-me@ubique.spb.ru
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/core.c              |  1 +
 kernel/bpf/helpers.c           | 13 +++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 6 files changed, 39 insertions(+)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 581b2a2e78eb..e1bcb6d7345c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1842,6 +1842,7 @@ extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
 extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
+extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a52299b80b9d..3ca6146f001a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3797,6 +3797,16 @@ union bpf_attr {
  *		is cleared if the flag is not specified.
  *	Return
  *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
+ *
+ * u64 bpf_ktime_get_coarse_ns(void)
+ * 	Description
+ * 		Return a coarse-grained version of the time elapsed since
+ * 		system boot, in nanoseconds. Does not include time the system
+ * 		was suspended.
+ *
+ * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
+ * 	Return
+ * 		Current *ktime*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3959,6 +3969,7 @@ union bpf_attr {
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
+	FN(ktime_get_coarse_ns),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 55454d2278b1..ff55cbcfbab4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2211,6 +2211,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
 const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
 
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 25520f5eeaf6..2c395deae279 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -167,6 +167,17 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_ktime_get_coarse_ns)
+{
+	return ktime_get_coarse_ns();
+}
+
+const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
+	.func		= bpf_ktime_get_coarse_ns,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 BPF_CALL_0(bpf_get_current_pid_tgid)
 {
 	struct task_struct *task = current;
@@ -685,6 +696,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_ringbuf_output:
 		return &bpf_ringbuf_output_proto;
 	case BPF_FUNC_ringbuf_reserve:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 02986c7b90eb..d255bc9b2bfa 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1280,6 +1280,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_ktime_get_coarse_ns:
+		return &bpf_ktime_get_coarse_ns_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_current_pid_tgid:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a52299b80b9d..3ca6146f001a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3797,6 +3797,16 @@ union bpf_attr {
  *		is cleared if the flag is not specified.
  *	Return
  *		**-EINVAL** if invalid *flags* are passed, zero otherwise.
+ *
+ * u64 bpf_ktime_get_coarse_ns(void)
+ * 	Description
+ * 		Return a coarse-grained version of the time elapsed since
+ * 		system boot, in nanoseconds. Does not include time the system
+ * 		was suspended.
+ *
+ * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
+ * 	Return
+ * 		Current *ktime*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3959,6 +3969,7 @@ union bpf_attr {
 	FN(task_storage_delete),	\
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
+	FN(ktime_get_coarse_ns),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3-70-g09d2


From 6016df8fe874e1cf36f6357d71438b384198ce06 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@gmail.com>
Date: Wed, 18 Nov 2020 08:16:38 +0100
Subject: selftests/bpf: Fix broken riscv build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The selftests/bpf Makefile includes system include directories from
the host, when building BPF programs. On RISC-V glibc requires that
__riscv_xlen is defined. This is not the case for "clang -target bpf",
which messes up __WORDSIZE (errno.h -> ... -> wordsize.h) and breaks
the build.

By explicitly defining __risc_xlen correctly for riscv, we can
workaround this.

Fixes: 167381f3eac0 ("selftests/bpf: Makefile fix "missing" headers on build with -idirafter")
Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Luke Nelson <luke.r.nels@gmail.com>
Link: https://lore.kernel.org/bpf/20201118071640.83773-2-bjorn.topel@gmail.com
---
 tools/testing/selftests/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c1708ffa6b1c..3d5940cd110d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -219,7 +219,8 @@ $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids	\
 # build would have failed anyways.
 define get_sys_includes
 $(shell $(1) -v -E - </dev/null 2>&1 \
-	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
 endef
 
 # Determine target endianness.
-- 
cgit v1.2.3-70-g09d2


From c77b0589ca29ad1859fe7d7c1ecd63c0632379fa Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@gmail.com>
Date: Wed, 18 Nov 2020 08:16:39 +0100
Subject: selftests/bpf: Avoid running unprivileged tests with alignment
 requirements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some architectures have strict alignment requirements. In that case,
the BPF verifier detects if a program has unaligned accesses and
rejects them. A user can pass BPF_F_ANY_ALIGNMENT to a program to
override this check. That, however, will only work when a privileged
user loads a program. An unprivileged user loading a program with this
flag will be rejected prior entering the verifier.

Hence, it does not make sense to load unprivileged programs without
strict alignment when testing the verifier. This patch avoids exactly
that.

Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Luke Nelson <luke.r.nels@gmail.com>
Link: https://lore.kernel.org/bpf/20201118071640.83773-3-bjorn.topel@gmail.com
---
 tools/testing/selftests/bpf/test_verifier.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 9be395d9dc64..4bfe3aa2cfc4 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1152,6 +1152,19 @@ static void get_unpriv_disabled()
 
 static bool test_as_unpriv(struct bpf_test *test)
 {
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	/* Some architectures have strict alignment requirements. In
+	 * that case, the BPF verifier detects if a program has
+	 * unaligned accesses and rejects them. A user can pass
+	 * BPF_F_ANY_ALIGNMENT to a program to override this
+	 * check. That, however, will only work when a privileged user
+	 * loads a program. An unprivileged user loading a program
+	 * with this flag will be rejected prior entering the
+	 * verifier.
+	 */
+	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+		return false;
+#endif
 	return !test->prog_type ||
 	       test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
 	       test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
-- 
cgit v1.2.3-70-g09d2


From 6007b23cc7555df882be870433dc589841d4eb06 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@gmail.com>
Date: Wed, 18 Nov 2020 08:16:40 +0100
Subject: selftests/bpf: Mark tests that require unaligned memory access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A lot of tests require unaligned memory access to work. Mark the tests
as such, so that they can be avoided on unsupported architectures such
as RISC-V.

Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Luke Nelson <luke.r.nels@gmail.com>
Link: https://lore.kernel.org/bpf/20201118071640.83773-4-bjorn.topel@gmail.com
---
 .../testing/selftests/bpf/verifier/ctx_sk_lookup.c |  7 ++++
 .../selftests/bpf/verifier/direct_value_access.c   |  3 ++
 tools/testing/selftests/bpf/verifier/map_ptr.c     |  1 +
 .../selftests/bpf/verifier/raw_tp_writable.c       |  1 +
 .../testing/selftests/bpf/verifier/ref_tracking.c  |  4 ++
 tools/testing/selftests/bpf/verifier/regalloc.c    |  8 ++++
 tools/testing/selftests/bpf/verifier/wide_access.c | 46 +++++++++++++---------
 7 files changed, 52 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index 2ad5f974451c..fb13ca2d5606 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -266,6 +266,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
@@ -292,6 +293,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup remote_port field",
@@ -305,6 +307,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup local_ip4 field",
@@ -331,6 +334,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 8-byte read from bpf_sk_lookup local_port field",
@@ -344,6 +348,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
 {
@@ -410,6 +415,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
@@ -422,6 +428,7 @@
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
 	.expected_attach_type = BPF_SK_LOOKUP,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 /* in-bound and out-of-bound writes to bpf_sk_lookup */
 {
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index 988f46a1a4c7..c0648dc009b5 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -69,6 +69,7 @@
 	.fixup_map_array_48b = { 1 },
 	.result = REJECT,
 	.errstr = "R1 min value is outside of the allowed memory range",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"direct map access, write test 7",
@@ -195,6 +196,7 @@
 	.fixup_map_array_48b = { 1, 3 },
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"direct map access, write test 17",
@@ -209,6 +211,7 @@
 	.fixup_map_array_48b = { 1, 3 },
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"direct map access, write test 18",
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index 637f9293bda8..b117bdd3806d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -44,6 +44,7 @@
 	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
 	.result = REJECT,
 	.errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"bpf_map_ptr: read ops field accepted",
diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
index 95b5d70a1dc1..2978fb5a769d 100644
--- a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
+++ b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
@@ -31,4 +31,5 @@
 	.fixup_map_hash_8b = { 1, },
 	.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
 	.errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 006b5bd99c08..3b6ee009c00b 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -675,6 +675,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use ptr from bpf_sk_fullsock() after release",
@@ -698,6 +699,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
@@ -725,6 +727,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use sk after bpf_sk_release(tp)",
@@ -747,6 +750,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
 	.errstr = "invalid mem access",
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c
index 4ad7e05de706..bb0dd89dd212 100644
--- a/tools/testing/selftests/bpf/verifier/regalloc.c
+++ b/tools/testing/selftests/bpf/verifier/regalloc.c
@@ -21,6 +21,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc negative",
@@ -71,6 +72,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc src_reg negative",
@@ -97,6 +99,7 @@
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc and spill",
@@ -126,6 +129,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc and spill negative",
@@ -156,6 +160,7 @@
 	.result = REJECT,
 	.errstr = "invalid access to map value, value_size=48 off=48 size=8",
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc three regs",
@@ -182,6 +187,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc after call",
@@ -210,6 +216,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc in callee",
@@ -240,6 +247,7 @@
 	.fixup_map_hash_48b = { 4 },
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
 	"regalloc, spill, JEQ",
diff --git a/tools/testing/selftests/bpf/verifier/wide_access.c b/tools/testing/selftests/bpf/verifier/wide_access.c
index ccade9312d21..55af248efa93 100644
--- a/tools/testing/selftests/bpf/verifier/wide_access.c
+++ b/tools/testing/selftests/bpf/verifier/wide_access.c
@@ -1,4 +1,4 @@
-#define BPF_SOCK_ADDR_STORE(field, off, res, err) \
+#define BPF_SOCK_ADDR_STORE(field, off, res, err, flgs)	\
 { \
 	"wide store to bpf_sock_addr." #field "[" #off "]", \
 	.insns = { \
@@ -11,31 +11,36 @@
 	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
 	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
 	.errstr = err, \
+	.flags = flgs, \
 }
 
 /* user_ip6[0] is u64 aligned */
 BPF_SOCK_ADDR_STORE(user_ip6, 0, ACCEPT,
-		    NULL),
+		    NULL, 0),
 BPF_SOCK_ADDR_STORE(user_ip6, 1, REJECT,
-		    "invalid bpf_context access off=12 size=8"),
+		    "invalid bpf_context access off=12 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_STORE(user_ip6, 2, ACCEPT,
-		    NULL),
+		    NULL, 0),
 BPF_SOCK_ADDR_STORE(user_ip6, 3, REJECT,
-		    "invalid bpf_context access off=20 size=8"),
+		    "invalid bpf_context access off=20 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 
 /* msg_src_ip6[0] is _not_ u64 aligned */
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 0, REJECT,
-		    "invalid bpf_context access off=44 size=8"),
+		    "invalid bpf_context access off=44 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 1, ACCEPT,
-		    NULL),
+		    NULL, 0),
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 2, REJECT,
-		    "invalid bpf_context access off=52 size=8"),
+		    "invalid bpf_context access off=52 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT,
-		    "invalid bpf_context access off=56 size=8"),
+		    "invalid bpf_context access off=56 size=8", 0),
 
 #undef BPF_SOCK_ADDR_STORE
 
-#define BPF_SOCK_ADDR_LOAD(field, off, res, err) \
+#define BPF_SOCK_ADDR_LOAD(field, off, res, err, flgs)	\
 { \
 	"wide load from bpf_sock_addr." #field "[" #off "]", \
 	.insns = { \
@@ -48,26 +53,31 @@ BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT,
 	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
 	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
 	.errstr = err, \
+	.flags = flgs, \
 }
 
 /* user_ip6[0] is u64 aligned */
 BPF_SOCK_ADDR_LOAD(user_ip6, 0, ACCEPT,
-		   NULL),
+		   NULL, 0),
 BPF_SOCK_ADDR_LOAD(user_ip6, 1, REJECT,
-		   "invalid bpf_context access off=12 size=8"),
+		   "invalid bpf_context access off=12 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_LOAD(user_ip6, 2, ACCEPT,
-		   NULL),
+		   NULL, 0),
 BPF_SOCK_ADDR_LOAD(user_ip6, 3, REJECT,
-		   "invalid bpf_context access off=20 size=8"),
+		   "invalid bpf_context access off=20 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 
 /* msg_src_ip6[0] is _not_ u64 aligned */
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 0, REJECT,
-		   "invalid bpf_context access off=44 size=8"),
+		   "invalid bpf_context access off=44 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 1, ACCEPT,
-		   NULL),
+		   NULL, 0),
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 2, REJECT,
-		   "invalid bpf_context access off=52 size=8"),
+		   "invalid bpf_context access off=52 size=8",
+		    F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
 BPF_SOCK_ADDR_LOAD(msg_src_ip6, 3, REJECT,
-		   "invalid bpf_context access off=56 size=8"),
+		   "invalid bpf_context access off=56 size=8", 0),
 
 #undef BPF_SOCK_ADDR_LOAD
-- 
cgit v1.2.3-70-g09d2


From f5eca0b279117f25020112a2f65ec9c3ea25f3ac Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Fri, 23 Oct 2020 10:45:39 +0800
Subject: selftests/powerpc/eeh: disable kselftest timeout setting for
 eeh-basic

The eeh-basic test got its own 60 seconds timeout (defined in commit
414f50434aa2 "selftests/eeh: Bump EEH wait time to 60s") per breakable
device.

And we have discovered that the number of breakable devices varies
on different hardware. The device recovery time ranges from 0 to 35
seconds. In our test pool it will take about 30 seconds to run on a
Power8 system that with 5 breakable devices, 60 seconds to run on a
Power9 system that with 4 breakable devices.

Extend the timeout setting in the kselftest framework to 5 minutes
to give it a chance to finish.

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201023024539.9512-1-po-hsu.lin@canonical.com
---
 tools/testing/selftests/powerpc/eeh/Makefile | 2 +-
 tools/testing/selftests/powerpc/eeh/settings | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/eeh/settings

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile
index b397babd569b..ae963eb2dc5b 100644
--- a/tools/testing/selftests/powerpc/eeh/Makefile
+++ b/tools/testing/selftests/powerpc/eeh/Makefile
@@ -3,7 +3,7 @@ noarg:
 	$(MAKE) -C ../
 
 TEST_PROGS := eeh-basic.sh
-TEST_FILES := eeh-functions.sh
+TEST_FILES := eeh-functions.sh settings
 
 top_srcdir = ../../../../..
 include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/eeh/settings b/tools/testing/selftests/powerpc/eeh/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/settings
@@ -0,0 +1 @@
+timeout=300
-- 
cgit v1.2.3-70-g09d2


From fcb48454c23c5679d1a2e252f127642e91b05cbe Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Tue, 17 Nov 2020 16:59:11 +1100
Subject: selftests/powerpc: rfi_flush: disable entry flush if present

We are about to add an entry flush. The rfi (exit) flush test measures
the number of L1D flushes over a syscall with the RFI flush enabled and
disabled. But if the entry flush is also enabled, the effect of enabling
and disabling the RFI flush is masked.

If there is a debugfs entry for the entry flush, disable it during the RFI
flush and restore it later.

Reported-by: Spoorthy S <spoorts2@in.ibm.com>
Signed-off-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../testing/selftests/powerpc/security/rfi_flush.c | 35 ++++++++++++++++++----
 1 file changed, 29 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 93a65bd1f231..4e7b2776c367 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -85,19 +85,33 @@ int rfi_flush_test(void)
 	__u64 l1d_misses_total = 0;
 	unsigned long iterations = 100000, zero_size = 24 * 1024;
 	unsigned long l1d_misses_expected;
-	int rfi_flush_org, rfi_flush;
+	int rfi_flush_orig, rfi_flush;
+	int have_entry_flush, entry_flush_orig;
 
 	SKIP_IF(geteuid() != 0);
 
 	// The PMU event we use only works on Power7 or later
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
-	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
 		perror("Unable to read powerpc/rfi_flush debugfs file");
 		SKIP_IF(1);
 	}
 
-	rfi_flush = rfi_flush_org;
+	if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+		have_entry_flush = 0;
+	} else {
+		have_entry_flush = 1;
+
+		if (entry_flush_orig != 0) {
+			if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
+				perror("error writing to powerpc/entry_flush debugfs file");
+				return 1;
+			}
+		}
+	}
+
+	rfi_flush = rfi_flush_orig;
 
 	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
 	FAIL_IF(fd < 0);
@@ -106,6 +120,7 @@ int rfi_flush_test(void)
 
 	FAIL_IF(perf_event_enable(fd));
 
+	// disable L1 prefetching
 	set_dscr(1);
 
 	iter = repetitions;
@@ -147,8 +162,8 @@ again:
 		       repetitions * l1d_misses_expected / 2,
 		       passes, repetitions);
 
-	if (rfi_flush == rfi_flush_org) {
-		rfi_flush = !rfi_flush_org;
+	if (rfi_flush == rfi_flush_orig) {
+		rfi_flush = !rfi_flush_orig;
 		if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
 			perror("error writing to powerpc/rfi_flush debugfs file");
 			return 1;
@@ -164,11 +179,19 @@ again:
 
 	set_dscr(0);
 
-	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
 		perror("unable to restore original value of powerpc/rfi_flush debugfs file");
 		return 1;
 	}
 
+	if (have_entry_flush) {
+		if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+			perror("unable to restore original value of powerpc/entry_flush "
+			       "debugfs file");
+			return 1;
+		}
+	}
+
 	return rc;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 89a83a0c69c81a25ce91002b90ca27ed86132a0a Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Tue, 17 Nov 2020 16:59:14 +1100
Subject: selftests/powerpc: entry flush test

Add a test modelled on the RFI flush test which counts the number
of L1D misses doing a simple syscall with the entry flush on and off.

For simplicity of backporting, this test duplicates a lot of code from
rfi_flush. We clean that up in the next patch.

Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../testing/selftests/powerpc/security/.gitignore  |   1 +
 tools/testing/selftests/powerpc/security/Makefile  |   2 +-
 .../selftests/powerpc/security/entry_flush.c       | 198 +++++++++++++++++++++
 3 files changed, 200 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/security/entry_flush.c

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index f795e06f5ae3..4257a1f156bb 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 rfi_flush
+entry_flush
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index eadbbff50be6..921152caf1dc 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+
 
-TEST_GEN_PROGS := rfi_flush spectre_v2
+TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
new file mode 100644
index 000000000000..7ae7e37204c5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define CACHELINE_SIZE 128
+
+struct perf_event_read {
+	__u64 nr;
+	__u64 l1d_misses;
+};
+
+static inline __u64 load(void *addr)
+{
+	__u64 tmp;
+
+	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+	return tmp;
+}
+
+static void syscall_loop(char *p, unsigned long iterations,
+			 unsigned long zero_size)
+{
+	for (unsigned long i = 0; i < iterations; i++) {
+		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+			load(p + j);
+		getppid();
+	}
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+	static int warned;
+	ucontext_t *ctx = (ucontext_t *)unused;
+	unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+	/* mtspr 3,RS to check for move to DSCR below */
+	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+		if (!warned++)
+			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+		*pc += 4;
+	} else {
+		printf("SIGILL at %p\n", pc);
+		abort();
+	}
+}
+
+static void set_dscr(unsigned long val)
+{
+	static int init;
+	struct sigaction sa;
+
+	if (!init) {
+		memset(&sa, 0, sizeof(sa));
+		sa.sa_sigaction = sigill_handler;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGILL, &sa, NULL))
+			perror("sigill_handler");
+		init = 1;
+	}
+
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+int entry_flush_test(void)
+{
+	char *p;
+	int repetitions = 10;
+	int fd, passes = 0, iter, rc = 0;
+	struct perf_event_read v;
+	__u64 l1d_misses_total = 0;
+	unsigned long iterations = 100000, zero_size = 24 * 1024;
+	unsigned long l1d_misses_expected;
+	int rfi_flush_orig;
+	int entry_flush, entry_flush_orig;
+
+	SKIP_IF(geteuid() != 0);
+
+	// The PMU event we use only works on Power7 or later
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+		perror("Unable to read powerpc/rfi_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
+		perror("Unable to read powerpc/entry_flush debugfs file");
+		SKIP_IF(1);
+	}
+
+	if (rfi_flush_orig != 0) {
+		if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
+			perror("error writing to powerpc/rfi_flush debugfs file");
+			FAIL_IF(1);
+		}
+	}
+
+	entry_flush = entry_flush_orig;
+
+	fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+	FAIL_IF(fd < 0);
+
+	p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+	FAIL_IF(perf_event_enable(fd));
+
+	// disable L1 prefetching
+	set_dscr(1);
+
+	iter = repetitions;
+
+	/*
+	 * We expect to see l1d miss for each cacheline access when entry_flush
+	 * is set. Allow a small variation on this.
+	 */
+	l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+	FAIL_IF(perf_event_reset(fd));
+
+	syscall_loop(p, iterations, zero_size);
+
+	FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+	if (entry_flush && v.l1d_misses >= l1d_misses_expected)
+		passes++;
+	else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2))
+		passes++;
+
+	l1d_misses_total += v.l1d_misses;
+
+	while (--iter)
+		goto again;
+
+	if (passes < repetitions) {
+		printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+		       entry_flush, l1d_misses_total, entry_flush ? '<' : '>',
+		       entry_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       repetitions - passes, repetitions);
+		rc = 1;
+	} else {
+		printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+		       entry_flush, l1d_misses_total, entry_flush ? '>' : '<',
+		       entry_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
+		       passes, repetitions);
+	}
+
+	if (entry_flush == entry_flush_orig) {
+		entry_flush = !entry_flush_orig;
+		if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) {
+			perror("error writing to powerpc/entry_flush debugfs file");
+			return 1;
+		}
+		iter = repetitions;
+		l1d_misses_total = 0;
+		passes = 0;
+		goto again;
+	}
+
+	perf_event_disable(fd);
+	close(fd);
+
+	set_dscr(0);
+
+	if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+		return 1;
+	}
+
+	if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
+		perror("unable to restore original value of powerpc/entry_flush debugfs file");
+		return 1;
+	}
+
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(entry_flush_test, "entry_flush_test");
+}
-- 
cgit v1.2.3-70-g09d2


From 0d239f3b03efc78fb5b290aff6c747fecd3b98cb Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Tue, 17 Nov 2020 16:59:15 +1100
Subject: selftests/powerpc: refactor entry and rfi_flush tests

For simplicity in backporting, the original entry_flush test contained
a lot of duplicated code from the rfi_flush test. De-duplicate that code.

Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/include/utils.h    |  5 ++
 tools/testing/selftests/powerpc/security/Makefile  |  2 +
 .../selftests/powerpc/security/entry_flush.c       | 61 +------------------
 .../selftests/powerpc/security/flush_utils.c       | 70 ++++++++++++++++++++++
 .../selftests/powerpc/security/flush_utils.h       | 17 ++++++
 .../testing/selftests/powerpc/security/rfi_flush.c | 61 +------------------
 6 files changed, 96 insertions(+), 120 deletions(-)
 create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.c
 create mode 100644 tools/testing/selftests/powerpc/security/flush_utils.h

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 052b5a775dc2..b7d188fc87c7 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -42,6 +42,11 @@ int perf_event_enable(int fd);
 int perf_event_disable(int fd);
 int perf_event_reset(int fd);
 
+struct perf_event_read {
+	__u64 nr;
+	__u64 l1d_misses;
+};
+
 #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
 #include <unistd.h>
 #include <sys/syscall.h>
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index 921152caf1dc..f25e854fe370 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 $(OUTPUT)/spectre_v2: CFLAGS += -m64
 $(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
+$(OUTPUT)/rfi_flush: flush_utils.c
+$(OUTPUT)/entry_flush: flush_utils.c
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
index 7ae7e37204c5..78cf914fa321 100644
--- a/tools/testing/selftests/powerpc/security/entry_flush.c
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -15,66 +15,7 @@
 #include <string.h>
 #include <stdio.h>
 #include "utils.h"
-
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
-	__u64 nr;
-	__u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
-	__u64 tmp;
-
-	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
-	return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
-			 unsigned long zero_size)
-{
-	for (unsigned long i = 0; i < iterations; i++) {
-		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
-			load(p + j);
-		getppid();
-	}
-}
-
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
-	static int warned;
-	ucontext_t *ctx = (ucontext_t *)unused;
-	unsigned long *pc = &UCONTEXT_NIA(ctx);
-
-	/* mtspr 3,RS to check for move to DSCR below */
-	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
-		if (!warned++)
-			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
-		*pc += 4;
-	} else {
-		printf("SIGILL at %p\n", pc);
-		abort();
-	}
-}
-
-static void set_dscr(unsigned long val)
-{
-	static int init;
-	struct sigaction sa;
-
-	if (!init) {
-		memset(&sa, 0, sizeof(sa));
-		sa.sa_sigaction = sigill_handler;
-		sa.sa_flags = SA_SIGINFO;
-		if (sigaction(SIGILL, &sa, NULL))
-			perror("sigill_handler");
-		init = 1;
-	}
-
-	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
+#include "flush_utils.h"
 
 int entry_flush_test(void)
 {
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
new file mode 100644
index 000000000000..0c3c4c40c7fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+static inline __u64 load(void *addr)
+{
+	__u64 tmp;
+
+	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+	return tmp;
+}
+
+void syscall_loop(char *p, unsigned long iterations,
+		  unsigned long zero_size)
+{
+	for (unsigned long i = 0; i < iterations; i++) {
+		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+			load(p + j);
+		getppid();
+	}
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+	static int warned;
+	ucontext_t *ctx = (ucontext_t *)unused;
+	unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+	/* mtspr 3,RS to check for move to DSCR below */
+	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+		if (!warned++)
+			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+		*pc += 4;
+	} else {
+		printf("SIGILL at %p\n", pc);
+		abort();
+	}
+}
+
+void set_dscr(unsigned long val)
+{
+	static int init;
+	struct sigaction sa;
+
+	if (!init) {
+		memset(&sa, 0, sizeof(sa));
+		sa.sa_sigaction = sigill_handler;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGILL, &sa, NULL))
+			perror("sigill_handler");
+		init = 1;
+	}
+
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h
new file mode 100644
index 000000000000..07a5eb301466
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+
+#define CACHELINE_SIZE 128
+
+void syscall_loop(char *p, unsigned long iterations,
+		  unsigned long zero_size);
+
+void set_dscr(unsigned long val);
+
+#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 4e7b2776c367..7565fd786640 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -10,71 +10,12 @@
 #include <stdint.h>
 #include <malloc.h>
 #include <unistd.h>
-#include <signal.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include "utils.h"
+#include "flush_utils.h"
 
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
-	__u64 nr;
-	__u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
-	__u64 tmp;
-
-	asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
-	return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
-			 unsigned long zero_size)
-{
-	for (unsigned long i = 0; i < iterations; i++) {
-		for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
-			load(p + j);
-		getppid();
-	}
-}
-
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
-	static int warned = 0;
-	ucontext_t *ctx = (ucontext_t *)unused;
-	unsigned long *pc = &UCONTEXT_NIA(ctx);
-
-	/* mtspr 3,RS to check for move to DSCR below */
-	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
-		if (!warned++)
-			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
-		*pc += 4;
-	} else {
-		printf("SIGILL at %p\n", pc);
-		abort();
-	}
-}
-
-static void set_dscr(unsigned long val)
-{
-	static int init = 0;
-	struct sigaction sa;
-
-	if (!init) {
-		memset(&sa, 0, sizeof(sa));
-		sa.sa_sigaction = sigill_handler;
-		sa.sa_flags = SA_SIGINFO;
-		if (sigaction(SIGILL, &sa, NULL))
-			perror("sigill_handler");
-		init = 1;
-	}
-
-	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
 
 int rfi_flush_test(void)
 {
-- 
cgit v1.2.3-70-g09d2


From a61ea6379ae9dbb63fbf022d1456733520db6be7 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 19 Nov 2020 14:53:22 +0900
Subject: tools/bootconfig: Fix errno reference after printf()

Fix not to refer the errno variable as the result of previous libc
functions after printf() because printf() can change the errno.

Link: https://lkml.kernel.org/r/160576520243.320071.51093664672431249.stgit@devnote2

Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 52 +++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index eb92027817a7..52eb2bbe8966 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -147,6 +147,12 @@ static int load_xbc_file(const char *path, char **buf)
 	return ret;
 }
 
+static int pr_errno(const char *msg, int err)
+{
+	pr_err("%s: %d\n", msg, err);
+	return err;
+}
+
 static int load_xbc_from_initrd(int fd, char **buf)
 {
 	struct stat stat;
@@ -162,26 +168,24 @@ static int load_xbc_from_initrd(int fd, char **buf)
 	if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN)
 		return 0;
 
-	if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) {
-		pr_err("Failed to lseek: %d\n", -errno);
-		return -errno;
-	}
+	if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
+		return pr_errno("Failed to lseek for magic", -errno);
+
 	if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
-		return -errno;
+		return pr_errno("Failed to read", -errno);
+
 	/* Check the bootconfig magic bytes */
 	if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
 		return 0;
 
-	if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) {
-		pr_err("Failed to lseek: %d\n", -errno);
-		return -errno;
-	}
+	if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0)
+		return pr_errno("Failed to lseek for size", -errno);
 
 	if (read(fd, &size, sizeof(u32)) < 0)
-		return -errno;
+		return pr_errno("Failed to read size", -errno);
 
 	if (read(fd, &csum, sizeof(u32)) < 0)
-		return -errno;
+		return pr_errno("Failed to read checksum", -errno);
 
 	/* Wrong size error  */
 	if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) {
@@ -190,10 +194,8 @@ static int load_xbc_from_initrd(int fd, char **buf)
 	}
 
 	if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN),
-		  SEEK_SET) < 0) {
-		pr_err("Failed to lseek: %d\n", -errno);
-		return -errno;
-	}
+		  SEEK_SET) < 0)
+		return pr_errno("Failed to lseek", -errno);
 
 	ret = load_xbc_fd(fd, buf, size);
 	if (ret < 0)
@@ -262,14 +264,16 @@ static int show_xbc(const char *path, bool list)
 
 	ret = stat(path, &st);
 	if (ret < 0) {
-		pr_err("Failed to stat %s: %d\n", path, -errno);
-		return -errno;
+		ret = -errno;
+		pr_err("Failed to stat %s: %d\n", path, ret);
+		return ret;
 	}
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
-		pr_err("Failed to open initrd %s: %d\n", path, fd);
-		return -errno;
+		ret = -errno;
+		pr_err("Failed to open initrd %s: %d\n", path, ret);
+		return ret;
 	}
 
 	ret = load_xbc_from_initrd(fd, &buf);
@@ -307,8 +311,9 @@ static int delete_xbc(const char *path)
 
 	fd = open(path, O_RDWR);
 	if (fd < 0) {
-		pr_err("Failed to open initrd %s: %d\n", path, fd);
-		return -errno;
+		ret = -errno;
+		pr_err("Failed to open initrd %s: %d\n", path, ret);
+		return ret;
 	}
 
 	size = load_xbc_from_initrd(fd, &buf);
@@ -383,9 +388,10 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	/* Apply new one */
 	fd = open(path, O_RDWR | O_APPEND);
 	if (fd < 0) {
-		pr_err("Failed to open %s: %d\n", path, fd);
+		ret = -errno;
+		pr_err("Failed to open %s: %d\n", path, ret);
 		free(data);
-		return fd;
+		return ret;
 	}
 	/* TODO: Ensure the @path is initramfs/initrd image */
 	ret = write(fd, data, size + 8);
-- 
cgit v1.2.3-70-g09d2


From a995e6bc0524450adfd6181dfdcd9d0520cfaba5 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 19 Nov 2020 14:53:31 +0900
Subject: tools/bootconfig: Fix to check the write failure correctly

Fix to check the write(2) failure including partial write
correctly and try to rollback the partial write, because
if there is no BOOTCONFIG_MAGIC string, we can not remove it.

Link: https://lkml.kernel.org/r/160576521135.320071.3883101436675969998.stgit@devnote2

Fixes: 85c46b78da58 ("bootconfig: Add bootconfig magic word for indicating bootconfig explicitly")
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 52eb2bbe8966..a0733cbb3c49 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -337,6 +337,7 @@ static int delete_xbc(const char *path)
 
 static int apply_xbc(const char *path, const char *xbc_path)
 {
+	struct stat stat;
 	u32 size, csum;
 	char *buf, *data;
 	int ret, fd;
@@ -394,16 +395,26 @@ static int apply_xbc(const char *path, const char *xbc_path)
 		return ret;
 	}
 	/* TODO: Ensure the @path is initramfs/initrd image */
+	if (fstat(fd, &stat) < 0) {
+		pr_err("Failed to get the size of %s\n", path);
+		goto out;
+	}
 	ret = write(fd, data, size + 8);
-	if (ret < 0) {
+	if (ret < size + 8) {
+		if (ret < 0)
+			ret = -errno;
 		pr_err("Failed to apply a boot config: %d\n", ret);
-		goto out;
+		if (ret < 0)
+			goto out;
+		goto out_rollback;
 	}
 	/* Write a magic word of the bootconfig */
 	ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
-	if (ret < 0) {
+	if (ret < BOOTCONFIG_MAGIC_LEN) {
+		if (ret < 0)
+			ret = -errno;
 		pr_err("Failed to apply a boot config magic: %d\n", ret);
-		goto out;
+		goto out_rollback;
 	}
 	ret = 0;
 out:
@@ -411,6 +422,17 @@ out:
 	free(data);
 
 	return ret;
+
+out_rollback:
+	/* Map the partial write to -ENOSPC */
+	if (ret >= 0)
+		ret = -ENOSPC;
+	if (ftruncate(fd, stat.st_size) < 0) {
+		ret = -errno;
+		pr_err("Failed to rollback the write error: %d\n", ret);
+		pr_err("The initrd %s may be corrupted. Recommend to rebuild.\n", path);
+	}
+	goto out;
 }
 
 static int usage(void)
-- 
cgit v1.2.3-70-g09d2


From e1cef2d4c379b2aab43a7dc9601f645048209090 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 19 Nov 2020 14:53:40 +0900
Subject: tools/bootconfig: Align the bootconfig applied initrd image size to 4

Align the bootconfig applied initrd image size to 4. To fill the gap,
the bootconfig command uses null characters in between the bootconfig
data and the footer. This will expands the footer size but don't change
the checksum.
Thus the block image of the initrd file with bootconfig is as follows.

[initrd][bootconfig][(pad)][size][csum]["#BOOTCONFIG\n"]

Link: https://lkml.kernel.org/r/160576522046.320071.8550680670010950634.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h          |  3 ++
 tools/bootconfig/main.c             | 57 ++++++++++++++++++++++---------------
 tools/bootconfig/test-bootconfig.sh |  6 +++-
 3 files changed, 42 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 9903088891fa..2696eb0fc149 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -12,6 +12,9 @@
 
 #define BOOTCONFIG_MAGIC	"#BOOTCONFIG\n"
 #define BOOTCONFIG_MAGIC_LEN	12
+#define BOOTCONFIG_ALIGN_SHIFT	2
+#define BOOTCONFIG_ALIGN	(1 << BOOTCONFIG_ALIGN_SHIFT)
+#define BOOTCONFIG_ALIGN_MASK	(BOOTCONFIG_ALIGN - 1)
 
 /* XBC tree node */
 struct xbc_node {
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index a0733cbb3c49..4a445b6304bb 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -337,12 +337,13 @@ static int delete_xbc(const char *path)
 
 static int apply_xbc(const char *path, const char *xbc_path)
 {
+	char *buf, *data, *p;
+	size_t total_size;
 	struct stat stat;
+	const char *msg;
 	u32 size, csum;
-	char *buf, *data;
+	int pos, pad;
 	int ret, fd;
-	const char *msg;
-	int pos;
 
 	ret = load_xbc_file(xbc_path, &buf);
 	if (ret < 0) {
@@ -352,13 +353,12 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	size = strlen(buf) + 1;
 	csum = checksum((unsigned char *)buf, size);
 
-	/* Prepare xbc_path data */
-	data = malloc(size + 8);
+	/* Backup the bootconfig data */
+	data = calloc(size + BOOTCONFIG_ALIGN +
+		      sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1);
 	if (!data)
 		return -ENOMEM;
-	strcpy(data, buf);
-	*(u32 *)(data + size) = size;
-	*(u32 *)(data + size + 4) = csum;
+	memcpy(data, buf, size);
 
 	/* Check the data format */
 	ret = xbc_init(buf, &msg, &pos);
@@ -399,24 +399,35 @@ static int apply_xbc(const char *path, const char *xbc_path)
 		pr_err("Failed to get the size of %s\n", path);
 		goto out;
 	}
-	ret = write(fd, data, size + 8);
-	if (ret < size + 8) {
+
+	/* To align up the total size to BOOTCONFIG_ALIGN, get padding size */
+	total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN;
+	pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size;
+	size += pad;
+
+	/* Add a footer */
+	p = data + size;
+	*(u32 *)p = size;
+	p += sizeof(u32);
+
+	*(u32 *)p = csum;
+	p += sizeof(u32);
+
+	memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
+	p += BOOTCONFIG_MAGIC_LEN;
+
+	total_size = p - data;
+
+	ret = write(fd, data, total_size);
+	if (ret < total_size) {
 		if (ret < 0)
 			ret = -errno;
 		pr_err("Failed to apply a boot config: %d\n", ret);
-		if (ret < 0)
-			goto out;
-		goto out_rollback;
-	}
-	/* Write a magic word of the bootconfig */
-	ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
-	if (ret < BOOTCONFIG_MAGIC_LEN) {
-		if (ret < 0)
-			ret = -errno;
-		pr_err("Failed to apply a boot config magic: %d\n", ret);
-		goto out_rollback;
-	}
-	ret = 0;
+		if (ret >= 0)
+			goto out_rollback;
+	} else
+		ret = 0;
+
 out:
 	close(fd);
 	free(data);
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index d295e406a756..baed891d0ba4 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -9,6 +9,7 @@ else
   TESTDIR=.
 fi
 BOOTCONF=${TESTDIR}/bootconfig
+ALIGN=4
 
 INITRD=`mktemp ${TESTDIR}/initrd-XXXX`
 TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf`
@@ -59,7 +60,10 @@ echo "Show command test"
 xpass $BOOTCONF $INITRD
 
 echo "File size check"
-xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12)
+total_size=$(expr $bconf_size + $initrd_size + 9 + 12 + $ALIGN - 1 )
+total_size=$(expr $total_size / $ALIGN)
+total_size=$(expr $total_size \* $ALIGN)
+xpass test $new_size -eq $total_size
 
 echo "Apply command repeat test"
 xpass $BOOTCONF -a $TEMPCONF $INITRD
-- 
cgit v1.2.3-70-g09d2


From 1fd6cee127e2ddff36d648573d7566aafb0d0b77 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 18 Nov 2020 22:13:50 +0100
Subject: libbpf: Fix VERSIONED_SYM_COUNT number parsing

We remove "other info" from "readelf -s --wide" output when
parsing GLOBAL_SYM_COUNT variable, which was added in [1].
But we don't do that for VERSIONED_SYM_COUNT and it's failing
the check_abi target on powerpc Fedora 33.

The extra "other info" wasn't problem for VERSIONED_SYM_COUNT
parsing until commit [2] added awk in the pipe, which assumes
that the last column is symbol, but it can be "other info".

Adding "other info" removal for VERSIONED_SYM_COUNT the same
way as we did for GLOBAL_SYM_COUNT parsing.

[1] aa915931ac3e ("libbpf: Fix readelf output parsing for Fedora")
[2] 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check")

Fixes: 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201118211350.1493421-1-jolsa@kernel.org
---
 tools/lib/bpf/Makefile | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 5f9abed3e226..55bd78b3496f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			   sort -u | wc -l)
 VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+			      sed 's/\[.*\]//' | \
 			      awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
@@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
 		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
 		readelf --dyn-syms --wide $(OUTPUT)libbpf.so |		 \
+		    sed 's/\[.*\]//' |					 \
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
 		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
 		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
-- 
cgit v1.2.3-70-g09d2


From c8a36aedf3e24768e94d87fdcdd37684bd241c44 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Tue, 17 Nov 2020 12:05:46 -0800
Subject: selftest/bpf: Test bpf_probe_read_user_str() strips trailing bytes
 after NUL

Previously, bpf_probe_read_user_str() could potentially overcopy the
trailing bytes after the NUL due to how do_strncpy_from_user() does the
copy in long-sized strides. The issue has been fixed in the previous
commit.

This commit adds a selftest that ensures we don't regress
bpf_probe_read_user_str() again.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/4d977508fab4ec5b7b574b85bdf8b398868b6ee9.1605642949.git.dxu@dxuuu.xyz
---
 .../selftests/bpf/prog_tests/probe_read_user_str.c | 71 ++++++++++++++++++++++
 .../selftests/bpf/progs/test_probe_read_user_str.c | 25 ++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_probe_read_user_str.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
new file mode 100644
index 000000000000..e419298132b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_probe_read_user_str.skel.h"
+
+static const char str1[] = "mestring";
+static const char str2[] = "mestringalittlebigger";
+static const char str3[] = "mestringblubblubblubblubblub";
+
+static int test_one_str(struct test_probe_read_user_str *skel, const char *str,
+			size_t len)
+{
+	int err, duration = 0;
+	char buf[256];
+
+	/* Ensure bytes after string are ones */
+	memset(buf, 1, sizeof(buf));
+	memcpy(buf, str, len);
+
+	/* Give prog our userspace pointer */
+	skel->bss->user_ptr = buf;
+
+	/* Trigger tracepoint */
+	usleep(1);
+
+	/* Did helper fail? */
+	if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n",
+		  skel->bss->ret))
+		return 1;
+
+	/* Check that string was copied correctly */
+	err = memcmp(skel->bss->buf, str, len);
+	if (CHECK(err, "memcmp", "prog copied wrong string"))
+		return 1;
+
+	/* Now check that no extra trailing bytes were copied */
+	memset(buf, 0, sizeof(buf));
+	err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len);
+	if (CHECK(err, "memcmp", "trailing bytes were not stripped"))
+		return 1;
+
+	return 0;
+}
+
+void test_probe_read_user_str(void)
+{
+	struct test_probe_read_user_str *skel;
+	int err, duration = 0;
+
+	skel = test_probe_read_user_str__open_and_load();
+	if (CHECK(!skel, "test_probe_read_user_str__open_and_load",
+		  "skeleton open and load failed\n"))
+		return;
+
+	/* Give pid to bpf prog so it doesn't read from anyone else */
+	skel->bss->pid = getpid();
+
+	err = test_probe_read_user_str__attach(skel);
+	if (CHECK(err, "test_probe_read_user_str__attach",
+		  "skeleton attach failed: %d\n", err))
+		goto out;
+
+	if (test_one_str(skel, str1, sizeof(str1)))
+		goto out;
+	if (test_one_str(skel, str2, sizeof(str2)))
+		goto out;
+	if (test_one_str(skel, str3, sizeof(str3)))
+		goto out;
+
+out:
+	test_probe_read_user_str__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c
new file mode 100644
index 000000000000..3ae398b75dcd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include <sys/types.h>
+
+pid_t pid = 0;
+long ret = 0;
+void *user_ptr = 0;
+char buf[256] = {};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int on_write(void *ctx)
+{
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-70-g09d2


From 450d060e8f752a6ce052a2bffd3f01633472e330 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 18 Nov 2020 23:30:39 -0800
Subject: bpftool: Add {i,d}tlb_misses support for bpftool profile

Commit 47c09d6a9f67("bpftool: Introduce "prog profile" command")
introduced "bpftool prog profile" command which can be used
to profile bpf program with metrics like # of instructions,

This patch added support for itlb_misses and dtlb_misses.
During an internal bpf program performance evaluation,
I found these two metrics are also very useful. The following
is an example output:

 $ bpftool prog profile id 324 duration 3 cycles itlb_misses

           1885029 run_cnt
        5134686073 cycles
            306893 itlb_misses

 $ bpftool prog profile id 324 duration 3 cycles dtlb_misses

           1827382 run_cnt
        4943593648 cycles
           5975636 dtlb_misses

 $ bpftool prog profile id 324 duration 3 cycles llc_misses

           1836527 run_cnt
        5019612972 cycles
           4161041 llc_misses

From the above, we can see quite some dtlb misses, 3 dtlb misses
perf prog run. This might be something worth further investigation.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201119073039.4060095-1-yhs@fb.com
---
 tools/bpf/bpftool/prog.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index acdb2c245f0a..1fe3ba255bad 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1717,6 +1717,34 @@ struct profile_metric {
 		.ratio_desc = "LLC misses per million insns",
 		.ratio_mul = 1e6,
 	},
+	{
+		.name = "itlb_misses",
+		.attr = {
+			.type = PERF_TYPE_HW_CACHE,
+			.config =
+				PERF_COUNT_HW_CACHE_ITLB |
+				(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+				(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+			.exclude_user = 1
+		},
+		.ratio_metric = 2,
+		.ratio_desc = "itlb misses per million insns",
+		.ratio_mul = 1e6,
+	},
+	{
+		.name = "dtlb_misses",
+		.attr = {
+			.type = PERF_TYPE_HW_CACHE,
+			.config =
+				PERF_COUNT_HW_CACHE_DTLB |
+				(PERF_COUNT_HW_CACHE_OP_READ << 8) |
+				(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+			.exclude_user = 1
+		},
+		.ratio_metric = 2,
+		.ratio_desc = "dtlb misses per million insns",
+		.ratio_mul = 1e6,
+	},
 };
 
 static __u64 profile_total_count;
@@ -2109,7 +2137,7 @@ static int do_help(int argc, char **argv)
 		"                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
-		"       METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
+		"       METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2]);
-- 
cgit v1.2.3-70-g09d2


From f5098e34dd4c774c3040e417960f1637e5daade8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 17 Nov 2020 11:33:02 -0800
Subject: selftests/seccomp: powerpc: Fix typo in macro variable name

A typo sneaked into the powerpc selftest. Fix the name so it builds again.

Fixes: 46138329faea ("selftests/seccomp: powerpc: Fix seccomp return value testing")
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/lkml/87y2ix2895.fsf@mpe.ellerman.id.au
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 4a180439ee9e..7f7ecfcd66db 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1758,10 +1758,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 		 * and the code is stored as a positive value.	\
 		 */						\
 		if (_result < 0) {				\
-			SYSCALL_RET(_regs) = -result;		\
+			SYSCALL_RET(_regs) = -_result;		\
 			(_regs).ccr |= 0x10000000;		\
 		} else {					\
-			SYSCALL_RET(_regs) = result;		\
+			SYSCALL_RET(_regs) = _result;		\
 			(_regs).ccr &= ~0x10000000;		\
 		}						\
 	} while (0)
-- 
cgit v1.2.3-70-g09d2


From 4c222f31fb1db4d590503a181a6268ced9252379 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 17 Nov 2020 11:54:43 -0800
Subject: selftests/seccomp: sh: Fix register names

It looks like the seccomp selftests was never actually built for sh.
This fixes it, though I don't have an environment to do a runtime test
of it yet.

Fixes: 0bb605c2c7f2b4b3 ("sh: Add SECCOMP_FILTER")
Tested-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Link: https://lore.kernel.org/lkml/a36d7b48-6598-1642-e403-0c77a86f416d@physik.fu-berlin.de
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 7f7ecfcd66db..26c72f2b61b1 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1804,8 +1804,8 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 #define SYSCALL_RET(_regs)	(_regs).a[(_regs).windowbase * 4 + 2]
 #elif defined(__sh__)
 # define ARCH_REGS		struct pt_regs
-# define SYSCALL_NUM(_regs)	(_regs).gpr[3]
-# define SYSCALL_RET(_regs)	(_regs).gpr[0]
+# define SYSCALL_NUM(_regs)	(_regs).regs[3]
+# define SYSCALL_RET(_regs)	(_regs).regs[0]
 #else
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
-- 
cgit v1.2.3-70-g09d2


From 192cf32243ce39af65bd095625aec374b38c03df Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 11 Oct 2020 10:47:45 -0500
Subject: selftests/seccomp: Compare bitmap vs filter overhead
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As part of the seccomp benchmarking, include the expectations with
regard to the timing behavior of the constant action bitmaps, and report
inconsistencies better.

Example output with constant action bitmaps on x86:

$ sudo ./seccomp_benchmark 100000000
Current BPF sysctl settings:
net.core.bpf_jit_enable = 1
net.core.bpf_jit_harden = 0
Benchmarking 200000000 syscalls...
129.359381409 - 0.008724424 = 129350656985 (129.4s)
getpid native: 646 ns
264.385890006 - 129.360453229 = 135025436777 (135.0s)
getpid RET_ALLOW 1 filter (bitmap): 675 ns
399.400511893 - 264.387045901 = 135013465992 (135.0s)
getpid RET_ALLOW 2 filters (bitmap): 675 ns
545.872866260 - 399.401718327 = 146471147933 (146.5s)
getpid RET_ALLOW 3 filters (full): 732 ns
696.337101319 - 545.874097681 = 150463003638 (150.5s)
getpid RET_ALLOW 4 filters (full): 752 ns
Estimated total seccomp overhead for 1 bitmapped filter: 29 ns
Estimated total seccomp overhead for 2 bitmapped filters: 29 ns
Estimated total seccomp overhead for 3 full filters: 86 ns
Estimated total seccomp overhead for 4 full filters: 106 ns
Estimated seccomp entry overhead: 29 ns
Estimated seccomp per-filter overhead (last 2 diff): 20 ns
Estimated seccomp per-filter overhead (filters / 4): 19 ns
Expectations:
	native ≤ 1 bitmap (646 ≤ 675): ✔️
	native ≤ 1 filter (646 ≤ 732): ✔️
	per-filter (last 2 diff) ≈ per-filter (filters / 4) (20 ≈ 19): ✔️
	1 bitmapped ≈ 2 bitmapped (29 ≈ 29): ✔️
	entry ≈ 1 bitmapped (29 ≈ 29): ✔️
	entry ≈ 2 bitmapped (29 ≈ 29): ✔️
	native + entry + (per filter * 4) ≈ 4 filters total (755 ≈ 752): ✔️

[YiFei: Changed commit message to show stats for this patch series]
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/1b61df3db85c5f7f1b9202722c45e7b39df73ef2.1602431034.git.yifeifz2@illinois.edu
---
 .../testing/selftests/seccomp/seccomp_benchmark.c  | 151 ++++++++++++++++++---
 tools/testing/selftests/seccomp/settings           |   2 +-
 2 files changed, 130 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 91f5a89cadac..fcc806585266 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -4,12 +4,16 @@
  */
 #define _GNU_SOURCE
 #include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
 #include <unistd.h>
 #include <linux/filter.h>
 #include <linux/seccomp.h>
+#include <sys/param.h>
 #include <sys/prctl.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
@@ -70,18 +74,74 @@ unsigned long long calibrate(void)
 	return samples * seconds;
 }
 
+bool approx(int i_one, int i_two)
+{
+	double one = i_one, one_bump = one * 0.01;
+	double two = i_two, two_bump = two * 0.01;
+
+	one_bump = one + MAX(one_bump, 2.0);
+	two_bump = two + MAX(two_bump, 2.0);
+
+	/* Equal to, or within 1% or 2 digits */
+	if (one == two ||
+	    (one > two && one <= two_bump) ||
+	    (two > one && two <= one_bump))
+		return true;
+	return false;
+}
+
+bool le(int i_one, int i_two)
+{
+	if (i_one <= i_two)
+		return true;
+	return false;
+}
+
+long compare(const char *name_one, const char *name_eval, const char *name_two,
+	     unsigned long long one, bool (*eval)(int, int), unsigned long long two)
+{
+	bool good;
+
+	printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
+	       (long long)one, name_eval, (long long)two);
+	if (one > INT_MAX) {
+		printf("Miscalculation! Measurement went negative: %lld\n", (long long)one);
+		return 1;
+	}
+	if (two > INT_MAX) {
+		printf("Miscalculation! Measurement went negative: %lld\n", (long long)two);
+		return 1;
+	}
+
+	good = eval(one, two);
+	printf("%s\n", good ? "✔️" : "❌");
+
+	return good ? 0 : 1;
+}
+
 int main(int argc, char *argv[])
 {
+	struct sock_filter bitmap_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog bitmap_prog = {
+		.len = (unsigned short)ARRAY_SIZE(bitmap_filter),
+		.filter = bitmap_filter,
+	};
 	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, args[0])),
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
 	struct sock_fprog prog = {
 		.len = (unsigned short)ARRAY_SIZE(filter),
 		.filter = filter,
 	};
-	long ret;
-	unsigned long long samples;
-	unsigned long long native, filter1, filter2;
+
+	long ret, bits;
+	unsigned long long samples, calc;
+	unsigned long long native, filter1, filter2, bitmap1, bitmap2;
+	unsigned long long entry, per_filter1, per_filter2;
 
 	printf("Current BPF sysctl settings:\n");
 	system("sysctl net.core.bpf_jit_enable");
@@ -101,35 +161,82 @@ int main(int argc, char *argv[])
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	assert(ret == 0);
 
-	/* One filter */
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	/* One filter resulting in a bitmap */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
 	assert(ret == 0);
 
-	filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-	printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1);
+	bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
+
+	/* Second filter resulting in a bitmap */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
+	assert(ret == 0);
 
-	if (filter1 == native)
-		printf("No overhead measured!? Try running again with more samples.\n");
+	bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
 
-	/* Two filters */
+	/* Third filter, can no longer be converted to bitmap */
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
 	assert(ret == 0);
 
-	filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
-	printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2);
-
-	/* Calculations */
-	printf("Estimated total seccomp overhead for 1 filter: %llu ns\n",
-		filter1 - native);
+	filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
 
-	printf("Estimated total seccomp overhead for 2 filters: %llu ns\n",
-		filter2 - native);
+	/* Fourth filter, can not be converted to bitmap because of filter 3 */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
+	assert(ret == 0);
 
-	printf("Estimated seccomp per-filter overhead: %llu ns\n",
-		filter2 - filter1);
+	filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
+
+	/* Estimations */
+#define ESTIMATE(fmt, var, what)	do {			\
+		var = (what);					\
+		printf("Estimated " fmt ": %llu ns\n", var);	\
+		if (var > INT_MAX)				\
+			goto more_samples;			\
+	} while (0)
+
+	ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc,
+		 bitmap1 - native);
+	ESTIMATE("total seccomp overhead for 2 bitmapped filters", calc,
+		 bitmap2 - native);
+	ESTIMATE("total seccomp overhead for 3 full filters", calc,
+		 filter1 - native);
+	ESTIMATE("total seccomp overhead for 4 full filters", calc,
+		 filter2 - native);
+	ESTIMATE("seccomp entry overhead", entry,
+		 bitmap1 - native - (bitmap2 - bitmap1));
+	ESTIMATE("seccomp per-filter overhead (last 2 diff)", per_filter1,
+		 filter2 - filter1);
+	ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2,
+		 (filter2 - native - entry) / 4);
+
+	printf("Expectations:\n");
+	ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1);
+	bits = compare("native", "≤", "1 filter", native, le, filter1);
+	if (bits)
+		goto more_samples;
+
+	ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)",
+			per_filter1, approx, per_filter2);
+
+	bits = compare("1 bitmapped", "≈", "2 bitmapped",
+			bitmap1 - native, approx, bitmap2 - native);
+	if (bits) {
+		printf("Skipping constant action bitmap expectations: they appear unsupported.\n");
+		goto out;
+	}
 
-	printf("Estimated seccomp entry overhead: %llu ns\n",
-		filter1 - native - (filter2 - filter1));
+	ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native);
+	ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native);
+	ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total",
+			entry + (per_filter1 * 4) + native, approx, filter2);
+	if (ret == 0)
+		goto out;
 
+more_samples:
+	printf("Saw unexpected benchmark result. Try running again with more samples?\n");
+out:
 	return 0;
 }
diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings
index ba4d85f74cd6..6091b45d226b 100644
--- a/tools/testing/selftests/seccomp/settings
+++ b/tools/testing/selftests/seccomp/settings
@@ -1 +1 @@
-timeout=90
+timeout=120
-- 
cgit v1.2.3-70-g09d2


From fbb8531e58bd989868db3c2513d06870c46bd87f Mon Sep 17 00:00:00 2001
From: Antonio Cardace <acardace@redhat.com>
Date: Wed, 18 Nov 2020 21:45:20 +0100
Subject: selftests: extract common functions in ethtool-common.sh

Factor out some useful functions so that they can be reused
by other ethtool-netdevsim scripts.

Signed-off-by: Antonio Cardace <acardace@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/netdevsim/ethtool-common.sh        | 69 ++++++++++++++++++++++
 .../drivers/net/netdevsim/ethtool-pause.sh         | 63 +-------------------
 2 files changed, 71 insertions(+), 61 deletions(-)
 create mode 100644 tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
new file mode 100644
index 000000000000..fa44cf6e732c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
+NSIM_NETDEV=
+num_passes=0
+num_errors=0
+
+function cleanup_nsim {
+    if [ -e $NSIM_DEV_SYS ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/del_device
+    fi
+}
+
+function cleanup {
+    cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function get_netdev_name {
+    local -n old=$1
+
+    new=$(ls /sys/class/net)
+
+    for netdev in $new; do
+	for check in $old; do
+            [ $netdev == $check ] && break
+	done
+
+	if [ $netdev != $check ]; then
+	    echo $netdev
+	    break
+	fi
+    done
+}
+
+function check {
+    local code=$1
+    local str=$2
+    local exp_str=$3
+
+    if [ $code -ne 0 ]; then
+	((num_errors++))
+	return
+    fi
+
+    if [ "$str" != "$exp_str"  ]; then
+	echo -e "Expected: '$exp_str', got '$str'"
+	((num_errors++))
+	return
+    fi
+
+    ((num_passes++))
+}
+
+function make_netdev {
+    # Make a netdevsim
+    old_netdevs=$(ls /sys/class/net)
+
+    if ! $(lsmod | grep -q netdevsim); then
+	modprobe netdevsim
+    fi
+
+    echo $NSIM_ID > /sys/bus/netdevsim/new_device
+    echo `get_netdev_name old_netdevs`
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
index 25c896b9e2eb..b4a7abfe5454 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
@@ -1,60 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0-only
 
-NSIM_ID=$((RANDOM % 1024))
-NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
-NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
-NSIM_NETDEV=
-num_passes=0
-num_errors=0
-
-function cleanup_nsim {
-    if [ -e $NSIM_DEV_SYS ]; then
-	echo $NSIM_ID > /sys/bus/netdevsim/del_device
-    fi
-}
-
-function cleanup {
-    cleanup_nsim
-}
-
-trap cleanup EXIT
-
-function get_netdev_name {
-    local -n old=$1
-
-    new=$(ls /sys/class/net)
-
-    for netdev in $new; do
-	for check in $old; do
-            [ $netdev == $check ] && break
-	done
-
-	if [ $netdev != $check ]; then
-	    echo $netdev
-	    break
-	fi
-    done
-}
-
-function check {
-    local code=$1
-    local str=$2
-    local exp_str=$3
-
-    if [ $code -ne 0 ]; then
-	((num_errors++))
-	return
-    fi
-
-    if [ "$str" != "$exp_str"  ]; then
-	echo -e "Expected: '$exp_str', got '$str'"
-	((num_errors++))
-	return
-    fi
-
-    ((num_passes++))
-}
+source ethtool-common.sh
 
 # Bail if ethtool is too old
 if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
@@ -62,13 +9,7 @@ if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
     exit 4
 fi
 
-# Make a netdevsim
-old_netdevs=$(ls /sys/class/net)
-
-modprobe netdevsim
-echo $NSIM_ID > /sys/bus/netdevsim/new_device
-
-NSIM_NETDEV=`get_netdev_name old_netdevs`
+NSIM_NETDEV=$(make_netdev)
 
 set -o pipefail
 
-- 
cgit v1.2.3-70-g09d2


From 9e48ee80ac4e04c9985379d58248dd2a96a170ef Mon Sep 17 00:00:00 2001
From: Antonio Cardace <acardace@redhat.com>
Date: Wed, 18 Nov 2020 21:45:21 +0100
Subject: selftests: refactor get_netdev_name function

As pointed out by Michal Kubecek, getting the name
with the previous approach was racy, it's better
and easier to get the name of the device with this
patch's approach.

Essentialy the function doesn't need to exist
anymore as it's a simple 'ls' command.

Signed-off-by: Antonio Cardace <acardace@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/netdevsim/ethtool-common.sh          | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
index fa44cf6e732c..9f64d5c7107b 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -20,23 +20,6 @@ function cleanup {
 
 trap cleanup EXIT
 
-function get_netdev_name {
-    local -n old=$1
-
-    new=$(ls /sys/class/net)
-
-    for netdev in $new; do
-	for check in $old; do
-            [ $netdev == $check ] && break
-	done
-
-	if [ $netdev != $check ]; then
-	    echo $netdev
-	    break
-	fi
-    done
-}
-
 function check {
     local code=$1
     local str=$2
@@ -65,5 +48,6 @@ function make_netdev {
     fi
 
     echo $NSIM_ID > /sys/bus/netdevsim/new_device
-    echo `get_netdev_name old_netdevs`
+    # get new device name
+    ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/
 }
-- 
cgit v1.2.3-70-g09d2


From fbb7a1f8137df4a693ea2b44096ad8ec518e3db1 Mon Sep 17 00:00:00 2001
From: Antonio Cardace <acardace@redhat.com>
Date: Wed, 18 Nov 2020 21:45:22 +0100
Subject: selftests: add ring and coalesce selftests

Add scripts to test ring and coalesce settings
of netdevsim.

Signed-off-by: Antonio Cardace <acardace@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/netdevsim/ethtool-coalesce.sh      | 132 +++++++++++++++++++++
 .../drivers/net/netdevsim/ethtool-ring.sh          |  85 +++++++++++++
 2 files changed, 217 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
new file mode 100755
index 000000000000..9adfba8f87e6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+function get_value {
+    local query="${SETTINGS_MAP[$1]}"
+
+    echo $(ethtool -c $NSIM_NETDEV | \
+        awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[ \t]/, "", $2); print $2}')
+}
+
+function update_current_settings {
+    for key in ${!SETTINGS_MAP[@]}; do
+        CURRENT_SETTINGS[$key]=$(get_value $key)
+    done
+    echo ${CURRENT_SETTINGS[@]}
+}
+
+if ! ethtool -h | grep -q coalesce; then
+    echo "SKIP: No --coalesce support in ethtool"
+    exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+declare -A SETTINGS_MAP=(
+    ["rx-frames-low"]="rx-frame-low"
+    ["tx-frames-low"]="tx-frame-low"
+    ["rx-frames-high"]="rx-frame-high"
+    ["tx-frames-high"]="tx-frame-high"
+    ["rx-usecs"]="rx-usecs"
+    ["rx-frames"]="rx-frames"
+    ["rx-usecs-irq"]="rx-usecs-irq"
+    ["rx-frames-irq"]="rx-frames-irq"
+    ["tx-usecs"]="tx-usecs"
+    ["tx-frames"]="tx-frames"
+    ["tx-usecs-irq"]="tx-usecs-irq"
+    ["tx-frames-irq"]="tx-frames-irq"
+    ["stats-block-usecs"]="stats-block-usecs"
+    ["pkt-rate-low"]="pkt-rate-low"
+    ["rx-usecs-low"]="rx-usecs-low"
+    ["tx-usecs-low"]="tx-usecs-low"
+    ["pkt-rate-high"]="pkt-rate-high"
+    ["rx-usecs-high"]="rx-usecs-high"
+    ["tx-usecs-high"]="tx-usecs-high"
+    ["sample-interval"]="sample-interval"
+)
+
+declare -A CURRENT_SETTINGS=(
+    ["rx-frames-low"]=""
+    ["tx-frames-low"]=""
+    ["rx-frames-high"]=""
+    ["tx-frames-high"]=""
+    ["rx-usecs"]=""
+    ["rx-frames"]=""
+    ["rx-usecs-irq"]=""
+    ["rx-frames-irq"]=""
+    ["tx-usecs"]=""
+    ["tx-frames"]=""
+    ["tx-usecs-irq"]=""
+    ["tx-frames-irq"]=""
+    ["stats-block-usecs"]=""
+    ["pkt-rate-low"]=""
+    ["rx-usecs-low"]=""
+    ["tx-usecs-low"]=""
+    ["pkt-rate-high"]=""
+    ["rx-usecs-high"]=""
+    ["tx-usecs-high"]=""
+    ["sample-interval"]=""
+)
+
+declare -A EXPECTED_SETTINGS=(
+    ["rx-frames-low"]=""
+    ["tx-frames-low"]=""
+    ["rx-frames-high"]=""
+    ["tx-frames-high"]=""
+    ["rx-usecs"]=""
+    ["rx-frames"]=""
+    ["rx-usecs-irq"]=""
+    ["rx-frames-irq"]=""
+    ["tx-usecs"]=""
+    ["tx-frames"]=""
+    ["tx-usecs-irq"]=""
+    ["tx-frames-irq"]=""
+    ["stats-block-usecs"]=""
+    ["pkt-rate-low"]=""
+    ["rx-usecs-low"]=""
+    ["tx-usecs-low"]=""
+    ["pkt-rate-high"]=""
+    ["rx-usecs-high"]=""
+    ["tx-usecs-high"]=""
+    ["sample-interval"]=""
+)
+
+# populate the expected settings map
+for key in ${!SETTINGS_MAP[@]}; do
+    EXPECTED_SETTINGS[$key]=$(get_value $key)
+done
+
+# test
+for key in ${!SETTINGS_MAP[@]}; do
+    value=$((RANDOM % $((2**32-1))))
+
+    ethtool -C $NSIM_NETDEV "$key" "$value"
+
+    EXPECTED_SETTINGS[$key]="$value"
+    expected=${EXPECTED_SETTINGS[@]}
+    current=$(update_current_settings)
+
+    check $? "$current" "$expected"
+    set +x
+done
+
+# bool settings which ethtool displays on the same line
+ethtool -C $NSIM_NETDEV adaptive-rx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on  TX: off")
+check $? "$s" ""
+
+ethtool -C $NSIM_NETDEV adaptive-tx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on  TX: on")
+check $? "$s" ""
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
new file mode 100755
index 000000000000..c969559ffa7a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+function get_value {
+    local query="${SETTINGS_MAP[$1]}"
+
+    echo $(ethtool -g $NSIM_NETDEV | \
+        tail -n +$CURR_SETT_LINE | \
+        awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}')
+}
+
+function update_current_settings {
+    for key in ${!SETTINGS_MAP[@]}; do
+        CURRENT_SETTINGS[$key]=$(get_value $key)
+    done
+    echo ${CURRENT_SETTINGS[@]}
+}
+
+if ! ethtool -h | grep -q set-ring >/dev/null; then
+    echo "SKIP: No --set-ring support in ethtool"
+    exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+declare -A SETTINGS_MAP=(
+    ["rx"]="RX"
+    ["rx-mini"]="RX Mini"
+    ["rx-jumbo"]="RX Jumbo"
+    ["tx"]="TX"
+)
+
+declare -A EXPECTED_SETTINGS=(
+    ["rx"]=""
+    ["rx-mini"]=""
+    ["rx-jumbo"]=""
+    ["tx"]=""
+)
+
+declare -A CURRENT_SETTINGS=(
+    ["rx"]=""
+    ["rx-mini"]=""
+    ["rx-jumbo"]=""
+    ["tx"]=""
+)
+
+MAX_VALUE=$((RANDOM % $((2**32-1))))
+RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/)
+
+for ring_max_entry in $RING_MAX_LIST; do
+    echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry
+done
+
+CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:)
+
+# populate the expected settings map
+for key in ${!SETTINGS_MAP[@]}; do
+    EXPECTED_SETTINGS[$key]=$(get_value $key)
+done
+
+# test
+for key in ${!SETTINGS_MAP[@]}; do
+    value=$((RANDOM % $MAX_VALUE))
+
+    ethtool -G $NSIM_NETDEV "$key" "$value"
+
+    EXPECTED_SETTINGS[$key]="$value"
+    expected=${EXPECTED_SETTINGS[@]}
+    current=$(update_current_settings)
+
+    check $? "$current" "$expected"
+    set +x
+done
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
-- 
cgit v1.2.3-70-g09d2


From 20ac8f8690535161d9357f5b4af4dfdf88c56578 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:44 +0200
Subject: selftests: mlxsw: Add nexthop objects configuration tests

Test that unsupported nexthop objects are rejected and that offload
indication is correctly set on: nexthop objects, nexthop group objects
and routes associated these objects.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 189 +++++++++++++++++++++
 1 file changed, 189 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index f4031002d5e9..5de47d72f8c9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -29,6 +29,10 @@ ALL_TESTS="
 	bridge_extern_learn_test
 	neigh_offload_test
 	nexthop_offload_test
+	nexthop_obj_invalid_test
+	nexthop_obj_offload_test
+	nexthop_obj_group_offload_test
+	nexthop_obj_route_offload_test
 	devlink_reload_test
 "
 NUM_NETIFS=2
@@ -674,6 +678,191 @@ nexthop_offload_test()
 	sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down
 }
 
+nexthop_obj_invalid_test()
+{
+	# Test that invalid nexthop object configurations are rejected
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.3 fdb
+	check_fail $? "managed to configure an FDB nexthop when should not"
+
+	ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
+	check_fail $? "managed to configure a nexthop with MPLS encap when should not"
+
+	ip nexthop add id 1 blackhole
+	check_fail $? "managed to configure a blackhole nexthop when should not"
+
+	ip nexthop add id 1 dev $swp1
+	ip nexthop add id 2 dev $swp1
+	ip nexthop add id 10 group 1/2
+	check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
+
+	log_test "nexthop objects - invalid configurations"
+
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_offload_test()
+{
+	# Test offload indication of nexthop objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded when should"
+
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop marked as offloaded after setting neigh to failed state"
+
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded after neigh replace"
+
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop marked as offloaded after replacing to use an invalid address"
+
+	ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "nexthop not marked as offloaded after replacing to use a valid address"
+
+	log_test "nexthop objects offload indication"
+
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_group_offload_test()
+{
+	# Test offload indication of nexthop group objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+	ip nexthop add id 10 group 1/2
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "IPv4 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 2
+	check_err $? "IPv6 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group not marked as offloaded when should"
+
+	# Invalidate nexthop id 1
+	ip neigh replace 192.0.2.2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 10
+	check_fail $? "nexthop group not marked as offloaded with one valid nexthop"
+
+	# Invalidate nexthop id 2
+	ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group marked as offloaded when should not"
+
+	# Revalidate nexthop id 1
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "nexthop group not marked as offloaded after revalidating nexthop"
+
+	log_test "nexthop group objects offload indication"
+
+	ip neigh del 2001:db8:1::2 dev $swp1
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 10
+	ip nexthop del id 2
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_route_offload_test()
+{
+	# Test offload indication of routes using nexthop objects
+	RET=0
+
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2
+	setup_wait
+
+	ip nexthop add id 1 via 192.0.2.2 dev $swp1
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+		dev $swp1
+
+	ip route replace 198.51.100.0/24 nhid 1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded when using valid nexthop"
+
+	ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded after replacing valid nexthop with a valid one"
+
+	ip nexthop replace id 1 via 192.0.2.4 dev $swp1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route marked as offloaded after replacing valid nexthop with an invalid one"
+
+	ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip route show 198.51.100.0/24
+	check_err $? "route not marked as offloaded after replacing invalid nexthop with a valid one"
+
+	log_test "routes using nexthop objects offload indication"
+
+	ip route del 198.51.100.0/24
+	ip neigh del 192.0.2.3 dev $swp1
+	ip neigh del 192.0.2.2 dev $swp1
+	ip nexthop del id 1
+
+	simple_if_fini $swp2
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
 devlink_reload_test()
 {
 	# Test that after executing all the above configuration tests, a
-- 
cgit v1.2.3-70-g09d2


From ffb721515bf3352f38457fd2ab19f575e75e190e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:45 +0200
Subject: selftests: forwarding: Do not configure nexthop objects twice

routing_nh_obj() is used to configure the nexthop objects employed by
the test, but it is called twice resulting in "RTNETLINK answers: File
exists" messages.

Remove the first call, so that the function is only called after
setup_wait(), when all the interfaces are up and ready.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index cf3d26c233e8..6067477ff326 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -312,7 +312,6 @@ setup_prepare()
 
 	router1_create
 	router2_create
-	routing_nh_obj
 
 	forwarding_enable
 }
-- 
cgit v1.2.3-70-g09d2


From 3600f29ad1399a1335af2030e8106ac8bbe9261a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:46 +0200
Subject: selftests: forwarding: Test IPv4 routes with IPv6 link-local nexthops

In addition to IPv4 multipath tests with IPv4 nexthops, also test IPv4
multipath with nexthops that use IPv6 link-local addresses.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 6067477ff326..e8c2573d5232 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -280,6 +280,17 @@ multipath_test()
 	multipath4_test "Weighted MP 2:1" 2 1
 	multipath4_test "Weighted MP 11:45" 11 45
 
+	log_info "Running IPv4 multipath tests with IPv6 link-local nexthops"
+	ip nexthop replace id 101 via fe80:2::22 dev $rp12
+	ip nexthop replace id 102 via fe80:3::23 dev $rp13
+
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	ip nexthop replace id 102 via 169.254.3.23 dev $rp13
+	ip nexthop replace id 101 via 169.254.2.22 dev $rp12
+
 	log_info "Running IPv6 multipath tests"
 	multipath6_test "ECMP" 1 1
 	multipath6_test "Weighted MP 2:1" 2 1
-- 
cgit v1.2.3-70-g09d2


From e96fa54bbd90e487a8c230155db4231d9326ebcc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:47 +0200
Subject: selftests: forwarding: Add device-only nexthop test

In a similar fashion to router_multipath.sh and its nexthop objects
version router_mpath_nh.sh, create a nexthop objects version of
router.sh.

It reuses the same topology, but uses device-only nexthop objects
instead of legacy ones.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/net/forwarding/router_nh.sh  | 160 +++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router_nh.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
new file mode 100755
index 000000000000..f3a53738bdcc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp2 clsact
+
+	ip address add 192.0.2.1/24 dev $rp1
+	ip address add 2001:db8:1::1/64 dev $rp1
+
+	ip address add 198.51.100.1/24 dev $rp2
+	ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 2001:db8:2::1/64 dev $rp2
+	ip address del 198.51.100.1/24 dev $rp2
+
+	ip address del 2001:db8:1::1/64 dev $rp1
+	ip address del 192.0.2.1/24 dev $rp1
+
+	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+routing_nh_obj()
+{
+	# Create the nexthops as AF_INET6, so that IPv4 and IPv6 routes could
+	# use them.
+	ip -6 nexthop add id 101 dev $rp1
+	ip -6 nexthop add id 102 dev $rp2
+
+	ip route replace 192.0.2.0/24 nhid 101
+	ip route replace 2001:db8:1::/64 nhid 101
+	ip route replace 198.51.100.0/24 nhid 102
+	ip route replace 2001:db8:2::/64 nhid 102
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From e035146d65603165078629671afa9409f659a358 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 19 Nov 2020 15:08:48 +0200
Subject: selftests: forwarding: Add multipath tunneling nexthop test

Add a nexthop objects version of gre_multipath.sh. Unlike the original
test, it also tests IPv6 overlay which is not possible with the legacy
nexthop implementation. See commit 9a2ad3623868 ("selftests: forwarding:
gre_multipath: Drop IPv6 tests") for more info.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/gre_multipath_nh.sh   | 356 +++++++++++++++++++++
 1 file changed, 356 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/gre_multipath_nh.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
new file mode 100755
index 000000000000..d03aa2cab9fd
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -0,0 +1,356 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |  2001:db8:1::2/64                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# |  2001:db8:2::1/64 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# |  2001:db8:2::2/64       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_ipv4
+	multipath_ipv6
+	multipath_ipv6_l4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip -6 nexthop add id 101 dev g1a
+	ip -6 nexthop add id 102 dev g1b
+	ip nexthop add id 103 group 101/102
+
+	ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+	ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip nexthop del id 103
+	ip -6 nexthop del id 102
+	ip -6 nexthop del id 101
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip -6 nexthop add id 201 dev g2a
+	ip -6 nexthop add id 202 dev g2b
+	ip nexthop add id 203 group 201/202
+
+	ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+	ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+	tc qdisc add dev $ul2 clsact
+	tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+	tc qdisc del dev $ul2 clsact
+
+	ip route del vrf v$ol2 2001:db8:1::/64
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip nexthop del id 203
+	ip -6 nexthop del id 202
+	ip -6 nexthop del id 201
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 0
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	# Generate 16384 echo requests, each with a random flow label.
+	for ((i=0; i < 16384; ++i)); do
+		ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+	done
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+		$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6_l4()
+{
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From 8b819a84d4b12c4a91cc9f91ad69ca09c3e0606d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 19 Nov 2020 11:45:57 -0800
Subject: selftests: mptcp: add link failure test case

Add a test case where a link fails with multiple subflows.
The expectation is that MPTCP will transmit any data that
could not be delivered via the failed link on another subflow.

Co-developed-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 104 +++++++++++++++++++-----
 1 file changed, 82 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 0d93b243695f..f841ed8186c1 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -5,6 +5,7 @@ ret=0
 sin=""
 sout=""
 cin=""
+cinsent=""
 cout=""
 ksft_skip=4
 timeout=30
@@ -81,7 +82,7 @@ cleanup_partial()
 cleanup()
 {
 	rm -f "$cin" "$cout"
-	rm -f "$sin" "$sout"
+	rm -f "$sin" "$sout" "$cinsent"
 	cleanup_partial
 }
 
@@ -144,6 +145,13 @@ if [ $? -ne 0 ];then
 	exit $ksft_skip
 fi
 
+print_file_err()
+{
+	ls -l "$1" 1>&2
+	echo "Trailing bytes are: "
+	tail -c 27 "$1"
+}
+
 check_transfer()
 {
 	in=$1
@@ -155,6 +163,7 @@ check_transfer()
 		echo "[ FAIL ] $what does not match (in, out):"
 		print_file_err "$in"
 		print_file_err "$out"
+		ret=1
 
 		return 1
 	fi
@@ -175,6 +184,17 @@ do_ping()
 	fi
 }
 
+link_failure()
+{
+	ns="$1"
+
+	l=$((RANDOM%4))
+	l=$((l+1))
+
+	veth="ns1eth$l"
+	ip -net "$ns" link set "$veth" down
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -182,9 +202,10 @@ do_transfer()
 	cl_proto="$3"
 	srv_proto="$4"
 	connect_addr="$5"
-	rm_nr_ns1="$6"
-	rm_nr_ns2="$7"
-	speed="$8"
+	test_link_fail="$6"
+	rm_nr_ns1="$7"
+	rm_nr_ns2="$8"
+	speed="$9"
 
 	port=$((10000+$TEST_COUNT))
 	TEST_COUNT=$((TEST_COUNT+1))
@@ -220,7 +241,12 @@ do_transfer()
 
 	sleep 1
 
-	ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+	if [ "$test_link_fail" -eq 0 ];then
+		ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+	else
+		( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \
+		ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" &
+	fi
 	cpid=$!
 
 	if [ $rm_nr_ns1 -gt 0 ]; then
@@ -265,12 +291,17 @@ do_transfer()
 		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
 
 		cat "$capout"
+		ret=1
 		return 1
 	fi
 
 	check_transfer $sin $cout "file received by client"
 	retc=$?
-	check_transfer $cin $sout "file received by server"
+	if [ "$test_link_fail" -eq 0 ];then
+		check_transfer $cin $sout "file received by server"
+	else
+		check_transfer $cinsent $sout "file received by server"
+	fi
 	rets=$?
 
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
@@ -286,13 +317,12 @@ make_file()
 {
 	name=$1
 	who=$2
+	size=$3
 
-	SIZE=1
-
-	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+	dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
 	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
 
-	echo "Created $name (size $SIZE KB) containing data sent by $who"
+	echo "Created $name (size $size KB) containing data sent by $who"
 }
 
 run_tests()
@@ -300,14 +330,32 @@ run_tests()
 	listener_ns="$1"
 	connector_ns="$2"
 	connect_addr="$3"
-	rm_nr_ns1="${4:-0}"
-	rm_nr_ns2="${5:-0}"
-	speed="${6:-fast}"
+	test_linkfail="${4:-0}"
+	rm_nr_ns1="${5:-0}"
+	rm_nr_ns2="${6:-0}"
+	speed="${7:-fast}"
 	lret=0
+	oldin=""
+
+	if [ "$test_linkfail" -eq 1 ];then
+		size=$((RANDOM%1024))
+		size=$((size+1))
+		size=$((size*128))
+
+		oldin=$(mktemp)
+		cp "$cin" "$oldin"
+		make_file "$cin" "client" $size
+	fi
 
 	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
-		${rm_nr_ns1} ${rm_nr_ns2} ${speed}
+		${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed}
 	lret=$?
+
+	if [ "$test_linkfail" -eq 1 ];then
+		cp "$oldin" "$cin"
+		rm -f "$oldin"
+	fi
+
 	if [ $lret -ne 0 ]; then
 		ret=$lret
 		return
@@ -440,10 +488,11 @@ chk_rm_nr()
 sin=$(mktemp)
 sout=$(mktemp)
 cin=$(mktemp)
+cinsent=$(mktemp)
 cout=$(mktemp)
 init
-make_file "$cin" "client"
-make_file "$sin" "server"
+make_file "$cin" "client" 1
+make_file "$sin" "server" 1
 trap cleanup EXIT
 
 run_tests $ns1 $ns2 10.0.1.1
@@ -528,12 +577,23 @@ run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "multiple subflows and signal" 3 3 3
 chk_add_nr 1 1
 
+# accept and use add_addr with additional subflows and link loss
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1 1
+chk_join_nr "multiple flows, signal, link failure" 3 3 3
+chk_add_nr 1 1
+
 # add_addr timeout
 reset_with_add_addr_timeout
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl limits 1 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-run_tests $ns1 $ns2 10.0.1.1 0 0 slow
+run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow
 chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
 chk_add_nr 4 0
 
@@ -542,7 +602,7 @@ reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl limits 0 1
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 0 1 slow
+run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow
 chk_join_nr "remove single subflow" 1 1 1
 chk_rm_nr 1 1
 
@@ -552,7 +612,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl limits 0 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 0 2 slow
+run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow
 chk_join_nr "remove multiple subflows" 2 2 2
 chk_rm_nr 2 2
 
@@ -561,7 +621,7 @@ reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-run_tests $ns1 $ns2 10.0.1.1 1 0 slow
+run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
 chk_join_nr "remove single address" 1 1 1
 chk_add_nr 1 1
 chk_rm_nr 0 0
@@ -572,7 +632,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 1 1 slow
+run_tests $ns1 $ns2 10.0.1.1 0 1 1 slow
 chk_join_nr "remove subflow and signal" 2 2 2
 chk_add_nr 1 1
 chk_rm_nr 1 1
@@ -584,7 +644,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 ip netns exec $ns2 ./pm_nl_ctl limits 1 3
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1 1 2 slow
+run_tests $ns1 $ns2 10.0.1.1 0 1 2 slow
 chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
-- 
cgit v1.2.3-70-g09d2


From 523514ed0a998fda389b9b6f00d0f2054ba30d25 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 19 Nov 2020 11:46:01 -0800
Subject: selftests: mptcp: add ADD_ADDR IPv6 test cases

This patch added IPv6 support for do_transfer, and the test cases for
ADD_ADDR IPv6.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 70 ++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f841ed8186c1..0eae628d1ffd 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -195,6 +195,12 @@ link_failure()
 	ip -net "$ns" link set "$veth" down
 }
 
+# $1: IP address
+is_v6()
+{
+	[ -z "${1##*:*}" ]
+}
+
 do_transfer()
 {
 	listener_ns="$1"
@@ -236,7 +242,15 @@ do_transfer()
 		mptcp_connect="./mptcp_connect -r"
 	fi
 
-	ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+	local local_addr
+	if is_v6 "${connect_addr}"; then
+		local_addr="::"
+	else
+		local_addr="0.0.0.0"
+	fi
+
+	ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \
+		-s ${srv_proto} ${local_addr} < "$sin" > "$sout" &
 	spid=$!
 
 	sleep 1
@@ -649,6 +663,60 @@ chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
 
+# subflow IPv6
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "single subflow IPv6" 1 1 1
+
+# add_address, unused IPv6
+reset
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "unused signal address IPv6" 0 0 0
+chk_add_nr 1 1
+
+# signal address IPv6
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "single address IPv6" 1 1 1
+chk_add_nr 1 1
+
+# add_addr timeout IPv6
+reset_with_add_addr_timeout 6
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
+chk_add_nr 4 0
+
+# single address IPv6, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_tests $ns1 $ns2 dead:beef:1::1 0 1 0 slow
+chk_join_nr "remove single address IPv6" 1 1 1
+chk_add_nr 1 1
+chk_rm_nr 0 0
+
+# subflow and signal IPv6, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
+run_tests $ns1 $ns2 dead:beef:1::1 0 1 1 slow
+chk_join_nr "remove subflow and signal IPv6" 2 2 2
+chk_add_nr 1 1
+chk_rm_nr 1 1
+
 # single subflow, syncookies
 reset_with_cookies
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-- 
cgit v1.2.3-70-g09d2


From d23e95c09067618eabd6d0e8cff372f0ce517c84 Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@linux.intel.com>
Date: Tue, 10 Nov 2020 18:36:17 -0800
Subject: pm-graph v5.8

 - if wakeups occur in s2idle: "freeze time: N (-x ms waking y times) ms"

 - change FREEZELOOP and FREEZEWAKE to S2LOOP and S2WAKE for brevity

 - returns all sysfs vals to their initial state after testing

 - use the dmesg log for debugging until the test is completed,
   instrument the executeSuspend process to have a full trace,
   if test completes, formal dmesg log overwrites the debug log

 - fix CPU_ON and CPU_OFF devices in the timeline, should include [n]

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/pm-graph/README        |   4 +-
 tools/power/pm-graph/sleepgraph.py | 387 +++++++++++++++++++++----------------
 2 files changed, 227 insertions(+), 164 deletions(-)

(limited to 'tools')

diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README
index 89d0a7dab4bc..da468bd510ca 100644
--- a/tools/power/pm-graph/README
+++ b/tools/power/pm-graph/README
@@ -6,7 +6,7 @@
    |_|                    |___/          |_|
 
    pm-graph: suspend/resume/boot timing analysis tools
-    Version: 5.7
+    Version: 5.8
      Author: Todd Brandt <todd.e.brandt@intel.com>
   Home Page: https://01.org/pm-graph
 
@@ -61,7 +61,7 @@
        - runs with python2 or python3, choice is made by /usr/bin/python link
        - python
        - python-configparser (for python2 sleepgraph)
-       - python-requests (for googlesheet.py)
+       - python-requests (for stresstester.py)
        - linux-tools-common (for turbostat usage in sleepgraph)
 
        Ubuntu:
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 1bc36a1db14f..81f4b8abbdf7 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -81,7 +81,7 @@ def ascii(text):
 #	 store system values and test parameters
 class SystemValues:
 	title = 'SleepGraph'
-	version = '5.7'
+	version = '5.8'
 	ansi = False
 	rs = 0
 	display = ''
@@ -92,8 +92,9 @@ class SystemValues:
 	testlog = True
 	dmesglog = True
 	ftracelog = False
+	acpidebug = True
 	tstat = True
-	mindevlen = 0.0
+	mindevlen = 0.0001
 	mincglen = 0.0
 	cgphase = ''
 	cgtest = -1
@@ -115,6 +116,7 @@ class SystemValues:
 	fpdtpath = '/sys/firmware/acpi/tables/FPDT'
 	epath = '/sys/kernel/debug/tracing/events/power/'
 	pmdpath = '/sys/power/pm_debug_messages'
+	acpipath='/sys/module/acpi/parameters/debug_level'
 	traceevents = [
 		'suspend_resume',
 		'wakeup_source_activate',
@@ -162,16 +164,16 @@ class SystemValues:
 	devdump = False
 	mixedphaseheight = True
 	devprops = dict()
+	cfgdef = dict()
 	platinfo = []
 	predelay = 0
 	postdelay = 0
-	pmdebug = ''
 	tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f'
 	tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f'
 	tracefuncs = {
 		'sys_sync': {},
 		'ksys_sync': {},
-		'pm_notifier_call_chain_robust': {},
+		'__pm_notifier_call_chain': {},
 		'pm_prepare_console': {},
 		'pm_notifier_call_chain': {},
 		'freeze_processes': {},
@@ -490,9 +492,9 @@ class SystemValues:
 		call('echo 0 > %s/wakealarm' % self.rtcpath, shell=True)
 	def initdmesg(self):
 		# get the latest time stamp from the dmesg log
-		fp = Popen('dmesg', stdout=PIPE).stdout
+		lines = Popen('dmesg', stdout=PIPE).stdout.readlines()
 		ktime = '0'
-		for line in fp:
+		for line in reversed(lines):
 			line = ascii(line).replace('\r\n', '')
 			idx = line.find('[')
 			if idx > 1:
@@ -500,7 +502,7 @@ class SystemValues:
 			m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
 			if(m):
 				ktime = m.group('ktime')
-		fp.close()
+				break
 		self.dmesgstart = float(ktime)
 	def getdmesg(self, testdata):
 		op = self.writeDatafileHeader(self.dmesgfile, testdata)
@@ -715,8 +717,6 @@ class SystemValues:
 			self.fsetVal('0', 'events/kprobes/enable')
 			self.fsetVal('', 'kprobe_events')
 			self.fsetVal('1024', 'buffer_size_kb')
-		if self.pmdebug:
-			self.setVal(self.pmdebug, self.pmdpath)
 	def setupAllKprobes(self):
 		for name in self.tracefuncs:
 			self.defaultKprobe(name, self.tracefuncs[name])
@@ -740,11 +740,7 @@ class SystemValues:
 		# turn trace off
 		self.fsetVal('0', 'tracing_on')
 		self.cleanupFtrace()
-		# pm debug messages
-		pv = self.getVal(self.pmdpath)
-		if pv != '1':
-			self.setVal('1', self.pmdpath)
-			self.pmdebug = pv
+		self.testVal(self.pmdpath, 'basic', '1')
 		# set the trace clock to global
 		self.fsetVal('global', 'trace_clock')
 		self.fsetVal('nop', 'current_tracer')
@@ -900,6 +896,14 @@ class SystemValues:
 		if isgz:
 			return gzip.open(filename, mode+'t')
 		return open(filename, mode)
+	def putlog(self, filename, text):
+		with self.openlog(filename, 'a') as fp:
+			fp.write(text)
+			fp.close()
+	def dlog(self, text):
+		self.putlog(self.dmesgfile, '# %s\n' % text)
+	def flog(self, text):
+		self.putlog(self.ftracefile, text)
 	def b64unzip(self, data):
 		try:
 			out = codecs.decode(base64.b64decode(data), 'zlib').decode()
@@ -992,9 +996,7 @@ class SystemValues:
 		# add a line for each of these commands with their outputs
 		for name, cmdline, info in cmdafter:
 			footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info))
-
-		with self.openlog(self.ftracefile, 'a') as fp:
-			fp.write(footer)
+		self.flog(footer)
 		return True
 	def commonPrefix(self, list):
 		if len(list) < 2:
@@ -1034,6 +1036,7 @@ class SystemValues:
 			cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2])
 			if not cmdpath or (begin and not delta):
 				continue
+			self.dlog('[%s]' % cmdline)
 			try:
 				fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout
 				info = ascii(fp.read()).strip()
@@ -1060,6 +1063,29 @@ class SystemValues:
 			else:
 				out.append((name, cmdline, '\tnothing' if not info else info))
 		return out
+	def testVal(self, file, fmt='basic', value=''):
+		if file == 'restoreall':
+			for f in self.cfgdef:
+				if os.path.exists(f):
+					fp = open(f, 'w')
+					fp.write(self.cfgdef[f])
+					fp.close()
+			self.cfgdef = dict()
+		elif value and os.path.exists(file):
+			fp = open(file, 'r+')
+			if fmt == 'radio':
+				m = re.match('.*\[(?P<v>.*)\].*', fp.read())
+				if m:
+					self.cfgdef[file] = m.group('v')
+			elif fmt == 'acpi':
+				line = fp.read().strip().split('\n')[-1]
+				m = re.match('.* (?P<v>[0-9A-Fx]*) .*', line)
+				if m:
+					self.cfgdef[file] = m.group('v')
+			else:
+				self.cfgdef[file] = fp.read().strip()
+			fp.write(value)
+			fp.close()
 	def haveTurbostat(self):
 		if not self.tstat:
 			return False
@@ -1201,6 +1227,57 @@ class SystemValues:
 			self.multitest[sz] *= 1440
 		elif unit == 'h':
 			self.multitest[sz] *= 60
+	def displayControl(self, cmd):
+		xset, ret = 'timeout 10 xset -d :0.0 {0}', 0
+		if self.sudouser:
+			xset = 'sudo -u %s %s' % (self.sudouser, xset)
+		if cmd == 'init':
+			ret = call(xset.format('dpms 0 0 0'), shell=True)
+			if not ret:
+				ret = call(xset.format('s off'), shell=True)
+		elif cmd == 'reset':
+			ret = call(xset.format('s reset'), shell=True)
+		elif cmd in ['on', 'off', 'standby', 'suspend']:
+			b4 = self.displayControl('stat')
+			ret = call(xset.format('dpms force %s' % cmd), shell=True)
+			if not ret:
+				curr = self.displayControl('stat')
+				self.vprint('Display Switched: %s -> %s' % (b4, curr))
+				if curr != cmd:
+					self.vprint('WARNING: Display failed to change to %s' % cmd)
+			if ret:
+				self.vprint('WARNING: Display failed to change to %s with xset' % cmd)
+				return ret
+		elif cmd == 'stat':
+			fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
+			ret = 'unknown'
+			for line in fp:
+				m = re.match('[\s]*Monitor is (?P<m>.*)', ascii(line))
+				if(m and len(m.group('m')) >= 2):
+					out = m.group('m').lower()
+					ret = out[3:] if out[0:2] == 'in' else out
+					break
+			fp.close()
+		return ret
+	def setRuntimeSuspend(self, before=True):
+		if before:
+			# runtime suspend disable or enable
+			if self.rs > 0:
+				self.rstgt, self.rsval, self.rsdir = 'on', 'auto', 'enabled'
+			else:
+				self.rstgt, self.rsval, self.rsdir = 'auto', 'on', 'disabled'
+			pprint('CONFIGURING RUNTIME SUSPEND...')
+			self.rslist = deviceInfo(self.rstgt)
+			for i in self.rslist:
+				self.setVal(self.rsval, i)
+			pprint('runtime suspend %s on all devices (%d changed)' % (self.rsdir, len(self.rslist)))
+			pprint('waiting 5 seconds...')
+			time.sleep(5)
+		else:
+			# runtime suspend re-enable or re-disable
+			for i in self.rslist:
+				self.setVal(self.rstgt, i)
+			pprint('runtime suspend settings restored on %d devices' % len(self.rslist))
 
 sysvals = SystemValues()
 switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
@@ -1640,15 +1717,20 @@ class Data:
 			if 'resume_machine' in phase and 'suspend_machine' in lp:
 				tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start']
 				tL = tR - tS
-				if tL > 0:
-					left = True if tR > tZero else False
-					self.trimTime(tS, tL, left)
-					if 'trying' in self.dmesg[lp] and self.dmesg[lp]['trying'] >= 0.001:
-						tTry = round(self.dmesg[lp]['trying'] * 1000)
-						text = '%.0f (-%.0f waking)' % (tL * 1000, tTry)
+				if tL <= 0:
+					continue
+				left = True if tR > tZero else False
+				self.trimTime(tS, tL, left)
+				if 'waking' in self.dmesg[lp]:
+					tCnt = self.dmesg[lp]['waking'][0]
+					if self.dmesg[lp]['waking'][1] >= 0.001:
+						tTry = '-%.0f' % (round(self.dmesg[lp]['waking'][1] * 1000))
 					else:
-						text = '%.0f' % (tL * 1000)
-					self.tLow.append(text)
+						tTry = '-%.3f' % (self.dmesg[lp]['waking'][1] * 1000)
+					text = '%.0f (%s ms waking %d times)' % (tL * 1000, tTry, tCnt)
+				else:
+					text = '%.0f' % (tL * 1000)
+				self.tLow.append(text)
 			lp = phase
 	def getMemTime(self):
 		if not self.hwstart or not self.hwend:
@@ -1921,7 +2003,7 @@ class Data:
 			for dev in list:
 				length = (list[dev]['end'] - list[dev]['start']) * 1000
 				width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal)
-				if width != '0.000000' and length >= mindevlen:
+				if length >= mindevlen:
 					devlist.append(dev)
 			self.tdevlist[phase] = devlist
 	def addHorizontalDivider(self, devname, devend):
@@ -3316,9 +3398,10 @@ def parseTraceLog(live=False):
 							# trim out s2idle loops, track time trying to freeze
 							llp = data.lastPhase(2)
 							if llp.startswith('suspend_machine'):
-								if 'trying' not in data.dmesg[llp]:
-									data.dmesg[llp]['trying'] = 0
-								data.dmesg[llp]['trying'] += \
+								if 'waking' not in data.dmesg[llp]:
+									data.dmesg[llp]['waking'] = [0, 0.0]
+								data.dmesg[llp]['waking'][0] += 1
+								data.dmesg[llp]['waking'][1] += \
 									t.time - data.dmesg[lp]['start']
 							data.currphase = ''
 							del data.dmesg[lp]
@@ -4555,7 +4638,7 @@ def createHTML(testruns, testfail):
 				# draw the devices for this phase
 				phaselist = data.dmesg[b]['list']
 				for d in sorted(data.tdevlist[b]):
-					dname = d if '[' not in d else d.split('[')[0]
+					dname = d if ('[' not in d or 'CPU' in d) else d.split('[')[0]
 					name, dev = dname, phaselist[d]
 					drv = xtraclass = xtrainfo = xtrastyle = ''
 					if 'htmlclass' in dev:
@@ -5194,156 +5277,146 @@ def addScriptCode(hf, testruns):
 	'</script>\n'
 	hf.write(script_code);
 
-def setRuntimeSuspend(before=True):
-	global sysvals
-	sv = sysvals
-	if sv.rs == 0:
-		return
-	if before:
-		# runtime suspend disable or enable
-		if sv.rs > 0:
-			sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled'
-		else:
-			sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled'
-		pprint('CONFIGURING RUNTIME SUSPEND...')
-		sv.rslist = deviceInfo(sv.rstgt)
-		for i in sv.rslist:
-			sv.setVal(sv.rsval, i)
-		pprint('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
-		pprint('waiting 5 seconds...')
-		time.sleep(5)
-	else:
-		# runtime suspend re-enable or re-disable
-		for i in sv.rslist:
-			sv.setVal(sv.rstgt, i)
-		pprint('runtime suspend settings restored on %d devices' % len(sv.rslist))
-
 # Function: executeSuspend
 # Description:
 #	 Execute system suspend through the sysfs interface, then copy the output
 #	 dmesg and ftrace files to the test output directory.
 def executeSuspend(quiet=False):
-	pm = ProcessMonitor()
-	tp = sysvals.tpath
-	if sysvals.wifi:
-		wifi = sysvals.checkWifi()
+	sv, tp, pm = sysvals, sysvals.tpath, ProcessMonitor()
+	if sv.wifi:
+		wifi = sv.checkWifi()
+		sv.dlog('wifi check, connected device is "%s"' % wifi)
 	testdata = []
 	# run these commands to prepare the system for suspend
-	if sysvals.display:
+	if sv.display:
 		if not quiet:
-			pprint('SET DISPLAY TO %s' % sysvals.display.upper())
-		displayControl(sysvals.display)
+			pprint('SET DISPLAY TO %s' % sv.display.upper())
+		ret = sv.displayControl(sv.display)
+		sv.dlog('xset display %s, ret = %d' % (sv.display, ret))
 		time.sleep(1)
-	if sysvals.sync:
+	if sv.sync:
 		if not quiet:
 			pprint('SYNCING FILESYSTEMS')
+		sv.dlog('syncing filesystems')
 		call('sync', shell=True)
-	# mark the start point in the kernel ring buffer just as we start
-	sysvals.initdmesg()
+	sv.dlog('read dmesg')
+	sv.initdmesg()
 	# start ftrace
-	if(sysvals.usecallgraph or sysvals.usetraceevents):
+	if(sv.usecallgraph or sv.usetraceevents):
 		if not quiet:
 			pprint('START TRACING')
-		sysvals.fsetVal('1', 'tracing_on')
-		if sysvals.useprocmon:
+		sv.dlog('start ftrace tracing')
+		sv.fsetVal('1', 'tracing_on')
+		if sv.useprocmon:
+			sv.dlog('start the process monitor')
 			pm.start()
-	sysvals.cmdinfo(True)
+	sv.dlog('run the cmdinfo list before')
+	sv.cmdinfo(True)
 	# execute however many s/r runs requested
-	for count in range(1,sysvals.execcount+1):
+	for count in range(1,sv.execcount+1):
 		# x2delay in between test runs
-		if(count > 1 and sysvals.x2delay > 0):
-			sysvals.fsetVal('WAIT %d' % sysvals.x2delay, 'trace_marker')
-			time.sleep(sysvals.x2delay/1000.0)
-			sysvals.fsetVal('WAIT END', 'trace_marker')
+		if(count > 1 and sv.x2delay > 0):
+			sv.fsetVal('WAIT %d' % sv.x2delay, 'trace_marker')
+			time.sleep(sv.x2delay/1000.0)
+			sv.fsetVal('WAIT END', 'trace_marker')
 		# start message
-		if sysvals.testcommand != '':
+		if sv.testcommand != '':
 			pprint('COMMAND START')
 		else:
-			if(sysvals.rtcwake):
+			if(sv.rtcwake):
 				pprint('SUSPEND START')
 			else:
 				pprint('SUSPEND START (press a key to resume)')
 		# set rtcwake
-		if(sysvals.rtcwake):
+		if(sv.rtcwake):
 			if not quiet:
-				pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
-			sysvals.rtcWakeAlarmOn()
+				pprint('will issue an rtcwake in %d seconds' % sv.rtcwaketime)
+			sv.dlog('enable RTC wake alarm')
+			sv.rtcWakeAlarmOn()
 		# start of suspend trace marker
-		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			sysvals.fsetVal(datetime.now().strftime(sysvals.tmstart), 'trace_marker')
+		if(sv.usecallgraph or sv.usetraceevents):
+			sv.fsetVal(datetime.now().strftime(sv.tmstart), 'trace_marker')
 		# predelay delay
-		if(count == 1 and sysvals.predelay > 0):
-			sysvals.fsetVal('WAIT %d' % sysvals.predelay, 'trace_marker')
-			time.sleep(sysvals.predelay/1000.0)
-			sysvals.fsetVal('WAIT END', 'trace_marker')
+		if(count == 1 and sv.predelay > 0):
+			sv.fsetVal('WAIT %d' % sv.predelay, 'trace_marker')
+			time.sleep(sv.predelay/1000.0)
+			sv.fsetVal('WAIT END', 'trace_marker')
 		# initiate suspend or command
+		sv.dlog('system executing a suspend')
 		tdata = {'error': ''}
-		if sysvals.testcommand != '':
-			res = call(sysvals.testcommand+' 2>&1', shell=True);
+		if sv.testcommand != '':
+			res = call(sv.testcommand+' 2>&1', shell=True);
 			if res != 0:
 				tdata['error'] = 'cmd returned %d' % res
 		else:
-			mode = sysvals.suspendmode
-			if sysvals.memmode and os.path.exists(sysvals.mempowerfile):
+			mode = sv.suspendmode
+			if sv.memmode and os.path.exists(sv.mempowerfile):
 				mode = 'mem'
-				pf = open(sysvals.mempowerfile, 'w')
-				pf.write(sysvals.memmode)
-				pf.close()
-			if sysvals.diskmode and os.path.exists(sysvals.diskpowerfile):
+				sv.testVal(sv.mempowerfile, 'radio', sv.memmode)
+			if sv.diskmode and os.path.exists(sv.diskpowerfile):
 				mode = 'disk'
-				pf = open(sysvals.diskpowerfile, 'w')
-				pf.write(sysvals.diskmode)
-				pf.close()
-			if mode == 'freeze' and sysvals.haveTurbostat():
+				sv.testVal(sv.diskpowerfile, 'radio', sv.diskmode)
+			if sv.acpidebug:
+				sv.testVal(sv.acpipath, 'acpi', '0xe')
+			if mode == 'freeze' and sv.haveTurbostat():
 				# execution will pause here
-				turbo = sysvals.turbostat()
+				turbo = sv.turbostat()
 				if turbo:
 					tdata['turbo'] = turbo
 			else:
-				pf = open(sysvals.powerfile, 'w')
+				pf = open(sv.powerfile, 'w')
 				pf.write(mode)
 				# execution will pause here
 				try:
 					pf.close()
 				except Exception as e:
 					tdata['error'] = str(e)
-		if(sysvals.rtcwake):
-			sysvals.rtcWakeAlarmOff()
+		sv.dlog('system returned from resume')
+		# reset everything
+		sv.testVal('restoreall')
+		if(sv.rtcwake):
+			sv.dlog('disable RTC wake alarm')
+			sv.rtcWakeAlarmOff()
 		# postdelay delay
-		if(count == sysvals.execcount and sysvals.postdelay > 0):
-			sysvals.fsetVal('WAIT %d' % sysvals.postdelay, 'trace_marker')
-			time.sleep(sysvals.postdelay/1000.0)
-			sysvals.fsetVal('WAIT END', 'trace_marker')
+		if(count == sv.execcount and sv.postdelay > 0):
+			sv.fsetVal('WAIT %d' % sv.postdelay, 'trace_marker')
+			time.sleep(sv.postdelay/1000.0)
+			sv.fsetVal('WAIT END', 'trace_marker')
 		# return from suspend
 		pprint('RESUME COMPLETE')
-		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			sysvals.fsetVal(datetime.now().strftime(sysvals.tmend), 'trace_marker')
-		if sysvals.wifi and wifi:
-			tdata['wifi'] = sysvals.pollWifi(wifi)
-		if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
+		if(sv.usecallgraph or sv.usetraceevents):
+			sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+		if sv.wifi and wifi:
+			tdata['wifi'] = sv.pollWifi(wifi)
+			sv.dlog('wifi check, %s' % tdata['wifi'])
+		if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'):
+			sv.dlog('read the ACPI FPDT')
 			tdata['fw'] = getFPDT(False)
 		testdata.append(tdata)
-	cmdafter = sysvals.cmdinfo(False)
+	sv.dlog('run the cmdinfo list after')
+	cmdafter = sv.cmdinfo(False)
 	# stop ftrace
-	if(sysvals.usecallgraph or sysvals.usetraceevents):
-		if sysvals.useprocmon:
+	if(sv.usecallgraph or sv.usetraceevents):
+		if sv.useprocmon:
+			sv.dlog('stop the process monitor')
 			pm.stop()
-		sysvals.fsetVal('0', 'tracing_on')
+		sv.fsetVal('0', 'tracing_on')
 	# grab a copy of the dmesg output
 	if not quiet:
 		pprint('CAPTURING DMESG')
-	sysvals.getdmesg(testdata)
+	sysvals.dlog('EXECUTION TRACE END')
+	sv.getdmesg(testdata)
 	# grab a copy of the ftrace output
-	if(sysvals.usecallgraph or sysvals.usetraceevents):
+	if(sv.usecallgraph or sv.usetraceevents):
 		if not quiet:
 			pprint('CAPTURING TRACE')
-		op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata)
+		op = sv.writeDatafileHeader(sv.ftracefile, testdata)
 		fp = open(tp+'trace', 'r')
 		for line in fp:
 			op.write(line)
 		op.close()
-		sysvals.fsetVal('', 'trace')
-		sysvals.platforminfo(cmdafter)
+		sv.fsetVal('', 'trace')
+		sv.platforminfo(cmdafter)
 
 def readFile(file):
 	if os.path.islink(file):
@@ -5586,39 +5659,6 @@ def dmidecode(mempath, fatal=False):
 		count += 1
 	return out
 
-def displayControl(cmd):
-	xset, ret = 'timeout 10 xset -d :0.0 {0}', 0
-	if sysvals.sudouser:
-		xset = 'sudo -u %s %s' % (sysvals.sudouser, xset)
-	if cmd == 'init':
-		ret = call(xset.format('dpms 0 0 0'), shell=True)
-		if not ret:
-			ret = call(xset.format('s off'), shell=True)
-	elif cmd == 'reset':
-		ret = call(xset.format('s reset'), shell=True)
-	elif cmd in ['on', 'off', 'standby', 'suspend']:
-		b4 = displayControl('stat')
-		ret = call(xset.format('dpms force %s' % cmd), shell=True)
-		if not ret:
-			curr = displayControl('stat')
-			sysvals.vprint('Display Switched: %s -> %s' % (b4, curr))
-			if curr != cmd:
-				sysvals.vprint('WARNING: Display failed to change to %s' % cmd)
-		if ret:
-			sysvals.vprint('WARNING: Display failed to change to %s with xset' % cmd)
-			return ret
-	elif cmd == 'stat':
-		fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
-		ret = 'unknown'
-		for line in fp:
-			m = re.match('[\s]*Monitor is (?P<m>.*)', ascii(line))
-			if(m and len(m.group('m')) >= 2):
-				out = m.group('m').lower()
-				ret = out[3:] if out[0:2] == 'in' else out
-				break
-		fp.close()
-	return ret
-
 # Function: getFPDT
 # Description:
 #	 Read the acpi bios tables and pull out FPDT, the firmware data
@@ -6001,8 +6041,19 @@ def rerunTest(htmlfile=''):
 #	 execute a suspend/resume, gather the logs, and generate the output
 def runTest(n=0, quiet=False):
 	# prepare for the test
-	sysvals.initFtrace(quiet)
 	sysvals.initTestOutput('suspend')
+	op = sysvals.writeDatafileHeader(sysvals.dmesgfile, [])
+	op.write('# EXECUTION TRACE START\n')
+	op.close()
+	if n <= 1:
+		if sysvals.rs != 0:
+			sysvals.dlog('%sabling runtime suspend' % ('en' if sysvals.rs > 0 else 'dis'))
+			sysvals.setRuntimeSuspend(True)
+		if sysvals.display:
+			ret = sysvals.displayControl('init')
+			sysvals.dlog('xset display init, ret = %d' % ret)
+	sysvals.dlog('initialize ftrace')
+	sysvals.initFtrace(quiet)
 
 	# execute the test
 	executeSuspend(quiet)
@@ -6098,8 +6149,16 @@ def data_from_html(file, outpath, issues, fulldetail=False):
 	if wifi:
 		extra['wifi'] = wifi
 	low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
-	if low and 'waking' in low:
-		issue = 'FREEZEWAKE'
+	for lowstr in ['waking', '+']:
+		if not low:
+			break
+		if lowstr not in low:
+			continue
+		if lowstr == '+':
+			issue = 'S2LOOPx%d' % len(low.split('+'))
+		else:
+			m = re.match('.*waking *(?P<n>[0-9]*) *times.*', low)
+			issue = 'S2WAKEx%s' % m.group('n') if m else 'S2WAKExNaN'
 		match = [i for i in issues if i['match'] == issue]
 		if len(match) > 0:
 			match[0]['count'] += 1
@@ -6605,6 +6664,11 @@ if __name__ == '__main__':
 				val = next(args)
 			except:
 				doError('-info requires one string argument', True)
+		elif(arg == '-desc'):
+			try:
+				val = next(args)
+			except:
+				doError('-desc requires one string argument', True)
 		elif(arg == '-rs'):
 			try:
 				val = next(args)
@@ -6814,9 +6878,9 @@ if __name__ == '__main__':
 			runSummary(sysvals.outdir, True, genhtml)
 		elif(cmd in ['xon', 'xoff', 'xstandby', 'xsuspend', 'xinit', 'xreset']):
 			sysvals.verbose = True
-			ret = displayControl(cmd[1:])
+			ret = sysvals.displayControl(cmd[1:])
 		elif(cmd == 'xstat'):
-			pprint('Display Status: %s' % displayControl('stat').upper())
+			pprint('Display Status: %s' % sysvals.displayControl('stat').upper())
 		elif(cmd == 'wificheck'):
 			dev = sysvals.checkWifi()
 			if dev:
@@ -6854,12 +6918,8 @@ if __name__ == '__main__':
 	if mode.startswith('disk-'):
 		sysvals.diskmode = mode.split('-', 1)[-1]
 		sysvals.suspendmode = 'disk'
-
 	sysvals.systemInfo(dmidecode(sysvals.mempath))
 
-	setRuntimeSuspend(True)
-	if sysvals.display:
-		displayControl('init')
 	failcnt, ret = 0, 0
 	if sysvals.multitest['run']:
 		# run multiple tests in a separate subdirectory
@@ -6900,7 +6960,10 @@ if __name__ == '__main__':
 			sysvals.testdir = sysvals.outdir
 		# run the test in the current directory
 		ret = runTest()
+
+	# reset to default values after testing
 	if sysvals.display:
-		displayControl('reset')
-	setRuntimeSuspend(False)
+		sysvals.displayControl('reset')
+	if sysvals.rs != 0:
+		sysvals.setRuntimeSuspend(False)
 	sys.exit(ret)
-- 
cgit v1.2.3-70-g09d2


From 93035242d9e22f2aad6ac0b886f19444713c0089 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Sun, 15 Nov 2020 14:06:50 +0200
Subject: tools/testing/scatterlist: Test dynamic __sg_alloc_table_from_pages

Add few cases to test the dynamic allocation flow of
__sg_alloc_table_from_pages.

Link: https://lore.kernel.org/r/20201115120650.139277-1-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 tools/testing/scatterlist/main.c | 64 +++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index f561aed7c657..2d27e47bbf2f 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -9,6 +9,7 @@ struct test {
 	int alloc_ret;
 	unsigned num_pages;
 	unsigned *pfn;
+	unsigned *pfn_app;
 	unsigned size;
 	unsigned int max_seg;
 	unsigned int expected_segments;
@@ -52,31 +53,39 @@ int main(void)
 {
 	const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
 	struct test *test, tests[] = {
-		{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
-		{ 0, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
-		{ 0, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
-		{ 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
-		{ 0, 1, pfn(0), 1, sgmax, 1 },
-		{ 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
-		{ 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 },
-		{ 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 },
-		{ 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 },
-		{ 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 },
-		{ 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 },
-		{ 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
-		{ 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
-		{ 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 },
-		{ 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
-		{ 0, 0, NULL, 0, 0, 0 },
+		{ -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 },
+		{ 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 },
+		{ 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 },
+		{ 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 },
+		{ 0, 1, pfn(0), NULL, 1, sgmax, 1 },
+		{ 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 2, pfn(1, 0), NULL, 2 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 3, pfn(0, 1, 2), NULL, 3 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 3, pfn(0, 1, 2), pfn(3, 4, 5), 3 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 3, pfn(0, 1, 2), pfn(4, 5, 6), 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(0, 2, 1), NULL, 3 * PAGE_SIZE, sgmax, 3 },
+		{ 0, 3, pfn(0, 1, 3), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(1, 2, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(1, 3, 4), NULL, 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 4, pfn(0, 1, 3, 4), NULL, 4 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 5, pfn(0, 1, 3, 4, 5), NULL, 5 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 5, pfn(0, 1, 3, 4, 6), NULL, 5 * PAGE_SIZE, sgmax, 3 },
+		{ 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 5, pfn(0, 1, 2, 3, 4), NULL, 5 * PAGE_SIZE, 2 * PAGE_SIZE,
+		  3 },
+		{ 0, 6, pfn(0, 1, 2, 3, 4, 5), NULL, 6 * PAGE_SIZE,
+		  2 * PAGE_SIZE, 3 },
+		{ 0, 6, pfn(0, 2, 3, 4, 5, 6), NULL, 6 * PAGE_SIZE,
+		  2 * PAGE_SIZE, 4 },
+		{ 0, 6, pfn(0, 1, 3, 4, 5, 6), pfn(7, 8, 9, 10, 11, 12),
+		  6 * PAGE_SIZE, 12 * PAGE_SIZE, 2 },
+		{ 0, 0, NULL, NULL, 0, 0, 0 },
 	};
 	unsigned int i;
 
 	for (i = 0, test = tests; test->expected_segments; test++, i++) {
+		int left_pages = test->pfn_app ? test->num_pages : 0;
 		struct page *pages[MAX_PAGES];
 		struct sg_table st;
 		struct scatterlist *sg;
@@ -84,14 +93,23 @@ int main(void)
 		set_pages(pages, test->pfn, test->num_pages);
 
 		sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
-				test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+				test->size, test->max_seg, NULL, left_pages, GFP_KERNEL);
 		assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
 
 		if (test->alloc_ret)
 			continue;
 
+		if (test->pfn_app) {
+			set_pages(pages, test->pfn_app, test->num_pages);
+			sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+					test->size, test->max_seg, sg, 0, GFP_KERNEL);
+
+			assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+		}
+
 		VALIDATE(st.nents == test->expected_segments, &st, test);
-		VALIDATE(st.orig_nents == test->expected_segments, &st, test);
+		if (!test->pfn_app)
+			VALIDATE(st.orig_nents == test->expected_segments, &st, test);
 
 		sg_free_table(&st);
 	}
-- 
cgit v1.2.3-70-g09d2


From 716572b0003ef67a4889bd7d85baf5099c5a0248 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 2 Nov 2020 11:51:10 -0800
Subject: selftests/x86/fsgsbase: Fix GS == 1, 2, and 3 tests

Setting GS to 1, 2, or 3 causes a nonsensical part of the IRET microcode
to change GS back to zero on a return from kernel mode to user mode. The
result is that these tests fail randomly depending on when interrupts
happen. Detect when this happens and let the test pass.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/7567fd44a1d60a9424f25b19a998f12149993b0d.1604346596.git.luto@kernel.org
---
 tools/testing/selftests/x86/fsgsbase.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 7161cfc2e60b..8c780cce941d 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -392,8 +392,8 @@ static void set_gs_and_switch_to(unsigned long local,
 		local = read_base(GS);
 
 		/*
-		 * Signal delivery seems to mess up weird selectors.  Put it
-		 * back.
+		 * Signal delivery is quite likely to change a selector
+		 * of 1, 2, or 3 back to 0 due to IRET being defective.
 		 */
 		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
 	} else {
@@ -411,6 +411,14 @@ static void set_gs_and_switch_to(unsigned long local,
 	if (base == local && sel_pre_sched == sel_post_sched) {
 		printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
 		       sel_pre_sched, local);
+	} else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 &&
+		   sel_post_sched == 0) {
+		/*
+		 * IRET is misdesigned and will squash selectors 1, 2, or 3
+		 * to zero.  Don't fail the test just because this happened.
+		 */
+		printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n",
+		       sel_pre_sched, local, sel_post_sched, base);
 	} else {
 		nerrs++;
 		printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
-- 
cgit v1.2.3-70-g09d2


From aeaaf005da1de075929e56562dced4a58238efc4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 2 Nov 2020 11:51:11 -0800
Subject: selftests/x86: Add missing .note.GNU-stack sections

Several of the x86 selftests end up with executable stacks because
the asm was missing the annotation that says that they are modern
and don't need executable stacks.  Add the annotations.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/6f043c03e9e0e4557e1e975a63b07a4d18965a68.1604346596.git.luto@kernel.org
---
 tools/testing/selftests/x86/raw_syscall_helper_32.S | 2 ++
 tools/testing/selftests/x86/thunks.S                | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
index 94410fa2b5ed..a10d36afdca0 100644
--- a/tools/testing/selftests/x86/raw_syscall_helper_32.S
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -45,3 +45,5 @@ int80_and_ret:
 
 	.type int80_and_ret, @function
 	.size int80_and_ret, .-int80_and_ret
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
index 1bb5d62c16a4..a2d47d8344d4 100644
--- a/tools/testing/selftests/x86/thunks.S
+++ b/tools/testing/selftests/x86/thunks.S
@@ -57,3 +57,5 @@ call32_from_64:
 	ret
 
 .size call32_from_64, .-call32_from_64
+
+.section .note.GNU-stack,"",%progbits
-- 
cgit v1.2.3-70-g09d2


From 24eb2a02a68c98d46878214f46f855d934dc73ff Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 23 Nov 2020 09:12:26 +0200
Subject: selftests: mlxsw: Add blackhole nexthop configuration tests

Test the mlxsw allows blackhole nexthops to be installed and that the
nexthops are marked as offloaded.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 25 +++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index 5de47d72f8c9..a2eff5f58209 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -32,6 +32,7 @@ ALL_TESTS="
 	nexthop_obj_invalid_test
 	nexthop_obj_offload_test
 	nexthop_obj_group_offload_test
+	nexthop_obj_blackhole_offload_test
 	nexthop_obj_route_offload_test
 	devlink_reload_test
 "
@@ -693,9 +694,6 @@ nexthop_obj_invalid_test()
 	ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
 	check_fail $? "managed to configure a nexthop with MPLS encap when should not"
 
-	ip nexthop add id 1 blackhole
-	check_fail $? "managed to configure a blackhole nexthop when should not"
-
 	ip nexthop add id 1 dev $swp1
 	ip nexthop add id 2 dev $swp1
 	ip nexthop add id 10 group 1/2
@@ -817,6 +815,27 @@ nexthop_obj_group_offload_test()
 	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
 }
 
+nexthop_obj_blackhole_offload_test()
+{
+	# Test offload indication of blackhole nexthop objects
+	RET=0
+
+	ip nexthop add id 1 blackhole
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 1
+	check_err $? "Blackhole nexthop not marked as offloaded when should"
+
+	ip nexthop add id 10 group 1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip nexthop show id 10
+	check_err $? "Nexthop group not marked as offloaded when should"
+
+	log_test "blackhole nexthop objects offload indication"
+
+	ip nexthop del id 10
+	ip nexthop del id 1
+}
+
 nexthop_obj_route_offload_test()
 {
 	# Test offload indication of routes using nexthop objects
-- 
cgit v1.2.3-70-g09d2


From 1beaff779f783268dcc61c0a8fbd16e9c8b0abfa Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 23 Nov 2020 09:12:27 +0200
Subject: selftests: forwarding: Add blackhole nexthops tests

Test that IPv4 and IPv6 ping fail when the route is using a blackhole
nexthop or a group with a blackhole nexthop. Test that ping passes when
the route starts using a valid nexthop.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/forwarding/router_mpath_nh.sh    | 58 +++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index e8c2573d5232..388e4492b81b 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,7 +1,13 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	multipath_test
+	ping_ipv4_blackhole
+	ping_ipv6_blackhole
+"
 NUM_NETIFS=8
 source lib.sh
 
@@ -302,6 +308,56 @@ multipath_test()
 	multipath6_l4_test "Weighted MP 11:45" 11 45
 }
 
+ping_ipv4_blackhole()
+{
+	RET=0
+
+	ip nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002
+	ping_do $h1 198.51.100.2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103
+	ping_do $h1 198.51.100.2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv4 blackhole ping"
+
+	ip nexthop del id 1002
+	ip nexthop del id 1001
+}
+
+ping_ipv6_blackhole()
+{
+	RET=0
+
+	ip -6 nexthop add id 1001 blackhole
+	ip nexthop add id 1002 group 1001
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002
+	ping6_do $h1 2001:db8:2::2
+	check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+	ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106
+	ping6_do $h1 2001:db8:2::2
+	check_err $? "ping failed with a valid nexthop"
+
+	log_test "IPv6 blackhole ping"
+
+	ip nexthop del id 1002
+	ip -6 nexthop del id 1001
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
cgit v1.2.3-70-g09d2


From 84e8feeadcf048903e65b7d82769c58576c506b0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 23 Nov 2020 09:12:30 +0200
Subject: selftests: mlxsw: Add blackhole_nexthop trap test

Test that packets hitting a blackhole nexthop are trapped to the CPU
when the trap is enabled. Test that packets are not reported when the
trap is disabled.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/devlink_trap_l3_drops.sh     | 36 ++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index f5abb1ebd392..4029833f7e27 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -52,6 +52,7 @@ ALL_TESTS="
 	blackhole_route_test
 	irif_disabled_test
 	erif_disabled_test
+	blackhole_nexthop_test
 "
 
 NUM_NETIFS=4
@@ -647,6 +648,41 @@ erif_disabled_test()
 	devlink_trap_action_set $trap_name "drop"
 }
 
+__blackhole_nexthop_test()
+{
+	local flags=$1; shift
+	local subnet=$1; shift
+	local proto=$1; shift
+	local dip=$1; shift
+	local trap_name="blackhole_nexthop"
+	local mz_pid
+
+	RET=0
+
+	ip -$flags nexthop add id 1 blackhole
+	ip -$flags route add $subnet nhid 1
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower skip_hw dst_ip $dip ip_proto udp action drop
+
+	# Generate packets to the blackhole nexthop
+	$MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+	log_test "Blackhole nexthop: IPv$flags"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+	ip -$flags route del $subnet
+	ip -$flags nexthop del id 1
+}
+
+blackhole_nexthop_test()
+{
+	__blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4
+	__blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-70-g09d2


From 05a98d7672731aeb5f9837b35cc7fe70444e70bd Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Sat, 21 Nov 2020 21:22:04 -0500
Subject: selftest/bpf: Fix link in readme

The link was bad because of invalid rst; it was pointing to itself and
was rendering badly.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201122022205.57229-1-andreimatei1@gmail.com
---
 tools/testing/selftests/bpf/README.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index ac9eda830187..3b8d8885892d 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -2,7 +2,10 @@
 BPF Selftest Notes
 ==================
 General instructions on running selftests can be found in
-`Documentation/bpf/bpf_devel_QA.rst`_.
+`Documentation/bpf/bpf_devel_QA.rst`__.
+
+__ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests
+
 
 Additional information about selftest failures are
 documented here.
-- 
cgit v1.2.3-70-g09d2


From 1c26ac6ab3ce47ee2e6342373681dedbb97e21a3 Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Sat, 21 Nov 2020 21:22:05 -0500
Subject: selftest/bpf: Fix rst formatting in readme

A couple of places in the readme had invalid rst formatting causing the
rendering to be off. This patch fixes them with minimal edits.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201122022205.57229-2-andreimatei1@gmail.com
---
 tools/testing/selftests/bpf/README.rst | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3b8d8885892d..ca064180d4d0 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -33,11 +33,12 @@ The verifier will reject such code with above error.
 At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and
 the insn 20 undoes map_value addition. It is currently impossible for the
 verifier to understand such speculative pointer arithmetic.
-Hence
-    https://reviews.llvm.org/D85570
-addresses it on the compiler side. It was committed on llvm 12.
+Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12.
+
+__ https://reviews.llvm.org/D85570
 
 The corresponding C code
+
 .. code-block:: c
 
   for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
@@ -80,10 +81,11 @@ The symptom for ``bpf_iter/netlink`` looks like
   17: (7b) *(u64 *)(r7 +0) = r2
   only read is supported
 
-This is due to a llvm BPF backend bug. The fix 
-  https://reviews.llvm.org/D78466
+This is due to a llvm BPF backend bug. `The fix`__
 has been pushed to llvm 10.x release branch and will be
-available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
+available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
+
+__  https://reviews.llvm.org/D78466
 
 BPF CO-RE-based tests and Clang version
 =======================================
@@ -97,11 +99,11 @@ them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too
 old to support them, they shouldn't cause build failures or runtime test
 failures:
 
-  - __builtin_btf_type_id() ([0], [1], [2]);
-  - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]).
+- __builtin_btf_type_id() [0_, 1_, 2_];
+- __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_].
 
-  [0] https://reviews.llvm.org/D74572
-  [1] https://reviews.llvm.org/D74668
-  [2] https://reviews.llvm.org/D85174
-  [3] https://reviews.llvm.org/D83878
-  [4] https://reviews.llvm.org/D83242
+.. _0: https://reviews.llvm.org/D74572
+.. _1: https://reviews.llvm.org/D74668
+.. _2: https://reviews.llvm.org/D85174
+.. _3: https://reviews.llvm.org/D83878
+.. _4: https://reviews.llvm.org/D83242
-- 
cgit v1.2.3-70-g09d2


From 68878a5c5b852d17f5827ce8a0f6fbd8b4cdfada Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 24 Nov 2020 18:41:00 +0800
Subject: bpftool: Fix error return value in build_btf_type_table

An appropriate return value should be set on the failed path.

Fixes: 4d374ba0bf30 ("tools: bpftool: implement "bpftool btf show|list"")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201124104100.491-1-thunder.leizhen@huawei.com
---
 tools/bpf/bpftool/btf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 8ab142ff5eac..2afb7d5b1aca 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -693,6 +693,7 @@ build_btf_type_table(struct btf_attach_table *tab, enum bpf_obj_type type,
 		obj_node = calloc(1, sizeof(*obj_node));
 		if (!obj_node) {
 			p_err("failed to allocate memory: %s", strerror(errno));
+			err = -ENOMEM;
 			goto err_free;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From db13db9f67fe5049159a05e870daedcee5879f8d Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Tue, 24 Nov 2020 15:21:14 +0800
Subject: libbpf: Add support for canceling cached_cons advance

Add a new function for returning descriptors the user received
after an xsk_ring_cons__peek call. After the application has
gotten a number of descriptors from a ring, it might not be able
to or want to process them all for various reasons. Therefore,
it would be useful to have an interface for returning or
cancelling a number of them so that they are returned to the ring.

This patch adds a new function called xsk_ring_cons__cancel that
performs this operation on nb descriptors counted from the end of
the batch of descriptors that was received through the peek call.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
[ Magnus Karlsson: rewrote changelog ]
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/1606202474-8119-1-git-send-email-lirongqing@baidu.com
---
 tools/lib/bpf/xsk.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 1069c46364ff..1719a327e5f9 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -153,6 +153,12 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
 	return entries;
 }
 
+static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons,
+					 size_t nb)
+{
+	cons->cached_cons -= nb;
+}
+
 static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
 {
 	/* Make sure data has been read before indicating we are done
-- 
cgit v1.2.3-70-g09d2


From 27672f0d280a3f286a410a8db2004f46ace72a17 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 24 Nov 2020 15:12:09 +0000
Subject: bpf: Add a BPF helper for getting the IMA hash of an inode

Provide a wrapper function to get the IMA hash of an inode. This helper
is useful in fingerprinting files (e.g executables on execution) and
using these fingerprints in detections like an executable unlinking
itself.

Since the ima_inode_hash can sleep, it's only allowed for sleepable
LSM hooks.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201124151210.1081188-3-kpsingh@chromium.org
---
 include/uapi/linux/bpf.h       | 11 +++++++++++
 kernel/bpf/bpf_lsm.c           | 26 ++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h | 11 +++++++++++
 4 files changed, 50 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3ca6146f001a..c3458ec1f30a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3807,6 +3807,16 @@ union bpf_attr {
  * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
  * 	Return
  * 		Current *ktime*.
+ *
+ * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
+ *	Description
+ *		Returns the stored IMA hash of the *inode* (if it's avaialable).
+ *		If the hash is larger than *size*, then only *size*
+ *		bytes will be copied to *dst*
+ *	Return
+ *		The **hash_algo** is returned on success,
+ *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
+ *		invalid arguments are passed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3970,6 +3980,7 @@ union bpf_attr {
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
+	FN(ima_inode_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index b4f27a874092..70e5e0b6d69d 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -15,6 +15,7 @@
 #include <net/bpf_sk_storage.h>
 #include <linux/bpf_local_storage.h>
 #include <linux/btf_ids.h>
+#include <linux/ima.h>
 
 /* For every LSM hook that allows attachment of BPF programs, declare a nop
  * function where a BPF program can be attached.
@@ -75,6 +76,29 @@ const static struct bpf_func_proto bpf_bprm_opts_set_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_3(bpf_ima_inode_hash, struct inode *, inode, void *, dst, u32, size)
+{
+	return ima_inode_hash(inode, dst, size);
+}
+
+static bool bpf_ima_inode_hash_allowed(const struct bpf_prog *prog)
+{
+	return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);
+}
+
+BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
+
+const static struct bpf_func_proto bpf_ima_inode_hash_proto = {
+	.func		= bpf_ima_inode_hash,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_ima_inode_hash_btf_ids[0],
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.allowed	= bpf_ima_inode_hash_allowed,
+};
+
 static const struct bpf_func_proto *
 bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -97,6 +121,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_task_storage_delete_proto;
 	case BPF_FUNC_bprm_opts_set:
 		return &bpf_bprm_opts_set_proto;
+	case BPF_FUNC_ima_inode_hash:
+		return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index c5bc947a70ad..8b829748d488 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -436,6 +436,7 @@ class PrinterHelpers(Printer):
             'struct xdp_md',
             'struct path',
             'struct btf_ptr',
+            'struct inode',
     ]
     known_types = {
             '...',
@@ -480,6 +481,7 @@ class PrinterHelpers(Printer):
             'struct task_struct',
             'struct path',
             'struct btf_ptr',
+            'struct inode',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3ca6146f001a..c3458ec1f30a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3807,6 +3807,16 @@ union bpf_attr {
  * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC_COARSE**)
  * 	Return
  * 		Current *ktime*.
+ *
+ * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
+ *	Description
+ *		Returns the stored IMA hash of the *inode* (if it's avaialable).
+ *		If the hash is larger than *size*, then only *size*
+ *		bytes will be copied to *dst*
+ *	Return
+ *		The **hash_algo** is returned on success,
+ *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
+ *		invalid arguments are passed.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3970,6 +3980,7 @@ union bpf_attr {
 	FN(get_current_task_btf),	\
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
+	FN(ima_inode_hash),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3-70-g09d2


From 34b82d3ac1058653b3de7be4697b55f67533b1f1 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Tue, 24 Nov 2020 15:12:10 +0000
Subject: bpf: Add a selftest for bpf_ima_inode_hash

The test does the following:

- Mounts a loopback filesystem and appends the IMA policy to measure
  executions only on this file-system. Restricting the IMA policy to
  a particular filesystem prevents a system-wide IMA policy change.
- Executes an executable copied to this loopback filesystem.
- Calls the bpf_ima_inode_hash in the bprm_committed_creds hook and
  checks if the call succeeded and checks if a hash was calculated.

The test shells out to the added ima_setup.sh script as the setup is
better handled in a shell script and is more complicated to do in the
test program or even shelling out individual commands from C.

The list of required configs (i.e. IMA, SECURITYFS,
IMA_{WRITE,READ}_POLICY) for running this test are also updated.

Suggested-by: Mimi Zohar <zohar@linux.ibm.com> (limit policy rule to loopback mount)
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201124151210.1081188-4-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/config                |  4 ++
 tools/testing/selftests/bpf/ima_setup.sh          | 80 +++++++++++++++++++++++
 tools/testing/selftests/bpf/prog_tests/test_ima.c | 74 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/ima.c           | 28 ++++++++
 4 files changed, 186 insertions(+)
 create mode 100755 tools/testing/selftests/bpf/ima_setup.sh
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_ima.c
 create mode 100644 tools/testing/selftests/bpf/progs/ima.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 2118e23ac07a..365bf9771b07 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -39,3 +39,7 @@ CONFIG_BPF_JIT=y
 CONFIG_BPF_LSM=y
 CONFIG_SECURITY=y
 CONFIG_LIRC=y
+CONFIG_IMA=y
+CONFIG_SECURITYFS=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_READ_POLICY=y
diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
new file mode 100755
index 000000000000..15490ccc5e55
--- /dev/null
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+
+IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
+TEST_BINARY="/bin/true"
+
+usage()
+{
+        echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
+        exit 1
+}
+
+setup()
+{
+        local tmp_dir="$1"
+        local mount_img="${tmp_dir}/test.img"
+        local mount_dir="${tmp_dir}/mnt"
+        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+        mkdir -p ${mount_dir}
+
+        dd if=/dev/zero of="${mount_img}" bs=1M count=10
+
+        local loop_device="$(losetup --find --show ${mount_img})"
+
+        mkfs.ext4 "${loop_device}"
+        mount "${loop_device}" "${mount_dir}"
+
+        cp "${TEST_BINARY}" "${mount_dir}"
+        local mount_uuid="$(blkid -s UUID -o value ${loop_device})"
+        echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
+}
+
+cleanup() {
+        local tmp_dir="$1"
+        local mount_img="${tmp_dir}/test.img"
+        local mount_dir="${tmp_dir}/mnt"
+
+        local loop_devices=$(losetup -j ${mount_img} -O NAME --noheadings)
+        for loop_dev in "${loop_devices}"; do
+                losetup -d $loop_dev
+        done
+
+        umount ${mount_dir}
+        rm -rf ${tmp_dir}
+}
+
+run()
+{
+        local tmp_dir="$1"
+        local mount_dir="${tmp_dir}/mnt"
+        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+        exec "${copied_bin_path}"
+}
+
+main()
+{
+        [[ $# -ne 2 ]] && usage
+
+        local action="$1"
+        local tmp_dir="$2"
+
+        [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+        if [[ "${action}" == "setup" ]]; then
+                setup "${tmp_dir}"
+        elif [[ "${action}" == "cleanup" ]]; then
+                cleanup "${tmp_dir}"
+        elif [[ "${action}" == "run" ]]; then
+                run "${tmp_dir}"
+        else
+                echo "Unknown action: ${action}"
+                exit 1
+        fi
+}
+
+main "$@"
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
new file mode 100644
index 000000000000..61fca681d524
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <test_progs.h>
+
+#include "ima.skel.h"
+
+static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
+{
+	int child_pid, child_status;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		*monitored_pid = getpid();
+		execlp("./ima_setup.sh", "./ima_setup.sh", "run", measured_dir,
+		       NULL);
+		exit(errno);
+
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		return WEXITSTATUS(child_status);
+	}
+
+	return -EINVAL;
+}
+
+void test_test_ima(void)
+{
+	char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
+	const char *measured_dir;
+	char cmd[256];
+
+	int err, duration = 0;
+	struct ima *skel = NULL;
+
+	skel = ima__open_and_load();
+	if (CHECK(!skel, "skel_load", "skeleton failed\n"))
+		goto close_prog;
+
+	err = ima__attach(skel);
+	if (CHECK(err, "attach", "attach failed: %d\n", err))
+		goto close_prog;
+
+	measured_dir = mkdtemp(measured_dir_template);
+	if (CHECK(measured_dir == NULL, "mkdtemp", "err %d\n", errno))
+		goto close_prog;
+
+	snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
+	if (CHECK_FAIL(system(cmd)))
+		goto close_clean;
+
+	err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+	if (CHECK(err, "run_measured_process", "err = %d\n", err))
+		goto close_clean;
+
+	CHECK(skel->data->ima_hash_ret < 0, "ima_hash_ret",
+	      "ima_hash_ret = %ld\n", skel->data->ima_hash_ret);
+
+	CHECK(skel->bss->ima_hash == 0, "ima_hash",
+	      "ima_hash = %lu\n", skel->bss->ima_hash);
+
+close_clean:
+	snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
+	CHECK_FAIL(system(cmd));
+close_prog:
+	ima__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
new file mode 100644
index 000000000000..86b21aff4bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+long ima_hash_ret = -1;
+u64 ima_hash = 0;
+u32 monitored_pid = 0;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/bprm_committed_creds")
+int BPF_PROG(ima, struct linux_binprm *bprm)
+{
+	u32 pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (pid == monitored_pid)
+		ima_hash_ret = bpf_ima_inode_hash(bprm->file->f_inode,
+						  &ima_hash, sizeof(ima_hash));
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From fb3558127cb62ba2dea9e3d0efa1bb1d7e5eee2a Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Tue, 24 Nov 2020 22:52:55 -0500
Subject: bpf: Fix selftest compilation on clang 11

Before this patch, profiler.inc.h wouldn't compile with clang-11 (before
the __builtin_preserve_enum_value LLVM builtin was introduced in
https://reviews.llvm.org/D83242).

Another test that uses this builtin (test_core_enumval) is conditionally
skipped if the compiler is too old. In that spirit, this patch inhibits
part of populate_cgroup_info(), which needs this CO-RE builtin. The
selftests build again on clang-11.

The affected test (the profiler test) doesn't pass on clang-11 because
it's missing https://reviews.llvm.org/D85570, but at least the test suite
as a whole compiles. The test's expected failure is already called out in
the README.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Florian Lehner <dev@der-flo.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201125035255.17970-1-andreimatei1@gmail.com
---
 tools/testing/selftests/bpf/progs/profiler.inc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 30982a7e4d0f..4896fdf816f7 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -256,6 +256,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
 	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 
+#if __has_builtin(__builtin_preserve_enum_value)
 	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
 		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
 						  pids_cgrp_id___local);
@@ -275,6 +276,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 			}
 		}
 	}
+#endif
 
 	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
 	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
-- 
cgit v1.2.3-70-g09d2


From 75eeaddd57f4a0ac89110547221df8f3757d5a6f Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:26 +0800
Subject: perf arm-spe: Refactor printing string to buffer

When outputs strings to the decoding buffer with function snprintf(),
SPE decoder needs to detects if any error returns from snprintf() and if
so needs to directly bail out.  If snprintf() returns success, it needs
to update buffer pointer and reduce the buffer length so can continue to
output the next string into the consequent memory space.

This complex logics are spreading in the function arm_spe_pkt_desc() so
there has many duplicate codes for handling error detecting, increment
buffer pointer and decrement buffer size.

To avoid the duplicate code, this patch introduces a new helper function
arm_spe_pkt_out_string() which is used to wrap up the complex logics,
and it's used by the caller arm_spe_pkt_desc().  This patch moves the
variable 'blen' as the function's local variable so allows to remove
the unnecessary braces and improve the readability.

This patch simplifies the return value for arm_spe_pkt_desc(): '0' means
success and other values mean an error has occurred.  To realize this,
it relies on arm_spe_pkt_out_string()'s parameter 'err', the 'err' is a
cumulative value, returns its final value if printing buffer is called
for one time or multiple times.  Finally, the error is handled in a
central place, rather than directly bailing out in switch-cases, it
returns error at the end of arm_spe_pkt_desc().

This patch changes the caller arm_spe_dump() to respect the updated
return value semantics of arm_spe_pkt_desc().

Suggested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 302 ++++++++++-----------
 tools/perf/util/arm-spe.c                          |   2 +-
 2 files changed, 151 insertions(+), 153 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 671a4763fb47..fbededc1bcd4 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -9,6 +9,7 @@
 #include <endian.h>
 #include <byteswap.h>
 #include <linux/bitops.h>
+#include <stdarg.h>
 
 #include "arm-spe-pkt-decoder.h"
 
@@ -258,192 +259,189 @@ int arm_spe_get_packet(const unsigned char *buf, size_t len,
 	return ret;
 }
 
+static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen,
+				  const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	/* Bail out if any error occurred */
+	if (err && *err)
+		return *err;
+
+	va_start(ap, fmt);
+	ret = vsnprintf(*buf_p, *blen, fmt, ap);
+	va_end(ap);
+
+	if (ret < 0) {
+		if (err && !*err)
+			*err = ret;
+
+	/*
+	 * A return value of *blen or more means that the output was
+	 * truncated and the buffer is overrun.
+	 */
+	} else if ((size_t)ret >= *blen) {
+		(*buf_p)[*blen - 1] = '\0';
+
+		/*
+		 * Set *err to 'ret' to avoid overflow if tries to
+		 * fill this buffer sequentially.
+		 */
+		if (err && !*err)
+			*err = ret;
+	} else {
+		*buf_p += ret;
+		*blen -= ret;
+	}
+
+	return ret;
+}
+
 int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 		     size_t buf_len)
 {
-	int ret, ns, el, idx = packet->index;
+	int ns, el, idx = packet->index;
 	unsigned long long payload = packet->payload;
 	const char *name = arm_spe_pkt_name(packet->type);
+	char *buf_orig = buf;
+	size_t blen = buf_len;
+	int err = 0;
 
 	switch (packet->type) {
 	case ARM_SPE_BAD:
 	case ARM_SPE_PAD:
 	case ARM_SPE_END:
-		return snprintf(buf, buf_len, "%s", name);
-	case ARM_SPE_EVENTS: {
-		size_t blen = buf_len;
-
-		ret = 0;
-		ret = snprintf(buf, buf_len, "EV");
-		buf += ret;
-		blen -= ret;
-		if (payload & 0x1) {
-			ret = snprintf(buf, buf_len, " EXCEPTION-GEN");
-			buf += ret;
-			blen -= ret;
-		}
-		if (payload & 0x2) {
-			ret = snprintf(buf, buf_len, " RETIRED");
-			buf += ret;
-			blen -= ret;
-		}
-		if (payload & 0x4) {
-			ret = snprintf(buf, buf_len, " L1D-ACCESS");
-			buf += ret;
-			blen -= ret;
-		}
-		if (payload & 0x8) {
-			ret = snprintf(buf, buf_len, " L1D-REFILL");
-			buf += ret;
-			blen -= ret;
-		}
-		if (payload & 0x10) {
-			ret = snprintf(buf, buf_len, " TLB-ACCESS");
-			buf += ret;
-			blen -= ret;
-		}
-		if (payload & 0x20) {
-			ret = snprintf(buf, buf_len, " TLB-REFILL");
-			buf += ret;
-			blen -= ret;
-		}
-		if (payload & 0x40) {
-			ret = snprintf(buf, buf_len, " NOT-TAKEN");
-			buf += ret;
-			blen -= ret;
-		}
-		if (payload & 0x80) {
-			ret = snprintf(buf, buf_len, " MISPRED");
-			buf += ret;
-			blen -= ret;
-		}
+		arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name);
+		break;
+	case ARM_SPE_EVENTS:
+		arm_spe_pkt_out_string(&err, &buf, &blen, "EV");
+
+		if (payload & 0x1)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN");
+		if (payload & 0x2)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED");
+		if (payload & 0x4)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS");
+		if (payload & 0x8)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL");
+		if (payload & 0x10)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS");
+		if (payload & 0x20)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL");
+		if (payload & 0x40)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN");
+		if (payload & 0x80)
+			arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED");
 		if (idx > 1) {
-			if (payload & 0x100) {
-				ret = snprintf(buf, buf_len, " LLC-ACCESS");
-				buf += ret;
-				blen -= ret;
-			}
-			if (payload & 0x200) {
-				ret = snprintf(buf, buf_len, " LLC-REFILL");
-				buf += ret;
-				blen -= ret;
-			}
-			if (payload & 0x400) {
-				ret = snprintf(buf, buf_len, " REMOTE-ACCESS");
-				buf += ret;
-				blen -= ret;
-			}
+			if (payload & 0x100)
+				arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS");
+			if (payload & 0x200)
+				arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL");
+			if (payload & 0x400)
+				arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS");
 		}
-		if (ret < 0)
-			return ret;
-		blen -= ret;
-		return buf_len - blen;
-	}
+		break;
 	case ARM_SPE_OP_TYPE:
 		switch (idx) {
-		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ?
-					"COND-SELECT" : "INSN-OTHER");
-		case 1:	{
-			size_t blen = buf_len;
+		case 0:
+			arm_spe_pkt_out_string(&err, &buf, &blen,
+					payload & 0x1 ? "COND-SELECT" : "INSN-OTHER");
+			break;
+		case 1:
+			arm_spe_pkt_out_string(&err, &buf, &blen,
+					       payload & 0x1 ? "ST" : "LD");
 
-			if (payload & 0x1)
-				ret = snprintf(buf, buf_len, "ST");
-			else
-				ret = snprintf(buf, buf_len, "LD");
-			buf += ret;
-			blen -= ret;
 			if (payload & 0x2) {
-				if (payload & 0x4) {
-					ret = snprintf(buf, buf_len, " AT");
-					buf += ret;
-					blen -= ret;
-				}
-				if (payload & 0x8) {
-					ret = snprintf(buf, buf_len, " EXCL");
-					buf += ret;
-					blen -= ret;
-				}
-				if (payload & 0x10) {
-					ret = snprintf(buf, buf_len, " AR");
-					buf += ret;
-					blen -= ret;
-				}
+				if (payload & 0x4)
+					arm_spe_pkt_out_string(&err, &buf, &blen, " AT");
+				if (payload & 0x8)
+					arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL");
+				if (payload & 0x10)
+					arm_spe_pkt_out_string(&err, &buf, &blen, " AR");
 			} else if (payload & 0x4) {
-				ret = snprintf(buf, buf_len, " SIMD-FP");
-				buf += ret;
-				blen -= ret;
-			}
-			if (ret < 0)
-				return ret;
-			blen -= ret;
-			return buf_len - blen;
-		}
-		case 2:	{
-			size_t blen = buf_len;
-
-			ret = snprintf(buf, buf_len, "B");
-			buf += ret;
-			blen -= ret;
-			if (payload & 0x1) {
-				ret = snprintf(buf, buf_len, " COND");
-				buf += ret;
-				blen -= ret;
-			}
-			if (payload & 0x2) {
-				ret = snprintf(buf, buf_len, " IND");
-				buf += ret;
-				blen -= ret;
-			}
-			if (ret < 0)
-				return ret;
-			blen -= ret;
-			return buf_len - blen;
+				arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP");
 			}
-		default: return 0;
+			break;
+		case 2:
+			arm_spe_pkt_out_string(&err, &buf, &blen, "B");
+
+			if (payload & 0x1)
+				arm_spe_pkt_out_string(&err, &buf, &blen, " COND");
+			if (payload & 0x2)
+				arm_spe_pkt_out_string(&err, &buf, &blen, " IND");
+
+			break;
+		default:
+			/* Unknown index */
+			err = -1;
+			break;
 		}
+		break;
 	case ARM_SPE_DATA_SOURCE:
 	case ARM_SPE_TIMESTAMP:
-		return snprintf(buf, buf_len, "%s %lld", name, payload);
+		arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload);
+		break;
 	case ARM_SPE_ADDRESS:
 		switch (idx) {
 		case 0:
-		case 1: ns = !!(packet->payload & NS_FLAG);
+		case 1:
+			ns = !!(packet->payload & NS_FLAG);
 			el = (packet->payload & EL_FLAG) >> 61;
 			payload &= ~(0xffULL << 56);
-			return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d",
+			arm_spe_pkt_out_string(&err, &buf, &blen,
+					"%s 0x%llx el%d ns=%d",
 				        (idx == 1) ? "TGT" : "PC", payload, el, ns);
-		case 2:	return snprintf(buf, buf_len, "VA 0x%llx", payload);
-		case 3:	ns = !!(packet->payload & NS_FLAG);
+			break;
+		case 2:
+			arm_spe_pkt_out_string(&err, &buf, &blen,
+					       "VA 0x%llx", payload);
+			break;
+		case 3:
+			ns = !!(packet->payload & NS_FLAG);
 			payload &= ~(0xffULL << 56);
-			return snprintf(buf, buf_len, "PA 0x%llx ns=%d",
-					payload, ns);
-		default: return 0;
+			arm_spe_pkt_out_string(&err, &buf, &blen,
+					       "PA 0x%llx ns=%d", payload, ns);
+			break;
+		default:
+			/* Unknown index */
+			err = -1;
+			break;
 		}
+		break;
 	case ARM_SPE_CONTEXT:
-		return snprintf(buf, buf_len, "%s 0x%lx el%d", name,
-				(unsigned long)payload, idx + 1);
-	case ARM_SPE_COUNTER: {
-		size_t blen = buf_len;
-
-		ret = snprintf(buf, buf_len, "%s %d ", name,
-			       (unsigned short)payload);
-		buf += ret;
-		blen -= ret;
+		arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d",
+				       name, (unsigned long)payload, idx + 1);
+		break;
+	case ARM_SPE_COUNTER:
+		arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name,
+				       (unsigned short)payload);
 		switch (idx) {
-		case 0:	ret = snprintf(buf, buf_len, "TOT"); break;
-		case 1:	ret = snprintf(buf, buf_len, "ISSUE"); break;
-		case 2:	ret = snprintf(buf, buf_len, "XLAT"); break;
-		default: ret = 0;
+		case 0:
+			arm_spe_pkt_out_string(&err, &buf, &blen, "TOT");
+			break;
+		case 1:
+			arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE");
+			break;
+		case 2:
+			arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT");
+			break;
+		default:
+			break;
 		}
-		if (ret < 0)
-			return ret;
-		blen -= ret;
-		return buf_len - blen;
-	}
+		break;
 	default:
+		/* Unknown packet type */
+		err = -1;
 		break;
 	}
 
-	return snprintf(buf, buf_len, "%s 0x%llx (%d)",
-			name, payload, packet->index);
+	/* Output raw data if detect any error */
+	if (err) {
+		err = 0;
+		arm_spe_pkt_out_string(&err, &buf_orig, &buf_len, "%s 0x%llx (%d)",
+				       name, payload, packet->index);
+	}
+
+	return err;
 }
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 3882a5360ada..8901a1656a41 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -113,7 +113,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 		if (ret > 0) {
 			ret = arm_spe_pkt_desc(&packet, desc,
 					       ARM_SPE_PKT_DESC_MAX);
-			if (ret > 0)
+			if (!ret)
 				color_fprintf(stdout, color, " %s\n", desc);
 		} else {
 			color_fprintf(stdout, color, " Bad packet!\n");
-- 
cgit v1.2.3-70-g09d2


From 11695142e25e957dc3e56c29dc5f9daaf9530b10 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:27 +0800
Subject: perf arm-spe: Refactor packet header parsing

The packet header parsing uses the hard coded values and it uses nested
if-else statements.

To improve the readability, this patch refactors the macros for packet
header format so it removes the hard coded values.  Furthermore, based
on the new mask macros it reduces the nested if-else statements and
changes to use the flat conditions checking, this is directive and can
easily map to the descriptions in ARMv8-a architecture reference manual
(ARM DDI 0487E.a), chapter 'D10.1.5 Statistical Profiling Extension
protocol packet headers'.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-3-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 92 ++++++++++------------
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.h     | 20 +++++
 2 files changed, 61 insertions(+), 51 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index fbededc1bcd4..a769fe5a4496 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -16,28 +16,6 @@
 #define NS_FLAG		BIT_ULL(63)
 #define EL_FLAG		(BIT_ULL(62) | BIT_ULL(61))
 
-#define SPE_HEADER0_PAD			0x0
-#define SPE_HEADER0_END			0x1
-#define SPE_HEADER0_ADDRESS		0x30 /* address packet (short) */
-#define SPE_HEADER0_ADDRESS_MASK	0x38
-#define SPE_HEADER0_COUNTER		0x18 /* counter packet (short) */
-#define SPE_HEADER0_COUNTER_MASK	0x38
-#define SPE_HEADER0_TIMESTAMP		0x71
-#define SPE_HEADER0_TIMESTAMP		0x71
-#define SPE_HEADER0_EVENTS		0x2
-#define SPE_HEADER0_EVENTS_MASK		0xf
-#define SPE_HEADER0_SOURCE		0x3
-#define SPE_HEADER0_SOURCE_MASK		0xf
-#define SPE_HEADER0_CONTEXT		0x24
-#define SPE_HEADER0_CONTEXT_MASK	0x3c
-#define SPE_HEADER0_OP_TYPE		0x8
-#define SPE_HEADER0_OP_TYPE_MASK	0x3c
-#define SPE_HEADER1_ALIGNMENT		0x0
-#define SPE_HEADER1_ADDRESS		0xb0 /* address packet (extended) */
-#define SPE_HEADER1_ADDRESS_MASK	0xf8
-#define SPE_HEADER1_COUNTER		0x98 /* counter packet (extended) */
-#define SPE_HEADER1_COUNTER_MASK	0xf8
-
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define le16_to_cpu bswap_16
 #define le32_to_cpu bswap_32
@@ -200,46 +178,58 @@ static int arm_spe_get_addr(const unsigned char *buf, size_t len,
 static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
 				 struct arm_spe_pkt *packet)
 {
-	unsigned int byte;
+	unsigned int hdr;
+	unsigned char ext_hdr = 0;
 
 	memset(packet, 0, sizeof(struct arm_spe_pkt));
 
 	if (!len)
 		return ARM_SPE_NEED_MORE_BYTES;
 
-	byte = buf[0];
-	if (byte == SPE_HEADER0_PAD)
+	hdr = buf[0];
+
+	if (hdr == SPE_HEADER0_PAD)
 		return arm_spe_get_pad(packet);
-	else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */
+
+	if (hdr == SPE_HEADER0_END) /* no timestamp at end of record */
 		return arm_spe_get_end(packet);
-	else if (byte & 0xc0 /* 0y11xxxxxx */) {
-		if (byte & 0x80) {
-			if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS)
-				return arm_spe_get_addr(buf, len, 0, packet);
-			if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER)
-				return arm_spe_get_counter(buf, len, 0, packet);
-		} else
-			if (byte == SPE_HEADER0_TIMESTAMP)
-				return arm_spe_get_timestamp(buf, len, packet);
-			else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS)
-				return arm_spe_get_events(buf, len, packet);
-			else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE)
-				return arm_spe_get_data_source(buf, len, packet);
-			else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT)
-				return arm_spe_get_context(buf, len, packet);
-			else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE)
-				return arm_spe_get_op_type(buf, len, packet);
-	} else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) {
-		/* 16-bit header */
-		byte = buf[1];
-		if (byte == SPE_HEADER1_ALIGNMENT)
+
+	if (hdr == SPE_HEADER0_TIMESTAMP)
+		return arm_spe_get_timestamp(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_EVENTS)
+		return arm_spe_get_events(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK1) == SPE_HEADER0_SOURCE)
+		return arm_spe_get_data_source(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_CONTEXT)
+		return arm_spe_get_context(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_OP_TYPE)
+		return arm_spe_get_op_type(buf, len, packet);
+
+	if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) {
+		/* 16-bit extended format header */
+		ext_hdr = 1;
+
+		hdr = buf[1];
+		if (hdr == SPE_HEADER1_ALIGNMENT)
 			return arm_spe_get_alignment(buf, len, packet);
-		else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS)
-			return arm_spe_get_addr(buf, len, 1, packet);
-		else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER)
-			return arm_spe_get_counter(buf, len, 1, packet);
 	}
 
+	/*
+	 * The short format header's byte 0 or the extended format header's
+	 * byte 1 has been assigned to 'hdr', which uses the same encoding for
+	 * address packet and counter packet, so don't need to distinguish if
+	 * it's short format or extended format and handle in once.
+	 */
+	if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_ADDRESS)
+		return arm_spe_get_addr(buf, len, ext_hdr, packet);
+
+	if ((hdr & SPE_HEADER0_MASK3) == SPE_HEADER0_COUNTER)
+		return arm_spe_get_counter(buf, len, ext_hdr, packet);
+
 	return ARM_SPE_BAD_PACKET;
 }
 
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 4c870521b8eb..129f43405eb1 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -36,6 +36,26 @@ struct arm_spe_pkt {
 	uint64_t		payload;
 };
 
+/* Short header (HEADER0) and extended header (HEADER1) */
+#define SPE_HEADER0_PAD				0x0
+#define SPE_HEADER0_END				0x1
+#define SPE_HEADER0_TIMESTAMP			0x71
+/* Mask for event & data source */
+#define SPE_HEADER0_MASK1			(GENMASK_ULL(7, 6) | GENMASK_ULL(3, 0))
+#define SPE_HEADER0_EVENTS			0x42
+#define SPE_HEADER0_SOURCE			0x43
+/* Mask for context & operation */
+#define SPE_HEADER0_MASK2			GENMASK_ULL(7, 2)
+#define SPE_HEADER0_CONTEXT			0x64
+#define SPE_HEADER0_OP_TYPE			0x48
+/* Mask for extended format */
+#define SPE_HEADER0_EXTENDED			0x20
+/* Mask for address & counter */
+#define SPE_HEADER0_MASK3			GENMASK_ULL(7, 3)
+#define SPE_HEADER0_ADDRESS			0xb0
+#define SPE_HEADER0_COUNTER			0x98
+#define SPE_HEADER1_ALIGNMENT			0x0
+
 #define SPE_ADDR_PKT_HDR_INDEX_INS		(0x0)
 #define SPE_ADDR_PKT_HDR_INDEX_BRANCH		(0x1)
 #define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT	(0x2)
-- 
cgit v1.2.3-70-g09d2


From ab2aa439e4aaa3ce0fdcfa0f847aed4bf13bf353 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:28 +0800
Subject: perf arm-spe: Add new function arm_spe_pkt_desc_addr()

This patch moves out the address parsing code from arm_spe_pkt_desc()
and uses the new introduced function arm_spe_pkt_desc_addr() to process
address packet.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-4-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 64 +++++++++++++---------
 1 file changed, 38 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index a769fe5a4496..b16d68b40bbd 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -288,10 +288,46 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen,
 	return ret;
 }
 
+static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
+				 char *buf, size_t buf_len)
+{
+	int ns, el, idx = packet->index;
+	u64 payload = packet->payload;
+	int err = 0;
+
+	switch (idx) {
+	case 0:
+	case 1:
+		ns = !!(packet->payload & NS_FLAG);
+		el = (packet->payload & EL_FLAG) >> 61;
+		payload &= ~(0xffULL << 56);
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				"%s 0x%llx el%d ns=%d",
+				(idx == 1) ? "TGT" : "PC", payload, el, ns);
+		break;
+	case 2:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				       "VA 0x%llx", payload);
+		break;
+	case 3:
+		ns = !!(packet->payload & NS_FLAG);
+		payload &= ~(0xffULL << 56);
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				       "PA 0x%llx ns=%d", payload, ns);
+		break;
+	default:
+		/* Unknown index */
+		err = -1;
+		break;
+	}
+
+	return err;
+}
+
 int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 		     size_t buf_len)
 {
-	int ns, el, idx = packet->index;
+	int idx = packet->index;
 	unsigned long long payload = packet->payload;
 	const char *name = arm_spe_pkt_name(packet->type);
 	char *buf_orig = buf;
@@ -373,31 +409,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 		arm_spe_pkt_out_string(&err, &buf, &blen, "%s %lld", name, payload);
 		break;
 	case ARM_SPE_ADDRESS:
-		switch (idx) {
-		case 0:
-		case 1:
-			ns = !!(packet->payload & NS_FLAG);
-			el = (packet->payload & EL_FLAG) >> 61;
-			payload &= ~(0xffULL << 56);
-			arm_spe_pkt_out_string(&err, &buf, &blen,
-					"%s 0x%llx el%d ns=%d",
-				        (idx == 1) ? "TGT" : "PC", payload, el, ns);
-			break;
-		case 2:
-			arm_spe_pkt_out_string(&err, &buf, &blen,
-					       "VA 0x%llx", payload);
-			break;
-		case 3:
-			ns = !!(packet->payload & NS_FLAG);
-			payload &= ~(0xffULL << 56);
-			arm_spe_pkt_out_string(&err, &buf, &blen,
-					       "PA 0x%llx ns=%d", payload, ns);
-			break;
-		default:
-			/* Unknown index */
-			err = -1;
-			break;
-		}
+		err = arm_spe_pkt_desc_addr(packet, buf, buf_len);
 		break;
 	case ARM_SPE_CONTEXT:
 		arm_spe_pkt_out_string(&err, &buf, &blen, "%s 0x%lx el%d",
-- 
cgit v1.2.3-70-g09d2


From 09935ca7b64cfa379b6ebf2b8cdb3126e09bffab Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:29 +0800
Subject: perf arm-spe: Refactor address packet handling

This patch is to refactor address packet handling, it defines macros for
address packet's header and payload, these macros are used by decoder
and the dump flow.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-decoder.c  | 29 +++++++++---------
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 26 ++++++++--------
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.h     | 35 ++++++++++++++--------
 3 files changed, 48 insertions(+), 42 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index cc18a1e8c212..776b3e6628bb 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -24,36 +24,35 @@
 
 static u64 arm_spe_calc_ip(int index, u64 payload)
 {
-	u8 *addr = (u8 *)&payload;
-	int ns, el;
+	u64 ns, el;
 
 	/* Instruction virtual address or Branch target address */
 	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
 	    index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
-		ns = addr[7] & SPE_ADDR_PKT_NS;
-		el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;
+		ns = SPE_ADDR_PKT_GET_NS(payload);
+		el = SPE_ADDR_PKT_GET_EL(payload);
+
+		/* Clean highest byte */
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 
 		/* Fill highest byte for EL1 or EL2 (VHE) mode */
 		if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
-			addr[7] = 0xff;
-		/* Clean highest byte for other cases */
-		else
-			addr[7] = 0x0;
+			payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
 
 	/* Data access virtual address */
 	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
 
+		/* Clean tags */
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
+
 		/* Fill highest byte if bits [48..55] is 0xff */
-		if (addr[6] == 0xff)
-			addr[7] = 0xff;
-		/* Otherwise, cleanup tags */
-		else
-			addr[7] = 0x0;
+		if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL)
+			payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
 
 	/* Data access physical address */
 	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
-		/* Cleanup byte 7 */
-		addr[7] = 0x0;
+		/* Clean highest byte */
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 	} else {
 		pr_err("unsupported address packet index: 0x%x\n", index);
 	}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index b16d68b40bbd..d37c4008adbc 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -13,9 +13,6 @@
 
 #include "arm-spe-pkt-decoder.h"
 
-#define NS_FLAG		BIT_ULL(63)
-#define EL_FLAG		(BIT_ULL(62) | BIT_ULL(61))
-
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define le16_to_cpu bswap_16
 #define le32_to_cpu bswap_32
@@ -167,10 +164,11 @@ static int arm_spe_get_addr(const unsigned char *buf, size_t len,
 			    const unsigned char ext_hdr, struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_ADDRESS;
+
 	if (ext_hdr)
-		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
+		packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
 	else
-		packet->index = buf[0] & 0x7;
+		packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
 
 	return arm_spe_get_payload(buf, len, ext_hdr, packet);
 }
@@ -296,22 +294,22 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
 	int err = 0;
 
 	switch (idx) {
-	case 0:
-	case 1:
-		ns = !!(packet->payload & NS_FLAG);
-		el = (packet->payload & EL_FLAG) >> 61;
-		payload &= ~(0xffULL << 56);
+	case SPE_ADDR_PKT_HDR_INDEX_INS:
+	case SPE_ADDR_PKT_HDR_INDEX_BRANCH:
+		ns = !!SPE_ADDR_PKT_GET_NS(payload);
+		el = SPE_ADDR_PKT_GET_EL(payload);
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 		arm_spe_pkt_out_string(&err, &buf, &buf_len,
 				"%s 0x%llx el%d ns=%d",
 				(idx == 1) ? "TGT" : "PC", payload, el, ns);
 		break;
-	case 2:
+	case SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len,
 				       "VA 0x%llx", payload);
 		break;
-	case 3:
-		ns = !!(packet->payload & NS_FLAG);
-		payload &= ~(0xffULL << 56);
+	case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS:
+		ns = !!SPE_ADDR_PKT_GET_NS(payload);
+		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 		arm_spe_pkt_out_string(&err, &buf, &buf_len,
 				       "PA 0x%llx ns=%d", payload, ns);
 		break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 129f43405eb1..f97d6840be3a 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -56,19 +56,28 @@ struct arm_spe_pkt {
 #define SPE_HEADER0_COUNTER			0x98
 #define SPE_HEADER1_ALIGNMENT			0x0
 
-#define SPE_ADDR_PKT_HDR_INDEX_INS		(0x0)
-#define SPE_ADDR_PKT_HDR_INDEX_BRANCH		(0x1)
-#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT	(0x2)
-#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS	(0x3)
-
-#define SPE_ADDR_PKT_NS				BIT(7)
-#define SPE_ADDR_PKT_CH				BIT(6)
-#define SPE_ADDR_PKT_EL_OFFSET			(5)
-#define SPE_ADDR_PKT_EL_MASK			(0x3 << SPE_ADDR_PKT_EL_OFFSET)
-#define SPE_ADDR_PKT_EL0			(0)
-#define SPE_ADDR_PKT_EL1			(1)
-#define SPE_ADDR_PKT_EL2			(2)
-#define SPE_ADDR_PKT_EL3			(3)
+#define SPE_HDR_SHORT_INDEX(h)			((h) & GENMASK_ULL(2, 0))
+#define SPE_HDR_EXTENDED_INDEX(h0, h1)		(((h0) & GENMASK_ULL(1, 0)) << 3 | \
+						 SPE_HDR_SHORT_INDEX(h1))
+
+/* Address packet header */
+#define SPE_ADDR_PKT_HDR_INDEX_INS		0x0
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH		0x1
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT	0x2
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS	0x3
+
+/* Address packet payload */
+#define SPE_ADDR_PKT_ADDR_BYTE7_SHIFT		56
+#define SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(v)	((v) & GENMASK_ULL(55, 0))
+#define SPE_ADDR_PKT_ADDR_GET_BYTE_6(v)		(((v) & GENMASK_ULL(55, 48)) >> 48)
+
+#define SPE_ADDR_PKT_GET_NS(v)			(((v) & BIT_ULL(63)) >> 63)
+#define SPE_ADDR_PKT_GET_EL(v)			(((v) & GENMASK_ULL(62, 61)) >> 61)
+
+#define SPE_ADDR_PKT_EL0			0
+#define SPE_ADDR_PKT_EL1			1
+#define SPE_ADDR_PKT_EL2			2
+#define SPE_ADDR_PKT_EL3			3
 
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
-- 
cgit v1.2.3-70-g09d2


From 5513ddaf103c62dd1eabe9403c0a8d9f810492dc Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:30 +0800
Subject: perf arm_spe: Fixup top byte for data virtual address

To establish a valid address from the address packet payload and finally
the address value can be used for parsing data symbol in DSO, current
code uses 0xff to replace the tag in the top byte of data virtual
address.

So far the code only fixups top byte for the memory layouts with 4KB
pages, it misses to support memory layouts with 64KB pages.

This patch adds the conditions for checking bits [55:52] are 0xf, if
detects the pattern it will fill 0xff into the top byte of the address,
also adds comment to explain the fixing up.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 776b3e6628bb..cac2ef79c025 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -24,7 +24,7 @@
 
 static u64 arm_spe_calc_ip(int index, u64 payload)
 {
-	u64 ns, el;
+	u64 ns, el, val;
 
 	/* Instruction virtual address or Branch target address */
 	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
@@ -45,8 +45,22 @@ static u64 arm_spe_calc_ip(int index, u64 payload)
 		/* Clean tags */
 		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 
-		/* Fill highest byte if bits [48..55] is 0xff */
-		if (SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload) == 0xffULL)
+		/*
+		 * Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.1 Address packet"
+		 * defines the data virtual address payload format, the top byte
+		 * (bits [63:56]) is assigned as top-byte tag; so we only can
+		 * retrieve address value from bits [55:0].
+		 *
+		 * According to Documentation/arm64/memory.rst, if detects the
+		 * specific pattern in bits [55:52] of payload which falls in
+		 * the kernel space, should fixup the top byte and this allows
+		 * perf tool to parse DSO symbol for data address correctly.
+		 *
+		 * For this reason, if detects the bits [55:52] is 0xf, will
+		 * fill 0xff into the top byte.
+		 */
+		val = SPE_ADDR_PKT_ADDR_GET_BYTE_6(payload);
+		if ((val & 0xf0ULL) == 0xf0ULL)
 			payload |= 0xffULL << SPE_ADDR_PKT_ADDR_BYTE7_SHIFT;
 
 	/* Data access physical address */
-- 
cgit v1.2.3-70-g09d2


From 6550149e801a32b1533ed86509af76319cb75eba Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:31 +0800
Subject: perf arm-spe: Refactor context packet handling

Minor refactoring to use macro for index mask.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-7-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +-
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index d37c4008adbc..978f5551b82c 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -136,7 +136,7 @@ static int arm_spe_get_context(const unsigned char *buf, size_t len,
 			       struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_CONTEXT;
-	packet->index = buf[0] & 0x3;
+	packet->index = SPE_CTX_PKT_HDR_INDEX(buf[0]);
 	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index f97d6840be3a..9bc876bffd35 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -79,6 +79,9 @@ struct arm_spe_pkt {
 #define SPE_ADDR_PKT_EL2			2
 #define SPE_ADDR_PKT_EL3			3
 
+/* Context packet header */
+#define SPE_CTX_PKT_HDR_INDEX(h)		((h) & GENMASK_ULL(1, 0))
+
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
 int arm_spe_get_packet(const unsigned char *buf, size_t len,
-- 
cgit v1.2.3-70-g09d2


From c52cfe9872132407eef6d734014d6fd7790146f5 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:32 +0800
Subject: perf arm-spe: Add new function arm_spe_pkt_desc_counter()

This patch moves out the counter packet parsing code from
arm_spe_pkt_desc() to the new function arm_spe_pkt_desc_counter().

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-8-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 43 ++++++++++++++--------
 1 file changed, 28 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 978f5551b82c..397ade5ffdeb 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -322,6 +322,33 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
 	return err;
 }
 
+static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet,
+				    char *buf, size_t buf_len)
+{
+	u64 payload = packet->payload;
+	const char *name = arm_spe_pkt_name(packet->type);
+	int err = 0;
+
+	arm_spe_pkt_out_string(&err, &buf, &buf_len, "%s %d ", name,
+			       (unsigned short)payload);
+
+	switch (packet->index) {
+	case 0:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT");
+		break;
+	case 1:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE");
+		break;
+	case 2:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT");
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
 int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 		     size_t buf_len)
 {
@@ -414,21 +441,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 				       name, (unsigned long)payload, idx + 1);
 		break;
 	case ARM_SPE_COUNTER:
-		arm_spe_pkt_out_string(&err, &buf, &blen, "%s %d ", name,
-				       (unsigned short)payload);
-		switch (idx) {
-		case 0:
-			arm_spe_pkt_out_string(&err, &buf, &blen, "TOT");
-			break;
-		case 1:
-			arm_spe_pkt_out_string(&err, &buf, &blen, "ISSUE");
-			break;
-		case 2:
-			arm_spe_pkt_out_string(&err, &buf, &blen, "XLAT");
-			break;
-		default:
-			break;
-		}
+		err = arm_spe_pkt_desc_counter(packet, buf, buf_len);
 		break;
 	default:
 		/* Unknown packet type */
-- 
cgit v1.2.3-70-g09d2


From d158aa408f221756f99edb128ef35bfd4d3361d5 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:33 +0800
Subject: perf arm-spe: Refactor counter packet handling

This patch defines macros for counter packet header, and uses macros to
replace hard code values in functions arm_spe_get_counter() and
arm_spe_pkt_desc().

In the function arm_spe_get_counter(), adds a new line for more
readable.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-9-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 11 ++++++-----
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h |  5 +++++
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 397ade5ffdeb..52f4339b1f0c 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -152,10 +152,11 @@ static int arm_spe_get_counter(const unsigned char *buf, size_t len,
 			       const unsigned char ext_hdr, struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_COUNTER;
+
 	if (ext_hdr)
-		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
+		packet->index = SPE_HDR_EXTENDED_INDEX(buf[0], buf[1]);
 	else
-		packet->index = buf[0] & 0x7;
+		packet->index = SPE_HDR_SHORT_INDEX(buf[0]);
 
 	return arm_spe_get_payload(buf, len, ext_hdr, packet);
 }
@@ -333,13 +334,13 @@ static int arm_spe_pkt_desc_counter(const struct arm_spe_pkt *packet,
 			       (unsigned short)payload);
 
 	switch (packet->index) {
-	case 0:
+	case SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, "TOT");
 		break;
-	case 1:
+	case SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, "ISSUE");
 		break;
-	case 2:
+	case SPE_CNT_PKT_HDR_INDEX_TRANS_LAT:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, "XLAT");
 		break;
 	default:
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 9bc876bffd35..7d8e34e35f05 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -82,6 +82,11 @@ struct arm_spe_pkt {
 /* Context packet header */
 #define SPE_CTX_PKT_HDR_INDEX(h)		((h) & GENMASK_ULL(1, 0))
 
+/* Counter packet header */
+#define SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT		0x0
+#define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT		0x1
+#define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT		0x2
+
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
 int arm_spe_get_packet(const unsigned char *buf, size_t len,
-- 
cgit v1.2.3-70-g09d2


From e66f6d75960220001ce94afe93c981826235c003 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:34 +0800
Subject: perf arm-spe: Add new function arm_spe_pkt_desc_event()

This patch moves out the event packet parsing from arm_spe_pkt_desc()
to the new function arm_spe_pkt_desc_event().

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-10-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 63 +++++++++++++---------
 1 file changed, 37 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 52f4339b1f0c..da6b9f76739c 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -287,6 +287,42 @@ static int arm_spe_pkt_out_string(int *err, char **buf_p, size_t *blen,
 	return ret;
 }
 
+static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
+				  char *buf, size_t buf_len)
+{
+	u64 payload = packet->payload;
+	int err = 0;
+
+	arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV");
+
+	if (payload & 0x1)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN");
+	if (payload & 0x2)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED");
+	if (payload & 0x4)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS");
+	if (payload & 0x8)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL");
+	if (payload & 0x10)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS");
+	if (payload & 0x20)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL");
+	if (payload & 0x40)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN");
+	if (payload & 0x80)
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED");
+	if (packet->index > 1) {
+		if (payload & 0x100)
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS");
+		if (payload & 0x200)
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
+		if (payload & 0x400)
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
+	}
+
+	return err;
+}
+
 static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
 				 char *buf, size_t buf_len)
 {
@@ -367,32 +403,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 		arm_spe_pkt_out_string(&err, &buf, &blen, "%s", name);
 		break;
 	case ARM_SPE_EVENTS:
-		arm_spe_pkt_out_string(&err, &buf, &blen, "EV");
-
-		if (payload & 0x1)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " EXCEPTION-GEN");
-		if (payload & 0x2)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " RETIRED");
-		if (payload & 0x4)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-ACCESS");
-		if (payload & 0x8)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " L1D-REFILL");
-		if (payload & 0x10)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-ACCESS");
-		if (payload & 0x20)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " TLB-REFILL");
-		if (payload & 0x40)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " NOT-TAKEN");
-		if (payload & 0x80)
-			arm_spe_pkt_out_string(&err, &buf, &blen, " MISPRED");
-		if (idx > 1) {
-			if (payload & 0x100)
-				arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-ACCESS");
-			if (payload & 0x200)
-				arm_spe_pkt_out_string(&err, &buf, &blen, " LLC-REFILL");
-			if (payload & 0x400)
-				arm_spe_pkt_out_string(&err, &buf, &blen, " REMOTE-ACCESS");
-		}
+		err = arm_spe_pkt_desc_event(packet, buf, buf_len);
 		break;
 	case ARM_SPE_OP_TYPE:
 		switch (idx) {
-- 
cgit v1.2.3-70-g09d2


From 889d1a675fcfe734f83c459de023a6f0a91a7a0e Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:35 +0800
Subject: perf arm-spe: Refactor event type handling

Move the enums of event types to arm-spe-pkt-decoder.h, thus function
arm_spe_pkt_desc_event() can use them for bitmasks.

Suggested-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-11-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-decoder.h  | 17 -----------------
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 22 +++++++++++-----------
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.h     | 18 ++++++++++++++++++
 3 files changed, 29 insertions(+), 28 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index a5111a8d4360..24727b8ca7ff 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -13,23 +13,6 @@
 
 #include "arm-spe-pkt-decoder.h"
 
-enum arm_spe_events {
-	EV_EXCEPTION_GEN	= 0,
-	EV_RETIRED		= 1,
-	EV_L1D_ACCESS		= 2,
-	EV_L1D_REFILL		= 3,
-	EV_TLB_ACCESS		= 4,
-	EV_TLB_WALK		= 5,
-	EV_NOT_TAKEN		= 6,
-	EV_MISPRED		= 7,
-	EV_LLC_ACCESS		= 8,
-	EV_LLC_MISS		= 9,
-	EV_REMOTE_ACCESS	= 10,
-	EV_ALIGNMENT		= 11,
-	EV_PARTIAL_PREDICATE	= 17,
-	EV_EMPTY_PREDICATE	= 18,
-};
-
 enum arm_spe_sample_type {
 	ARM_SPE_L1D_ACCESS	= 1 << 0,
 	ARM_SPE_L1D_MISS	= 1 << 1,
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index da6b9f76739c..3f30b2937715 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -295,28 +295,28 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
 
 	arm_spe_pkt_out_string(&err, &buf, &buf_len, "EV");
 
-	if (payload & 0x1)
+	if (payload & BIT(EV_EXCEPTION_GEN))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCEPTION-GEN");
-	if (payload & 0x2)
+	if (payload & BIT(EV_RETIRED))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " RETIRED");
-	if (payload & 0x4)
+	if (payload & BIT(EV_L1D_ACCESS))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-ACCESS");
-	if (payload & 0x8)
+	if (payload & BIT(EV_L1D_REFILL))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " L1D-REFILL");
-	if (payload & 0x10)
+	if (payload & BIT(EV_TLB_ACCESS))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-ACCESS");
-	if (payload & 0x20)
+	if (payload & BIT(EV_TLB_WALK))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " TLB-REFILL");
-	if (payload & 0x40)
+	if (payload & BIT(EV_NOT_TAKEN))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN");
-	if (payload & 0x80)
+	if (payload & BIT(EV_MISPRED))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED");
 	if (packet->index > 1) {
-		if (payload & 0x100)
+		if (payload & BIT(EV_LLC_ACCESS))
 			arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS");
-		if (payload & 0x200)
+		if (payload & BIT(EV_LLC_MISS))
 			arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
-		if (payload & 0x400)
+		if (payload & BIT(EV_REMOTE_ACCESS))
 			arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
 	}
 
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 7d8e34e35f05..42ed4e61ede2 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -87,6 +87,24 @@ struct arm_spe_pkt {
 #define SPE_CNT_PKT_HDR_INDEX_ISSUE_LAT		0x1
 #define SPE_CNT_PKT_HDR_INDEX_TRANS_LAT		0x2
 
+/* Event packet payload */
+enum arm_spe_events {
+	EV_EXCEPTION_GEN	= 0,
+	EV_RETIRED		= 1,
+	EV_L1D_ACCESS		= 2,
+	EV_L1D_REFILL		= 3,
+	EV_TLB_ACCESS		= 4,
+	EV_TLB_WALK		= 5,
+	EV_NOT_TAKEN		= 6,
+	EV_MISPRED		= 7,
+	EV_LLC_ACCESS		= 8,
+	EV_LLC_MISS		= 9,
+	EV_REMOTE_ACCESS	= 10,
+	EV_ALIGNMENT		= 11,
+	EV_PARTIAL_PREDICATE	= 17,
+	EV_EMPTY_PREDICATE	= 18,
+};
+
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
 int arm_spe_get_packet(const unsigned char *buf, size_t len,
-- 
cgit v1.2.3-70-g09d2


From 4d0f4ca273aa95bf592b8bad3c619b5766c8ecc7 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:36 +0800
Subject: perf arm-spe: Remove size condition checking for events

In the Armv8 ARM (ARM DDI 0487F.c), chapter "D10.2.6 Events packet", it
describes the event bit is valid with specific payload requirement.  For
example, the Last Level cache access event, the bit is defined as:

  E[8], byte 1 bit [0], when SZ == 0b01 , when SZ == 0b10 ,
  		     or when SZ == 0b11

It requires the payload size is at least 2 bytes, when byte 1 (start
counting from 0) is valid, E[8] (bit 0 in byte 1) can be used for LLC
access event type.  For safety, the code checks the condition for
payload size firstly, if meet the requirement for payload size, then
continue to parse event type.

If review function arm_spe_get_payload(), it has used cast, so any bytes
beyond the valid size have been set to zeros.

For this reason, we don't need to check payload size anymore afterwards
when parse events, thus this patch removes payload size conditions.

Suggested-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-12-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-decoder.c     |  9 +++------
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 14 ++++++--------
 2 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index cac2ef79c025..90d575cee1b9 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -192,16 +192,13 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
 			if (payload & BIT(EV_TLB_ACCESS))
 				decoder->record.type |= ARM_SPE_TLB_ACCESS;
 
-			if ((idx == 2 || idx == 4 || idx == 8) &&
-			    (payload & BIT(EV_LLC_MISS)))
+			if (payload & BIT(EV_LLC_MISS))
 				decoder->record.type |= ARM_SPE_LLC_MISS;
 
-			if ((idx == 2 || idx == 4 || idx == 8) &&
-			    (payload & BIT(EV_LLC_ACCESS)))
+			if (payload & BIT(EV_LLC_ACCESS))
 				decoder->record.type |= ARM_SPE_LLC_ACCESS;
 
-			if ((idx == 2 || idx == 4 || idx == 8) &&
-			    (payload & BIT(EV_REMOTE_ACCESS)))
+			if (payload & BIT(EV_REMOTE_ACCESS))
 				decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
 
 			if (payload & BIT(EV_MISPRED))
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 3f30b2937715..88bcf7e5be76 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -311,14 +311,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " NOT-TAKEN");
 	if (payload & BIT(EV_MISPRED))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " MISPRED");
-	if (packet->index > 1) {
-		if (payload & BIT(EV_LLC_ACCESS))
-			arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS");
-		if (payload & BIT(EV_LLC_MISS))
-			arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
-		if (payload & BIT(EV_REMOTE_ACCESS))
-			arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
-	}
+	if (payload & BIT(EV_LLC_ACCESS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-ACCESS");
+	if (payload & BIT(EV_LLC_MISS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
+	if (payload & BIT(EV_REMOTE_ACCESS))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
 
 	return err;
 }
-- 
cgit v1.2.3-70-g09d2


From 7488ffc4d981e19feddfe36a619051bf6216c7a1 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:37 +0800
Subject: perf arm-spe: Add new function arm_spe_pkt_desc_op_type()

The operation type packet is complex and contains subclass; the parsing
flow causes deep indentation; for more readable, this patch introduces
a new function arm_spe_pkt_desc_op_type() which is used for operation
type parsing.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-13-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 79 ++++++++++++----------
 1 file changed, 45 insertions(+), 34 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 88bcf7e5be76..d6c060f119b4 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -321,6 +321,50 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
 	return err;
 }
 
+static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
+				    char *buf, size_t buf_len)
+{
+	u64 payload = packet->payload;
+	int err = 0;
+
+	switch (packet->index) {
+	case 0:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				payload & 0x1 ? "COND-SELECT" : "INSN-OTHER");
+		break;
+	case 1:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len,
+				       payload & 0x1 ? "ST" : "LD");
+
+		if (payload & 0x2) {
+			if (payload & 0x4)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
+			if (payload & 0x8)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
+			if (payload & 0x10)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
+		} else if (payload & 0x4) {
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
+		}
+		break;
+	case 2:
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, "B");
+
+		if (payload & 0x1)
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
+		if (payload & 0x2)
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
+
+		break;
+	default:
+		/* Unknown index */
+		err = -1;
+		break;
+	}
+
+	return err;
+}
+
 static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
 				 char *buf, size_t buf_len)
 {
@@ -404,40 +448,7 @@ int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
 		err = arm_spe_pkt_desc_event(packet, buf, buf_len);
 		break;
 	case ARM_SPE_OP_TYPE:
-		switch (idx) {
-		case 0:
-			arm_spe_pkt_out_string(&err, &buf, &blen,
-					payload & 0x1 ? "COND-SELECT" : "INSN-OTHER");
-			break;
-		case 1:
-			arm_spe_pkt_out_string(&err, &buf, &blen,
-					       payload & 0x1 ? "ST" : "LD");
-
-			if (payload & 0x2) {
-				if (payload & 0x4)
-					arm_spe_pkt_out_string(&err, &buf, &blen, " AT");
-				if (payload & 0x8)
-					arm_spe_pkt_out_string(&err, &buf, &blen, " EXCL");
-				if (payload & 0x10)
-					arm_spe_pkt_out_string(&err, &buf, &blen, " AR");
-			} else if (payload & 0x4) {
-				arm_spe_pkt_out_string(&err, &buf, &blen, " SIMD-FP");
-			}
-			break;
-		case 2:
-			arm_spe_pkt_out_string(&err, &buf, &blen, "B");
-
-			if (payload & 0x1)
-				arm_spe_pkt_out_string(&err, &buf, &blen, " COND");
-			if (payload & 0x2)
-				arm_spe_pkt_out_string(&err, &buf, &blen, " IND");
-
-			break;
-		default:
-			/* Unknown index */
-			err = -1;
-			break;
-		}
+		err = arm_spe_pkt_desc_op_type(packet, buf, buf_len);
 		break;
 	case ARM_SPE_DATA_SOURCE:
 	case ARM_SPE_TIMESTAMP:
-- 
cgit v1.2.3-70-g09d2


From e771218f32f97c0940ae46c23e20d27f3d4c05e3 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:38 +0800
Subject: perf arm-spe: Refactor operation packet handling

Defines macros for operation packet header and formats (support sub
classes for 'other', 'branch', 'load and store', etc).  Uses these
macros for operation packet decoding and dumping.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-14-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 26 ++++++++++++----------
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.h     | 23 +++++++++++++++++++
 2 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index d6c060f119b4..1d1354a0eef4 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -144,7 +144,7 @@ static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
 			       struct arm_spe_pkt *packet)
 {
 	packet->type = ARM_SPE_OP_TYPE;
-	packet->index = buf[0] & 0x3;
+	packet->index = SPE_OP_PKT_HDR_CLASS(buf[0]);
 	return arm_spe_get_payload(buf, len, 0, packet);
 }
 
@@ -328,31 +328,33 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
 	int err = 0;
 
 	switch (packet->index) {
-	case 0:
+	case SPE_OP_PKT_HDR_CLASS_OTHER:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len,
-				payload & 0x1 ? "COND-SELECT" : "INSN-OTHER");
+			payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER");
 		break;
-	case 1:
+	case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len,
 				       payload & 0x1 ? "ST" : "LD");
 
-		if (payload & 0x2) {
-			if (payload & 0x4)
+		if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) {
+			if (payload & SPE_OP_PKT_AT)
 				arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT");
-			if (payload & 0x8)
+			if (payload & SPE_OP_PKT_EXCL)
 				arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
-			if (payload & 0x10)
+			if (payload & SPE_OP_PKT_AR)
 				arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
-		} else if (payload & 0x4) {
+		} else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) ==
+					SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) {
 			arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
 		}
 		break;
-	case 2:
+	case SPE_OP_PKT_HDR_CLASS_BR_ERET:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, "B");
 
-		if (payload & 0x1)
+		if (payload & SPE_OP_PKT_COND)
 			arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND");
-		if (payload & 0x2)
+
+		if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload))
 			arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND");
 
 		break;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 42ed4e61ede2..7032fc141ad4 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -105,6 +105,29 @@ enum arm_spe_events {
 	EV_EMPTY_PREDICATE	= 18,
 };
 
+/* Operation packet header */
+#define SPE_OP_PKT_HDR_CLASS(h)			((h) & GENMASK_ULL(1, 0))
+#define SPE_OP_PKT_HDR_CLASS_OTHER		0x0
+#define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC	0x1
+#define SPE_OP_PKT_HDR_CLASS_BR_ERET		0x2
+
+#define SPE_OP_PKT_COND				BIT(0)
+
+#define SPE_OP_PKT_LDST_SUBCLASS_GET(v)		((v) & GENMASK_ULL(7, 1))
+#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG		0x0
+#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP	0x4
+#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG	0x10
+#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG	0x30
+
+#define SPE_OP_PKT_IS_LDST_ATOMIC(v)		(((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2)
+
+#define SPE_OP_PKT_AR				BIT(4)
+#define SPE_OP_PKT_EXCL				BIT(3)
+#define SPE_OP_PKT_AT				BIT(2)
+#define SPE_OP_PKT_ST				BIT(0)
+
+#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v)	(((v) & GENMASK_ULL(7, 1)) == 0x2)
+
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
 int arm_spe_get_packet(const unsigned char *buf, size_t len,
-- 
cgit v1.2.3-70-g09d2


From 3d829724b16c5d2de42e6c9601c696c93a10bc61 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Thu, 19 Nov 2020 23:24:39 +0800
Subject: perf arm-spe: Add more sub classes for operation packet

For the operation type packet payload with load/store class, it misses
to support these sub classes:

  - A load/store targeting the general-purpose registers;
  - A load/store targeting unspecified registers;
  - The ARMv8.4 nested virtualisation extension can redirect system
    register accesses to a memory page controlled by the hypervisor.
    The SPE profiling feature in newer implementations can tag those
    memory accesses accordingly.

Add the bit pattern describing load/store sub classes, so that the perf
tool can decode it properly.

Inspired by Andre Przywara, refined the commit log and code for more
clear description.

Co-developed-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-15-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 1d1354a0eef4..84d661aab54f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -343,9 +343,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
 				arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL");
 			if (payload & SPE_OP_PKT_AR)
 				arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR");
-		} else if (SPE_OP_PKT_LDST_SUBCLASS_GET(payload) ==
-					SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP) {
+		}
+
+		switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) {
+		case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP:
 			arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP");
+			break;
+		case SPE_OP_PKT_LDST_SUBCLASS_GP_REG:
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG");
+			break;
+		case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG:
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG");
+			break;
+		case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG:
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG");
+			break;
+		default:
+			break;
 		}
 		break;
 	case SPE_OP_PKT_HDR_CLASS_BR_ERET:
-- 
cgit v1.2.3-70-g09d2


From 3601e605501df289db149785e1e6a8d16e557d31 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Thu, 19 Nov 2020 23:24:40 +0800
Subject: perf arm_spe: Decode memory tagging properties

When SPE records a physical address, it can additionally tag the event
with information from the Memory Tagging architecture extension.

Decode the two additional fields in the SPE event payload.

[leoy: Refined patch to use predefined macros]

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wei Li <liwei391@huawei.com>
Link: https://lore.kernel.org/r/20201119152441.6972-16-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 6 +++++-
 tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 84d661aab54f..57c01ce27915 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -385,6 +385,7 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
 				 char *buf, size_t buf_len)
 {
 	int ns, el, idx = packet->index;
+	int ch, pat;
 	u64 payload = packet->payload;
 	int err = 0;
 
@@ -404,9 +405,12 @@ static int arm_spe_pkt_desc_addr(const struct arm_spe_pkt *packet,
 		break;
 	case SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS:
 		ns = !!SPE_ADDR_PKT_GET_NS(payload);
+		ch = !!SPE_ADDR_PKT_GET_CH(payload);
+		pat = SPE_ADDR_PKT_GET_PAT(payload);
 		payload = SPE_ADDR_PKT_ADDR_GET_BYTES_0_6(payload);
 		arm_spe_pkt_out_string(&err, &buf, &buf_len,
-				       "PA 0x%llx ns=%d", payload, ns);
+				       "PA 0x%llx ns=%d ch=%d pat=%x",
+				       payload, ns, ch, pat);
 		break;
 	default:
 		/* Unknown index */
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 7032fc141ad4..1ad14885c2a1 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -73,6 +73,8 @@ struct arm_spe_pkt {
 
 #define SPE_ADDR_PKT_GET_NS(v)			(((v) & BIT_ULL(63)) >> 63)
 #define SPE_ADDR_PKT_GET_EL(v)			(((v) & GENMASK_ULL(62, 61)) >> 61)
+#define SPE_ADDR_PKT_GET_CH(v)			(((v) & BIT_ULL(62)) >> 62)
+#define SPE_ADDR_PKT_GET_PAT(v)			(((v) & GENMASK_ULL(59, 56)) >> 56)
 
 #define SPE_ADDR_PKT_EL0			0
 #define SPE_ADDR_PKT_EL1			1
-- 
cgit v1.2.3-70-g09d2


From 05e91e7fe26c6fb116fa16f43c1eed78020f9463 Mon Sep 17 00:00:00 2001
From: Wei Li <liwei391@huawei.com>
Date: Thu, 19 Nov 2020 23:24:41 +0800
Subject: perf arm-spe: Add support for ARMv8.3-SPE

This patch is to support Armv8.3 extension for SPE, it adds alignment
field in the Events packet and it supports the Scalable Vector Extension
(SVE) for Operation packet and Events packet with two additions:

  - The vector length for SVE operations in the Operation Type packet;
  - The incomplete predicate and empty predicate fields in the Events
    packet.

Signed-off-by: Wei Li <liwei391@huawei.com>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20201119152441.6972-17-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.c     | 36 ++++++++++++++++++++--
 .../util/arm-spe-decoder/arm-spe-pkt-decoder.h     | 16 ++++++++++
 2 files changed, 50 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index 57c01ce27915..f3ac9d40cebf 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -317,6 +317,12 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet,
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " LLC-REFILL");
 	if (payload & BIT(EV_REMOTE_ACCESS))
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS");
+	if (payload & BIT(EV_ALIGNMENT))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT");
+	if (payload & BIT(EV_PARTIAL_PREDICATE))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED");
+	if (payload & BIT(EV_EMPTY_PREDICATE))
+		arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED");
 
 	return err;
 }
@@ -329,8 +335,23 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
 
 	switch (packet->index) {
 	case SPE_OP_PKT_HDR_CLASS_OTHER:
-		arm_spe_pkt_out_string(&err, &buf, &buf_len,
-			payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER");
+		if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) {
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER");
+
+			/* SVE effective vector length */
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+					       SPE_OP_PKG_SVE_EVL(payload));
+
+			if (payload & SPE_OP_PKT_SVE_FP)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP");
+			if (payload & SPE_OP_PKT_SVE_PRED)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+		} else {
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER");
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s",
+					       payload & SPE_OP_PKT_COND ?
+					       "COND-SELECT" : "INSN-OTHER");
+		}
 		break;
 	case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len,
@@ -361,6 +382,17 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet,
 		default:
 			break;
 		}
+
+		if (SPE_OP_PKT_IS_LDST_SVE(payload)) {
+			/* SVE effective vector length */
+			arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d",
+					       SPE_OP_PKG_SVE_EVL(payload));
+
+			if (payload & SPE_OP_PKT_SVE_PRED)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED");
+			if (payload & SPE_OP_PKT_SVE_SG)
+				arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG");
+		}
 		break;
 	case SPE_OP_PKT_HDR_CLASS_BR_ERET:
 		arm_spe_pkt_out_string(&err, &buf, &buf_len, "B");
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index 1ad14885c2a1..9b970e7bf1e2 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -113,6 +113,8 @@ enum arm_spe_events {
 #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC	0x1
 #define SPE_OP_PKT_HDR_CLASS_BR_ERET		0x2
 
+#define SPE_OP_PKT_IS_OTHER_SVE_OP(v)		(((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8)
+
 #define SPE_OP_PKT_COND				BIT(0)
 
 #define SPE_OP_PKT_LDST_SUBCLASS_GET(v)		((v) & GENMASK_ULL(7, 1))
@@ -128,6 +130,20 @@ enum arm_spe_events {
 #define SPE_OP_PKT_AT				BIT(2)
 #define SPE_OP_PKT_ST				BIT(0)
 
+#define SPE_OP_PKT_IS_LDST_SVE(v)		(((v) & (BIT(3) | BIT(1))) == 0x8)
+
+#define SPE_OP_PKT_SVE_SG			BIT(7)
+/*
+ * SVE effective vector length (EVL) is stored in byte 0 bits [6:4];
+ * the length is rounded up to a power of two and use 32 as one step,
+ * so EVL calculation is:
+ *
+ *   32 * (2 ^ bits [6:4]) = 32 << (bits [6:4])
+ */
+#define SPE_OP_PKG_SVE_EVL(v)			(32 << (((v) & GENMASK_ULL(6, 4)) >> 4))
+#define SPE_OP_PKT_SVE_PRED			BIT(2)
+#define SPE_OP_PKT_SVE_FP			BIT(1)
+
 #define SPE_OP_PKT_IS_INDIRECT_BRANCH(v)	(((v) & GENMASK_ULL(7, 1)) == 0x2)
 
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
-- 
cgit v1.2.3-70-g09d2


From 7679325702c90aecd393cd7cde685576c14489c0 Mon Sep 17 00:00:00 2001
From: Barry Song <song.bao.hua@hisilicon.com>
Date: Mon, 16 Nov 2020 19:08:48 +1300
Subject: selftests/dma: add test application for DMA_MAP_BENCHMARK

This patch provides the test application for DMA_MAP_BENCHMARK.

Before running the test application, we need to bind a device to dma_map_
benchmark driver. For example, unbind "xxx" from its original driver and
bind to dma_map_benchmark:

echo dma_map_benchmark > /sys/bus/platform/devices/xxx/driver_override
echo xxx > /sys/bus/platform/drivers/xxx/unbind
echo xxx > /sys/bus/platform/drivers/dma_map_benchmark/bind

Another example for PCI devices:
echo dma_map_benchmark > /sys/bus/pci/devices/0000:00:01.0/driver_override
echo 0000:00:01.0 > /sys/bus/pci/drivers/xxx/unbind
echo 0000:00:01.0 > /sys/bus/pci/drivers/dma_map_benchmark/bind

The below command will run 16 threads on numa node 0 for 10 seconds on
the device bound to dma_map_benchmark platform_driver or pci_driver:
./dma_map_benchmark -t 16 -s 10 -n 0
dma mapping benchmark: threads:16 seconds:10
average map latency(us):1.1 standard deviation:1.9
average unmap latency(us):0.5 standard deviation:0.8

Cc: Will Deacon <will@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 MAINTAINERS                                     |   6 ++
 tools/testing/selftests/dma/Makefile            |   6 ++
 tools/testing/selftests/dma/config              |   1 +
 tools/testing/selftests/dma/dma_map_benchmark.c | 123 ++++++++++++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 tools/testing/selftests/dma/Makefile
 create mode 100644 tools/testing/selftests/dma/config
 create mode 100644 tools/testing/selftests/dma/dma_map_benchmark.c

(limited to 'tools')

diff --git a/MAINTAINERS b/MAINTAINERS
index a008b70f3c16..c88abd1d323b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5237,6 +5237,12 @@ F:	include/linux/dma-mapping.h
 F:	include/linux/dma-map-ops.h
 F:	kernel/dma/
 
+DMA MAPPING BENCHMARK
+M:	Barry Song <song.bao.hua@hisilicon.com>
+L:	iommu@lists.linux-foundation.org
+F:	kernel/dma/map_benchmark.c
+F:	tools/testing/selftests/dma/
+
 DMA-BUF HEAPS FRAMEWORK
 M:	Sumit Semwal <sumit.semwal@linaro.org>
 R:	Benjamin Gaignard <benjamin.gaignard@linaro.org>
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
new file mode 100644
index 000000000000..aa8e8b5b3864
--- /dev/null
+++ b/tools/testing/selftests/dma/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := dma_map_benchmark
+
+include ../lib.mk
diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config
new file mode 100644
index 000000000000..6102ee3c43cd
--- /dev/null
+++ b/tools/testing/selftests/dma/config
@@ -0,0 +1 @@
+CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
new file mode 100644
index 000000000000..7065163a8388
--- /dev/null
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Hisilicon Limited.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+#define DMA_MAP_BENCHMARK	_IOWR('d', 1, struct map_benchmark)
+#define DMA_MAP_MAX_THREADS	1024
+#define DMA_MAP_MAX_SECONDS     300
+
+#define DMA_MAP_BIDIRECTIONAL	0
+#define DMA_MAP_TO_DEVICE	1
+#define DMA_MAP_FROM_DEVICE	2
+
+static char *directions[] = {
+	"BIDIRECTIONAL",
+	"TO_DEVICE",
+	"FROM_DEVICE",
+};
+
+struct map_benchmark {
+	__u64 avg_map_100ns; /* average map latency in 100ns */
+	__u64 map_stddev; /* standard deviation of map latency */
+	__u64 avg_unmap_100ns; /* as above */
+	__u64 unmap_stddev;
+	__u32 threads; /* how many threads will do map/unmap in parallel */
+	__u32 seconds; /* how long the test will last */
+	__s32 node; /* which numa node this benchmark will run on */
+	__u32 dma_bits; /* DMA addressing capability */
+	__u32 dma_dir; /* DMA data direction */
+	__u64 expansion[10];	/* For future use */
+};
+
+int main(int argc, char **argv)
+{
+	struct map_benchmark map;
+	int fd, opt;
+	/* default single thread, run 20 seconds on NUMA_NO_NODE */
+	int threads = 1, seconds = 20, node = -1;
+	/* default dma mask 32bit, bidirectional DMA */
+	int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
+
+	int cmd = DMA_MAP_BENCHMARK;
+	char *p;
+
+	while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+		switch (opt) {
+		case 't':
+			threads = atoi(optarg);
+			break;
+		case 's':
+			seconds = atoi(optarg);
+			break;
+		case 'n':
+			node = atoi(optarg);
+			break;
+		case 'b':
+			bits = atoi(optarg);
+			break;
+		case 'd':
+			dir = atoi(optarg);
+			break;
+		default:
+			return -1;
+		}
+	}
+
+	if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
+		fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
+			DMA_MAP_MAX_THREADS);
+		exit(1);
+	}
+
+	if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
+		fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
+			DMA_MAP_MAX_SECONDS);
+		exit(1);
+	}
+
+	/* suppose the mininum DMA zone is 1MB in the world */
+	if (bits < 20 || bits > 64) {
+		fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
+		exit(1);
+	}
+
+	if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
+			dir != DMA_MAP_FROM_DEVICE) {
+		fprintf(stderr, "invalid dma direction\n");
+		exit(1);
+	}
+
+	fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
+
+	map.seconds = seconds;
+	map.threads = threads;
+	map.node = node;
+	map.dma_bits = bits;
+	map.dma_dir = dir;
+	if (ioctl(fd, cmd, &map)) {
+		perror("ioctl");
+		exit(1);
+	}
+
+	printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
+			threads, seconds, node, dir[directions]);
+	printf("average map latency(us):%.1f standard deviation:%.1f\n",
+			map.avg_map_100ns/10.0, map.map_stddev/10.0);
+	printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
+			map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From b3e453272d436aab8adbe810c6d7043670281487 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:06 +0100
Subject: tools lib: Adopt memchr_inv() from kernel

We'll use it to check for undefined/zero data.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-6-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/string.h |  1 +
 tools/lib/string.c           | 58 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'tools')

diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index 5e9e781905ed..db5c99318c79 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -46,4 +46,5 @@ extern char * __must_check skip_spaces(const char *);
 
 extern char *strim(char *);
 
+extern void *memchr_inv(const void *start, int c, size_t bytes);
 #endif /* _TOOLS_LINUX_STRING_H_ */
diff --git a/tools/lib/string.c b/tools/lib/string.c
index f645343815de..8b6892f959ab 100644
--- a/tools/lib/string.c
+++ b/tools/lib/string.c
@@ -168,3 +168,61 @@ char *strreplace(char *s, char old, char new)
 			*s = new;
 	return s;
 }
+
+static void *check_bytes8(const u8 *start, u8 value, unsigned int bytes)
+{
+	while (bytes) {
+		if (*start != value)
+			return (void *)start;
+		start++;
+		bytes--;
+	}
+	return NULL;
+}
+
+/**
+ * memchr_inv - Find an unmatching character in an area of memory.
+ * @start: The memory area
+ * @c: Find a character other than c
+ * @bytes: The size of the area.
+ *
+ * returns the address of the first character other than @c, or %NULL
+ * if the whole buffer contains just @c.
+ */
+void *memchr_inv(const void *start, int c, size_t bytes)
+{
+	u8 value = c;
+	u64 value64;
+	unsigned int words, prefix;
+
+	if (bytes <= 16)
+		return check_bytes8(start, value, bytes);
+
+	value64 = value;
+	value64 |= value64 << 8;
+	value64 |= value64 << 16;
+	value64 |= value64 << 32;
+
+	prefix = (unsigned long)start % 8;
+	if (prefix) {
+		u8 *r;
+
+		prefix = 8 - prefix;
+		r = check_bytes8(start, value, prefix);
+		if (r)
+			return r;
+		start += prefix;
+		bytes -= prefix;
+	}
+
+	words = bytes / 8;
+
+	while (words) {
+		if (*(u64 *)start != value64)
+			return check_bytes8(start, value, 8);
+		start += 8;
+		words--;
+	}
+
+	return check_bytes8(start, value, bytes % 8);
+}
-- 
cgit v1.2.3-70-g09d2


From f45edd86b23a7dc576b881b3da53936ac9f8dffb Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:08 +0100
Subject: perf tools: Add build_id__is_defined function

Adding build_id__is_defined helper to check build id is defined and is
!= zero build id.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-8-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/build-id.c | 6 ++++++
 tools/perf/util/build-id.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 6b410c3d52dc..2aacc8b29f7e 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -37,6 +37,7 @@
 
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
+#include <linux/string.h>
 #include <asm/bug.h>
 
 static bool no_buildid_cache;
@@ -912,3 +913,8 @@ void build_id__init(struct build_id *bid, const u8 *data, size_t size)
 	memcpy(bid->data, data, size);
 	bid->size = size;
 }
+
+bool build_id__is_defined(const struct build_id *bid)
+{
+	return bid && bid->size ? !!memchr_inv(bid->data, 0, bid->size) : false;
+}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index f293f99d5dba..d53415feaf69 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -21,6 +21,7 @@ struct feat_fd;
 
 void build_id__init(struct build_id *bid, const u8 *data, size_t size);
 int build_id__sprintf(const struct build_id *build_id, char *bf);
+bool build_id__is_defined(const struct build_id *bid);
 int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
 int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
 char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
-- 
cgit v1.2.3-70-g09d2


From 7ac22b088afe26a42978ff7576730ca419da76aa Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:09 +0100
Subject: perf tools: Add filename__decompress function

Factor filename__decompress from decompress_kmodule function.  It can
decompress files with compressions supported in perf - xz and gz, the
support needs to be compiled in.

It will to be used in following changes to get build id out of
compressed elf objects.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-9-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c | 31 +++++++++++++++++++------------
 tools/perf/util/dso.h |  2 ++
 2 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 89b5fd2b5de3..d786cf6b0cfa 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -279,18 +279,12 @@ bool dso__needs_decompress(struct dso *dso)
 		dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
 }
 
-static int decompress_kmodule(struct dso *dso, const char *name,
-			      char *pathname, size_t len)
+int filename__decompress(const char *name, char *pathname,
+			 size_t len, int comp, int *err)
 {
 	char tmpbuf[] = KMOD_DECOMP_NAME;
 	int fd = -1;
 
-	if (!dso__needs_decompress(dso))
-		return -1;
-
-	if (dso->comp == COMP_ID__NONE)
-		return -1;
-
 	/*
 	 * We have proper compression id for DSO and yet the file
 	 * behind the 'name' can still be plain uncompressed object.
@@ -304,17 +298,17 @@ static int decompress_kmodule(struct dso *dso, const char *name,
 	 * To keep this transparent, we detect this and return the file
 	 * descriptor to the uncompressed file.
 	 */
-	if (!compressions[dso->comp].is_compressed(name))
+	if (!compressions[comp].is_compressed(name))
 		return open(name, O_RDONLY);
 
 	fd = mkstemp(tmpbuf);
 	if (fd < 0) {
-		dso->load_errno = errno;
+		*err = errno;
 		return -1;
 	}
 
-	if (compressions[dso->comp].decompress(name, fd)) {
-		dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+	if (compressions[comp].decompress(name, fd)) {
+		*err = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
 		close(fd);
 		fd = -1;
 	}
@@ -328,6 +322,19 @@ static int decompress_kmodule(struct dso *dso, const char *name,
 	return fd;
 }
 
+static int decompress_kmodule(struct dso *dso, const char *name,
+			      char *pathname, size_t len)
+{
+	if (!dso__needs_decompress(dso))
+		return -1;
+
+	if (dso->comp == COMP_ID__NONE)
+		return -1;
+
+	return filename__decompress(name, pathname, len, dso->comp,
+				    &dso->load_errno);
+}
+
 int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
 {
 	return decompress_kmodule(dso, name, NULL, 0);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index d8cb4f5680a4..cd2fe64a3c5d 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -274,6 +274,8 @@ bool dso__needs_decompress(struct dso *dso);
 int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
 int dso__decompress_kmodule_path(struct dso *dso, const char *name,
 				 char *pathname, size_t len);
+int filename__decompress(const char *name, char *pathname,
+			 size_t len, int comp, int *err);
 
 #define KMOD_DECOMP_NAME  "/tmp/perf-kmod-XXXXXX"
 #define KMOD_DECOMP_LEN   sizeof(KMOD_DECOMP_NAME)
-- 
cgit v1.2.3-70-g09d2


From af21c579c860d10da1b0620e3d5d14abdc0b5fff Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:11 +0100
Subject: perf build-id: Add check for existing link in buildid dir

When adding new build id link we fail if the link is already there.
Adding check for existing link and output debug message that the build
id is already linked.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-11-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/build-id.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 2aacc8b29f7e..4a391f13f40d 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -755,8 +755,25 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
 	tmp = dir_name + strlen(buildid_dir) - 5;
 	memcpy(tmp, "../..", 5);
 
-	if (symlink(tmp, linkname) == 0)
+	if (symlink(tmp, linkname) == 0) {
 		err = 0;
+	} else if (errno == EEXIST) {
+		char path[PATH_MAX];
+		ssize_t len;
+
+		len = readlink(linkname, path, sizeof(path) - 1);
+		if (len <= 0) {
+			pr_err("Cant read link: %s\n", linkname);
+			goto out_free;
+		}
+		path[len] = '\0';
+
+		if (strcmp(tmp, path)) {
+			pr_debug("build <%s> already linked to %s\n",
+				 sbuild_id, linkname);
+		}
+		err = 0;
+	}
 
 	/* Update SDT cache : error is just warned */
 	if (realname &&
-- 
cgit v1.2.3-70-g09d2


From 031f112f8dc0f211b59b1b33032671f035edc25d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:12 +0100
Subject: perf tools: Use struct extra_kernel_map in
 machine__process_kernel_mmap_event

Using struct extra_kernel_map in machine__process_kernel_mmap_event, to
pass mmap details. This way we can used single function for all 3 mmap
versions.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-12-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 62 +++++++++++++++++++++++++----------------------
 1 file changed, 33 insertions(+), 29 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 15385ea00190..1ae32a81639c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1581,32 +1581,25 @@ static bool machine__uses_kcore(struct machine *machine)
 }
 
 static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
-					     union perf_event *event)
+					     struct extra_kernel_map *xm)
 {
 	return machine__is(machine, "x86_64") &&
-	       is_entry_trampoline(event->mmap.filename);
+	       is_entry_trampoline(xm->name);
 }
 
 static int machine__process_extra_kernel_map(struct machine *machine,
-					     union perf_event *event)
+					     struct extra_kernel_map *xm)
 {
 	struct dso *kernel = machine__kernel_dso(machine);
-	struct extra_kernel_map xm = {
-		.start = event->mmap.start,
-		.end   = event->mmap.start + event->mmap.len,
-		.pgoff = event->mmap.pgoff,
-	};
 
 	if (kernel == NULL)
 		return -1;
 
-	strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
-
-	return machine__create_extra_kernel_map(machine, kernel, &xm);
+	return machine__create_extra_kernel_map(machine, kernel, xm);
 }
 
 static int machine__process_kernel_mmap_event(struct machine *machine,
-					      union perf_event *event)
+					      struct extra_kernel_map *xm)
 {
 	struct map *map;
 	enum dso_space_type dso_space;
@@ -1621,20 +1614,18 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 	else
 		dso_space = DSO_SPACE__KERNEL_GUEST;
 
-	is_kernel_mmap = memcmp(event->mmap.filename,
-				machine->mmap_name,
+	is_kernel_mmap = memcmp(xm->name, machine->mmap_name,
 				strlen(machine->mmap_name) - 1) == 0;
-	if (event->mmap.filename[0] == '/' ||
-	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
-		map = machine__addnew_module_map(machine, event->mmap.start,
-						 event->mmap.filename);
+	if (xm->name[0] == '/' ||
+	    (!is_kernel_mmap && xm->name[0] == '[')) {
+		map = machine__addnew_module_map(machine, xm->start,
+						 xm->name);
 		if (map == NULL)
 			goto out_problem;
 
-		map->end = map->start + event->mmap.len;
+		map->end = map->start + xm->end - xm->start;
 	} else if (is_kernel_mmap) {
-		const char *symbol_name = (event->mmap.filename +
-				strlen(machine->mmap_name));
+		const char *symbol_name = (xm->name + strlen(machine->mmap_name));
 		/*
 		 * Should be there already, from the build-id table in
 		 * the header.
@@ -1688,18 +1679,17 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		if (strstr(kernel->long_name, "vmlinux"))
 			dso__set_short_name(kernel, "[kernel.vmlinux]", false);
 
-		machine__update_kernel_mmap(machine, event->mmap.start,
-					 event->mmap.start + event->mmap.len);
+		machine__update_kernel_mmap(machine, xm->start, xm->end);
 
 		/*
 		 * Avoid using a zero address (kptr_restrict) for the ref reloc
 		 * symbol. Effectively having zero here means that at record
 		 * time /proc/sys/kernel/kptr_restrict was non zero.
 		 */
-		if (event->mmap.pgoff != 0) {
+		if (xm->pgoff != 0) {
 			map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
 							symbol_name,
-							event->mmap.pgoff);
+							xm->pgoff);
 		}
 
 		if (machine__is_default_guest(machine)) {
@@ -1708,8 +1698,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 			 */
 			dso__load(kernel, machine__kernel_map(machine));
 		}
-	} else if (perf_event__is_extra_kernel_mmap(machine, event)) {
-		return machine__process_extra_kernel_map(machine, event);
+	} else if (perf_event__is_extra_kernel_mmap(machine, xm)) {
+		return machine__process_extra_kernel_map(machine, xm);
 	}
 	return 0;
 out_problem:
@@ -1735,7 +1725,14 @@ int machine__process_mmap2_event(struct machine *machine,
 
 	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
 	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
-		ret = machine__process_kernel_mmap_event(machine, event);
+		struct extra_kernel_map xm = {
+			.start = event->mmap2.start,
+			.end   = event->mmap2.start + event->mmap2.len,
+			.pgoff = event->mmap2.pgoff,
+		};
+
+		strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN);
+		ret = machine__process_kernel_mmap_event(machine, &xm);
 		if (ret < 0)
 			goto out_problem;
 		return 0;
@@ -1785,7 +1782,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 
 	if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
 	    sample->cpumode == PERF_RECORD_MISC_KERNEL) {
-		ret = machine__process_kernel_mmap_event(machine, event);
+		struct extra_kernel_map xm = {
+			.start = event->mmap.start,
+			.end   = event->mmap.start + event->mmap.len,
+			.pgoff = event->mmap.pgoff,
+		};
+
+		strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+		ret = machine__process_kernel_mmap_event(machine, &xm);
 		if (ret < 0)
 			goto out_problem;
 		return 0;
-- 
cgit v1.2.3-70-g09d2


From ca8ea73ae109900cec4c3a1f0d3486a01a0e4434 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:13 +0100
Subject: perf symbols: Try to load vmlinux from buildid database

Currently we don't check on kernel's vmlinux the same way as we do for
normal binaries, but we either look for kallsyms file in build id
database or check on known vmlinux locations (plus some other optional
paths).

This patch adds the check for standard build id binary location, so we
are ready once we start to store it there from debuginfod in following
changes.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-13-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/build-id.c | 13 ++++++++++---
 tools/perf/util/build-id.h |  2 ++
 tools/perf/util/symbol.c   | 16 ++++++++++++++++
 3 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 4a391f13f40d..1fd58703c2d4 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -261,10 +261,9 @@ static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso,
 	    "debug" : "elf"));
 }
 
-char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
-			     bool is_debug)
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			       bool is_debug, bool is_kallsyms)
 {
-	bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
 	bool is_vdso = dso__is_vdso((struct dso *)dso);
 	char sbuild_id[SBUILD_ID_SIZE];
 	char *linkname;
@@ -293,6 +292,14 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
 	return bf;
 }
 
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			     bool is_debug)
+{
+	bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+
+	return __dso__build_id_filename(dso, bf, size, is_debug, is_kallsyms);
+}
+
 #define dsos__for_each_with_build_id(pos, head)	\
 	list_for_each_entry(pos, head, node)	\
 		if (!pos->has_build_id)		\
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index d53415feaf69..f1a2f67df6e4 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -29,6 +29,8 @@ char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
 
 char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
 			     bool is_debug);
+char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+			       bool is_debug, bool is_kallsyms);
 
 int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
 			   struct perf_sample *sample, struct evsel *evsel,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 0d14abdf3d72..64a039cbba1b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -2189,6 +2189,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map)
 	int err;
 	const char *kallsyms_filename = NULL;
 	char *kallsyms_allocated_filename = NULL;
+	char *filename = NULL;
+
 	/*
 	 * Step 1: if the user specified a kallsyms or vmlinux filename, use
 	 * it and only it, reporting errors to the user if it cannot be used.
@@ -2213,6 +2215,20 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map)
 		return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false);
 	}
 
+	/*
+	 * Before checking on common vmlinux locations, check if it's
+	 * stored as standard build id binary (not kallsyms) under
+	 * .debug cache.
+	 */
+	if (!symbol_conf.ignore_vmlinux_buildid)
+		filename = __dso__build_id_filename(dso, NULL, 0, false, false);
+	if (filename != NULL) {
+		err = dso__load_vmlinux(dso, map, filename, true);
+		if (err > 0)
+			return err;
+		free(filename);
+	}
+
 	if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
 		err = dso__load_vmlinux_path(dso, map);
 		if (err > 0)
-- 
cgit v1.2.3-70-g09d2


From 058f15113042bc2fa03b0b134bfc7fb8cd156878 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:21 +0100
Subject: perf data: Add is_perf_data function

Adding is_perf_data function that returns true if the given path is perf
data file. It will be used in following patches.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-21-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data.c | 19 +++++++++++++++++++
 tools/perf/util/data.h |  1 +
 2 files changed, 20 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 05bbcb663c41..f29af4fc3d09 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -492,3 +492,22 @@ char *perf_data__kallsyms_name(struct perf_data *data)
 
 	return kallsyms_name;
 }
+
+bool is_perf_data(const char *path)
+{
+	bool ret = false;
+	FILE *file;
+	u64 magic;
+
+	file = fopen(path, "r");
+	if (!file)
+		return false;
+
+	if (fread(&magic, 1, 8, file) < 8)
+		goto out;
+
+	ret = is_perf_magic(magic);
+out:
+	fclose(file);
+	return ret;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index c563fcbb0288..62a3e66fbee8 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -98,4 +98,5 @@ int perf_data__update_dir(struct perf_data *data);
 unsigned long perf_data__size(struct perf_data *data);
 int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz);
 char *perf_data__kallsyms_name(struct perf_data *data);
+bool is_perf_data(const char *path);
 #endif /* __PERF_DATA_H */
-- 
cgit v1.2.3-70-g09d2


From 0b7b9e83c76ccffb994da8266110592e5e767718 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:19 +0100
Subject: perf build-id: Use machine__for_each_dso in
 perf_session__cache_build_ids

Using machine__for_each_dso in perf_session__cache_build_ids, so we can
reuse perf_session__cache_build_ids with different callback in following
changes.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-19-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/build-id.c | 41 ++++++++++++++++-------------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 1fd58703c2d4..948a7f48d668 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -859,12 +859,16 @@ out_free:
 	return err;
 }
 
-static int dso__cache_build_id(struct dso *dso, struct machine *machine)
+static int dso__cache_build_id(struct dso *dso, struct machine *machine,
+			       void *priv __maybe_unused)
 {
 	bool is_kallsyms = dso__is_kallsyms(dso);
 	bool is_vdso = dso__is_vdso(dso);
 	const char *name = dso->long_name;
 
+	if (!dso->has_build_id)
+		return 0;
+
 	if (dso__is_kcore(dso)) {
 		is_kallsyms = true;
 		name = machine->mmap_name;
@@ -873,43 +877,30 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine)
 				     is_kallsyms, is_vdso);
 }
 
-static int __dsos__cache_build_ids(struct list_head *head,
-				   struct machine *machine)
+static int
+machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv)
 {
-	struct dso *pos;
-	int err = 0;
-
-	dsos__for_each_with_build_id(pos, head)
-		if (dso__cache_build_id(pos, machine))
-			err = -1;
+	int ret = machine__for_each_dso(&machines->host, fn, priv);
+	struct rb_node *nd;
 
-	return err;
-}
+	for (nd = rb_first_cached(&machines->guests); nd;
+	     nd = rb_next(nd)) {
+		struct machine *pos = rb_entry(nd, struct machine, rb_node);
 
-static int machine__cache_build_ids(struct machine *machine)
-{
-	return __dsos__cache_build_ids(&machine->dsos.head, machine);
+		ret |= machine__for_each_dso(pos, fn, priv);
+	}
+	return ret ? -1 : 0;
 }
 
 int perf_session__cache_build_ids(struct perf_session *session)
 {
-	struct rb_node *nd;
-	int ret;
-
 	if (no_buildid_cache)
 		return 0;
 
 	if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
 		return -1;
 
-	ret = machine__cache_build_ids(&session->machines.host);
-
-	for (nd = rb_first_cached(&session->machines.guests); nd;
-	     nd = rb_next(nd)) {
-		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret |= machine__cache_build_ids(pos);
-	}
-	return ret ? -1 : 0;
+	return machines__for_each_dso(&session->machines, dso__cache_build_id, NULL) ?  -1 : 0;
 }
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
-- 
cgit v1.2.3-70-g09d2


From 75fb2af68e358324c2bdcb61be8376cffcb2d034 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:20 +0100
Subject: perf build-id: Add __perf_session__cache_build_ids function

Adding __perf_session__cache_build_ids function as an interface for
caching sessions build ids with callback function and its data pointer.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-20-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/build-id.c | 10 ++++++++--
 tools/perf/util/build-id.h |  3 +++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 948a7f48d668..6bf3cc79c5d5 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -892,7 +892,8 @@ machines__for_each_dso(struct machines *machines, machine__dso_t fn, void *priv)
 	return ret ? -1 : 0;
 }
 
-int perf_session__cache_build_ids(struct perf_session *session)
+int __perf_session__cache_build_ids(struct perf_session *session,
+				    machine__dso_t fn, void *priv)
 {
 	if (no_buildid_cache)
 		return 0;
@@ -900,7 +901,12 @@ int perf_session__cache_build_ids(struct perf_session *session)
 	if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
 		return -1;
 
-	return machines__for_each_dso(&session->machines, dso__cache_build_id, NULL) ?  -1 : 0;
+	return machines__for_each_dso(&session->machines, fn, priv) ?  -1 : 0;
+}
+
+int perf_session__cache_build_ids(struct perf_session *session)
+{
+	return __perf_session__cache_build_ids(session, dso__cache_build_id, NULL);
 }
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index f1a2f67df6e4..c6f10e3d2152 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -5,6 +5,7 @@
 #define BUILD_ID_SIZE	20
 #define SBUILD_ID_SIZE	(BUILD_ID_SIZE * 2 + 1)
 
+#include "machine.h"
 #include "tool.h"
 #include <linux/types.h>
 
@@ -46,6 +47,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
 int perf_session__write_buildid_table(struct perf_session *session,
 				      struct feat_fd *fd);
 int perf_session__cache_build_ids(struct perf_session *session);
+int __perf_session__cache_build_ids(struct perf_session *session,
+				    machine__dso_t fn, void *priv);
 
 char *build_id_cache__origname(const char *sbuild_id);
 char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size);
-- 
cgit v1.2.3-70-g09d2


From fd4ebb457c9ca90d10a74aeb85d54e27b08d5e76 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 26 Nov 2020 18:00:22 +0100
Subject: perf build-id: Add build_id_cache__add function

Adding build_id_cache__add function as core function that adds file into
build id database. It will be set from another callers in following
changes.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201126170026.2619053-22-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/build-id.c | 42 +++++++++++++++++++++++++++---------------
 tools/perf/util/build-id.h |  2 ++
 2 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 6bf3cc79c5d5..02df36b30ac5 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -671,24 +671,15 @@ out:
 	return realname;
 }
 
-int build_id_cache__add_s(const char *sbuild_id, const char *name,
-			  struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
+int
+build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+		    struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
 {
 	const size_t size = PATH_MAX;
-	char *realname = NULL, *filename = NULL, *dir_name = NULL,
-	     *linkname = zalloc(size), *tmp;
+	char *filename = NULL, *dir_name = NULL, *linkname = zalloc(size), *tmp;
 	char *debugfile = NULL;
 	int err = -1;
 
-	if (!is_kallsyms) {
-		if (!is_vdso)
-			realname = nsinfo__realpath(name, nsi);
-		else
-			realname = realpath(name, NULL);
-		if (!realname)
-			goto out_free;
-	}
-
 	dir_name = build_id_cache__cachedir(sbuild_id, name, nsi, is_kallsyms,
 					    is_vdso);
 	if (!dir_name)
@@ -788,8 +779,6 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name,
 		pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
 
 out_free:
-	if (!is_kallsyms)
-		free(realname);
 	free(filename);
 	free(debugfile);
 	free(dir_name);
@@ -797,6 +786,29 @@ out_free:
 	return err;
 }
 
+int build_id_cache__add_s(const char *sbuild_id, const char *name,
+			  struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
+{
+	char *realname = NULL;
+	int err = -1;
+
+	if (!is_kallsyms) {
+		if (!is_vdso)
+			realname = nsinfo__realpath(name, nsi);
+		else
+			realname = realpath(name, NULL);
+		if (!realname)
+			goto out_free;
+	}
+
+	err = build_id_cache__add(sbuild_id, name, realname, nsi, is_kallsyms, is_vdso);
+
+out_free:
+	if (!is_kallsyms)
+		free(realname);
+	return err;
+}
+
 static int build_id_cache__add_b(const struct build_id *bid,
 				 const char *name, struct nsinfo *nsi,
 				 bool is_kallsyms, bool is_vdso)
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index c6f10e3d2152..02613f4b2c29 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -63,6 +63,8 @@ char *build_id_cache__complement(const char *incomplete_sbuild_id);
 int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
 				   struct strlist **result);
 bool build_id_cache__cached(const char *sbuild_id);
+int build_id_cache__add(const char *sbuild_id, const char *name, const char *realname,
+			struct nsinfo *nsi, bool is_kallsyms, bool is_vdso);
 int build_id_cache__add_s(const char *sbuild_id,
 			  const char *name, struct nsinfo *nsi,
 			  bool is_kallsyms, bool is_vdso);
-- 
cgit v1.2.3-70-g09d2


From 3b13eaf0ba1d5ab59368e23ff5e5350f51c1a352 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 18 Nov 2020 08:32:33 -0300
Subject: perf tools: Update copy of libbpf's hashmap.c

To pick the changes in:

  7a078d2d18801bba ("libbpf, hashmap: Fix undefined behavior in hash_bits")

That don't entail any changes in tools/perf.

This addresses this perf build warning:

  Warning: Kernel ABI header at 'tools/perf/util/hashmap.h' differs from latest version at 'tools/lib/bpf/hashmap.h'
  diff -u tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h

Not a kernel ABI, its just that this uses the mechanism in place for
checking kernel ABI files drift.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hashmap.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
index d9b385fe808c..10a4c4cd13cf 100644
--- a/tools/perf/util/hashmap.h
+++ b/tools/perf/util/hashmap.h
@@ -15,6 +15,9 @@
 static inline size_t hash_bits(size_t h, int bits)
 {
 	/* shuffle bits and return requested number of upper bits */
+	if (bits == 0)
+		return 0;
+
 #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
 	/* LP64 case */
 	return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
@@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
  * @key: key to iterate entries for
  */
 #define hashmap__for_each_key_entry(map, cur, _key)			    \
-	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-					     map->cap_bits);		    \
-		     map->buckets ? map->buckets[bkt] : NULL; });	    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
 	     cur;							    \
 	     cur = cur->next)						    \
 		if (map->equal_fn(cur->key, (_key), map->ctx))
 
 #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
-	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
-					     map->cap_bits);		    \
-		     cur = map->buckets ? map->buckets[bkt] : NULL; });	    \
+	for (cur = map->buckets						    \
+		     ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+		     : NULL;						    \
 	     cur && ({ tmp = cur->next; true; });			    \
 	     cur = tmp)							    \
 		if (map->equal_fn(cur->key, (_key), map->ctx))
-- 
cgit v1.2.3-70-g09d2


From 9713070028b9ab317f395ee130fa2c4ea741bab4 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 24 Nov 2020 18:36:52 +0800
Subject: perf diff: Fix error return value in __cmd_diff()

An appropriate return value should be set on the failed path.

Fixes: 2a09a84c720b436a ("perf diff: Support hot streams comparison")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20201124103652.438-1-thunder.leizhen@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-diff.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 584e2e1a3793..cefc71506409 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1222,8 +1222,10 @@ static int __cmd_diff(void)
 		if (compute == COMPUTE_STREAM) {
 			d->evlist_streams = evlist__create_streams(
 						d->session->evlist, 5);
-			if (!d->evlist_streams)
+			if (!d->evlist_streams) {
+				ret = -ENOMEM;
 				goto out_delete;
+			}
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From aa50d953c169e876413bf237319e728dd41d9fdd Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 27 Nov 2020 14:43:56 +0900
Subject: perf record: Synthesize cgroup events only if needed

It didn't check the tool->cgroup_events bit which is set when the
--all-cgroups option is given.  Without it, samples will not have cgroup
info so no reason to synthesize.

We can check the PERF_RECORD_CGROUP records after running perf record
*WITHOUT* the --all-cgroups option:

Before:

  $ perf report -D | grep CGROUP
  0 0 0x8430 [0x38]: PERF_RECORD_CGROUP cgroup: 1 /
          CGROUP events:          1
          CGROUP events:          0
          CGROUP events:          0

After:

  $ perf report -D | grep CGROUP
          CGROUP events:          0
          CGROUP events:          0
          CGROUP events:          0

Committer testing:

Before:

  # perf record -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 2.208 MB perf.data (10003 samples) ]
  # perf report -D | grep "CGROUP events"
            CGROUP events:        146
            CGROUP events:          0
            CGROUP events:          0
  #

After:

  # perf record -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 2.208 MB perf.data (10448 samples) ]
  # perf report -D | grep "CGROUP events"
            CGROUP events:          0
            CGROUP events:          0
            CGROUP events:          0
  #

With all-cgroups:

  # perf record --all-cgroups -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 2.374 MB perf.data (11526 samples) ]
  # perf report -D | grep "CGROUP events"
            CGROUP events:        146
            CGROUP events:          0
            CGROUP events:          0
  #

Fixes: 8fb4b67939e16 ("perf record: Add --all-cgroups option")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201127054356.405481-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/synthetic-events.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 8a23391558cf..d9c624377da7 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -563,6 +563,9 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool,
 	char cgrp_root[PATH_MAX];
 	size_t mount_len;  /* length of mount point in the path */
 
+	if (!tool || !tool->cgroup_events)
+		return 0;
+
 	if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) {
 		pr_debug("cannot find cgroup mount point\n");
 		return -1;
-- 
cgit v1.2.3-70-g09d2


From c0ee1d5ae8c8650031badcfca6483a28c0f94f38 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 27 Nov 2020 13:14:03 +0900
Subject: perf stat: Use proper cpu for shadow stats

Currently perf stat shows some metrics (like IPC) for defined events.
But when no aggregation mode is used (-A option), it shows incorrect
values since it used a value from a different cpu.

Before:

  $ perf stat -aA -e cycles,instructions sleep 1

   Performance counter stats for 'system wide':

  CPU0      116,057,380      cycles
  CPU1       86,084,722      cycles
  CPU2       99,423,125      cycles
  CPU3       98,272,994      cycles
  CPU0       53,369,217      instructions      #    0.46  insn per cycle
  CPU1       33,378,058      instructions      #    0.29  insn per cycle
  CPU2       58,150,086      instructions      #    0.50  insn per cycle
  CPU3       40,029,703      instructions      #    0.34  insn per cycle

       1.001816971 seconds time elapsed

So the IPC for CPU1 should be 0.38 (= 33,378,058 / 86,084,722)
but it was 0.29 (= 33,378,058 / 116,057,380) and so on.

After:

  $ perf stat -aA -e cycles,instructions sleep 1

   Performance counter stats for 'system wide':

  CPU0      109,621,384      cycles
  CPU1      159,026,454      cycles
  CPU2       99,460,366      cycles
  CPU3      124,144,142      cycles
  CPU0       44,396,706      instructions      #    0.41  insn per cycle
  CPU1      120,195,425      instructions      #    0.76  insn per cycle
  CPU2       44,763,978      instructions      #    0.45  insn per cycle
  CPU3       69,049,079      instructions      #    0.56  insn per cycle

       1.001910444 seconds time elapsed

Fixes: 44d49a600259 ("perf stat: Support metrics in --per-core/socket mode")
Reported-by: Sam Xi <xyzsam@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201127041404.390276-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 4b57c0c07632..a963b5b8eb72 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -324,13 +324,10 @@ static int first_shadow_cpu(struct perf_stat_config *config,
 	struct evlist *evlist = evsel->evlist;
 	int i;
 
-	if (!config->aggr_get_id)
-		return 0;
-
 	if (config->aggr_mode == AGGR_NONE)
 		return id;
 
-	if (config->aggr_mode == AGGR_GLOBAL)
+	if (!config->aggr_get_id)
 		return 0;
 
 	for (i = 0; i < evsel__nr_cpus(evsel); i++) {
-- 
cgit v1.2.3-70-g09d2


From ab4200c17ba6fe71d2da64317aae8a8aa684624c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 27 Nov 2020 14:48:46 +0900
Subject: perf probe: Fix to die_entrypc() returns error correctly

Fix die_entrypc() to return error correctly if the DIE has no
DW_AT_ranges attribute. Since dwarf_ranges() will treat the case as an
empty ranges and return 0, we have to check it by ourselves.

Fixes: 91e2f539eeda ("perf probe: Fix to show function entry line as probe-able")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: http://lore.kernel.org/lkml/160645612634.2824037.5284932731175079426.stgit@devnote2
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index aa898014ad12..03c1a39c312a 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -373,6 +373,7 @@ bool die_is_func_def(Dwarf_Die *dw_die)
 int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr)
 {
 	Dwarf_Addr base, end;
+	Dwarf_Attribute attr;
 
 	if (!addr)
 		return -EINVAL;
@@ -380,6 +381,13 @@ int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr)
 	if (dwarf_entrypc(dw_die, addr) == 0)
 		return 0;
 
+	/*
+	 *  Since the dwarf_ranges() will return 0 if there is no
+	 * DW_AT_ranges attribute, we should check it first.
+	 */
+	if (!dwarf_attr(dw_die, DW_AT_ranges, &attr))
+		return -ENOENT;
+
 	return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From a9ffd0484eb4426e6befd07e7be6c01108716302 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 27 Nov 2020 14:48:55 +0900
Subject: perf probe: Change function definition check due to broken DWARF

Since some gcc generates a broken DWARF which lacks DW_AT_declaration
attribute from the subprogram DIE of function prototype.
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97060)

So, in addition to the DW_AT_declaration check, we also check the
subprogram DIE has DW_AT_inline or actual entry pc.

Committer testing:

  # cat /etc/fedora-release
  Fedora release 33 (Thirty Three)
  #

Before:

  # perf test vfs_getname
  78: Use vfs_getname probe to get syscall args filenames             : FAILED!
  79: Check open filename arg using perf trace + vfs_getname          : FAILED!
  81: Add vfs_getname probe to get syscall args filenames             : FAILED!
  #

After:

  # perf test vfs_getname
  78: Use vfs_getname probe to get syscall args filenames             : Ok
  79: Check open filename arg using perf trace + vfs_getname          : Ok
  81: Add vfs_getname probe to get syscall args filenames             : Ok
  #

Reported-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: http://lore.kernel.org/lkml/160645613571.2824037.7441351537890235895.stgit@devnote2
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c    | 20 ++++++++++++++++++--
 tools/perf/util/probe-finder.c |  3 +--
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 03c1a39c312a..7b2d471a6419 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -356,9 +356,25 @@ bool die_is_signed_type(Dwarf_Die *tp_die)
 bool die_is_func_def(Dwarf_Die *dw_die)
 {
 	Dwarf_Attribute attr;
+	Dwarf_Addr addr = 0;
+
+	if (dwarf_tag(dw_die) != DW_TAG_subprogram)
+		return false;
+
+	if (dwarf_attr(dw_die, DW_AT_declaration, &attr))
+		return false;
 
-	return (dwarf_tag(dw_die) == DW_TAG_subprogram &&
-		dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL);
+	/*
+	 * DW_AT_declaration can be lost from function declaration
+	 * by gcc's bug #97060.
+	 * So we need to check this subprogram DIE has DW_AT_inline
+	 * or an entry address.
+	 */
+	if (!dwarf_attr(dw_die, DW_AT_inline, &attr) &&
+	    die_entrypc(dw_die, &addr) < 0)
+		return false;
+
+	return true;
 }
 
 /**
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 2c4061035f77..76dd349aa48d 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1885,8 +1885,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
 	if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
 		return DWARF_CB_OK;
 
-	if (die_is_func_def(sp_die) &&
-	    die_match_name(sp_die, lr->function)) {
+	if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) {
 		lf->fname = dwarf_decl_file(sp_die);
 		dwarf_decl_line(sp_die, &lr->offset);
 		pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
-- 
cgit v1.2.3-70-g09d2


From c6232bd40b2eda3819d108e6e3f621ec604e15d8 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Thu, 26 Nov 2020 14:46:41 +0100
Subject: KVM: arm64: selftests: Filter out DEMUX registers

DEMUX register presence depends on the host's hardware (the
CLIDR_EL1 register to be precise). This means there's no set
of them that we can bless and that it's possible to encounter
new ones when running on different hardware (which would
generate "Consider adding them ..." messages, but we'll never
want to add them.)

Remove the ones we have in the blessed list and filter them
out of the new list, but also provide a new command line switch
to list them if one so desires.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20201126134641.35231-3-drjones@redhat.com
---
 tools/testing/selftests/kvm/aarch64/get-reg-list.c | 39 +++++++++++++++++-----
 1 file changed, 30 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 33218a395d9f..486932164cf2 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -42,12 +42,16 @@
 #define for_each_reg(i)								\
 	for ((i) = 0; (i) < reg_list->n; ++(i))
 
+#define for_each_reg_filtered(i)						\
+	for_each_reg(i)								\
+		if (!filter_reg(reg_list->reg[i]))
+
 #define for_each_missing_reg(i)							\
 	for ((i) = 0; (i) < blessed_n; ++(i))					\
 		if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
 
 #define for_each_new_reg(i)							\
-	for ((i) = 0; (i) < reg_list->n; ++(i))					\
+	for_each_reg_filtered(i)						\
 		if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
 
 
@@ -57,6 +61,18 @@ static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
 static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
 static __u64 *blessed_reg, blessed_n;
 
+static bool filter_reg(__u64 reg)
+{
+	/*
+	 * DEMUX register presence depends on the host's CLIDR_EL1.
+	 * This means there's no set of them that we can bless.
+	 */
+	if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return true;
+
+	return false;
+}
+
 static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
 {
 	int i;
@@ -325,7 +341,7 @@ int main(int ac, char **av)
 	struct kvm_vcpu_init init = { .target = -1, };
 	int new_regs = 0, missing_regs = 0, i;
 	int failed_get = 0, failed_set = 0, failed_reject = 0;
-	bool print_list = false, fixup_core_regs = false;
+	bool print_list = false, print_filtered = false, fixup_core_regs = false;
 	struct kvm_vm *vm;
 	__u64 *vec_regs;
 
@@ -336,8 +352,10 @@ int main(int ac, char **av)
 			fixup_core_regs = true;
 		else if (strcmp(av[i], "--list") == 0)
 			print_list = true;
+		else if (strcmp(av[i], "--list-filtered") == 0)
+			print_filtered = true;
 		else
-			fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
+			TEST_FAIL("Unknown option: %s\n", av[i]);
 	}
 
 	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
@@ -350,10 +368,14 @@ int main(int ac, char **av)
 	if (fixup_core_regs)
 		core_reg_fixup();
 
-	if (print_list) {
+	if (print_list || print_filtered) {
 		putchar('\n');
-		for_each_reg(i)
-			print_reg(reg_list->reg[i]);
+		for_each_reg(i) {
+			__u64 id = reg_list->reg[i];
+			if ((print_list && !filter_reg(id)) ||
+			    (print_filtered && filter_reg(id)))
+				print_reg(id);
+		}
 		putchar('\n');
 		return 0;
 	}
@@ -458,6 +480,8 @@ int main(int ac, char **av)
 /*
  * The current blessed list was primed with the output of kernel version
  * v4.15 with --core-reg-fixup and then later updated with new registers.
+ *
+ * The blessed list is up to date with kernel version v5.10-rc5
  */
 static __u64 base_regs[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -736,9 +760,6 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 4, 3, 0, 0),	/* DACR32_EL2 */
 	ARM64_SYS_REG(3, 4, 5, 0, 1),	/* IFSR32_EL2 */
 	ARM64_SYS_REG(3, 4, 5, 3, 0),	/* FPEXC32_EL2 */
-	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
-	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
-	KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
 };
 static __u64 base_regs_n = ARRAY_SIZE(base_regs);
 
-- 
cgit v1.2.3-70-g09d2


From 105c4e75feb411a60f5089f7a1e68b8523f986cc Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Thu, 26 Nov 2020 10:37:35 +0100
Subject: libbpf: Replace size_t with __u32 in xsk interfaces

Replace size_t with __u32 in the xsk interfaces that contain this.
There is no reason to have size_t since the internal variable that
is manipulated is a __u32. The following APIs are affected:

__u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
__u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)

The "nb" variable and the return values have been changed from size_t
to __u32.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1606383455-8243-1-git-send-email-magnus.karlsson@gmail.com
---
 tools/lib/bpf/xsk.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 1719a327e5f9..5865e082ba0b 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -113,8 +113,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 	return (entries > nb) ? nb : entries;
 }
 
-static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
-					    size_t nb, __u32 *idx)
+static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
 {
 	if (xsk_prod_nb_free(prod, nb) < nb)
 		return 0;
@@ -125,7 +124,7 @@ static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
 	return nb;
 }
 
-static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
 {
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
@@ -135,10 +134,9 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
 	*prod->producer += nb;
 }
 
-static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
-					 size_t nb, __u32 *idx)
+static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
 {
-	size_t entries = xsk_cons_nb_avail(cons, nb);
+	__u32 entries = xsk_cons_nb_avail(cons, nb);
 
 	if (entries > 0) {
 		/* Make sure we do not speculatively read the data before
@@ -153,13 +151,12 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
 	return entries;
 }
 
-static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons,
-					 size_t nb)
+static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
 {
 	cons->cached_cons -= nb;
 }
 
-static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
 {
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
-- 
cgit v1.2.3-70-g09d2


From 4f336e88a870ecc56832154dff22853a3ca33e24 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Tue, 24 Nov 2020 18:24:50 +0300
Subject: selftests/tls: add CHACHA20-POLY1305 to tls selftests

Add new cipher as a variant of standard tls selftests

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/tls.c | 40 ++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index b599f1fa99b5..cb0d1890a860 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -103,32 +103,58 @@ FIXTURE(tls)
 
 FIXTURE_VARIANT(tls)
 {
-	unsigned int tls_version;
+	u16 tls_version;
+	u16 cipher_type;
 };
 
-FIXTURE_VARIANT_ADD(tls, 12)
+FIXTURE_VARIANT_ADD(tls, 12_gcm)
 {
 	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_128,
 };
 
-FIXTURE_VARIANT_ADD(tls, 13)
+FIXTURE_VARIANT_ADD(tls, 13_gcm)
 {
 	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_AES_GCM_128,
+};
+
+FIXTURE_VARIANT_ADD(tls, 12_chacha)
+{
+	.tls_version = TLS_1_2_VERSION,
+	.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13_chacha)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
 };
 
 FIXTURE_SETUP(tls)
 {
-	struct tls12_crypto_info_aes_gcm_128 tls12;
+	union tls_crypto_context tls12;
 	struct sockaddr_in addr;
 	socklen_t len;
 	int sfd, ret;
+	size_t tls12_sz;
 
 	self->notls = false;
 	len = sizeof(addr);
 
 	memset(&tls12, 0, sizeof(tls12));
 	tls12.info.version = variant->tls_version;
-	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+	tls12.info.cipher_type = variant->cipher_type;
+	switch (variant->cipher_type) {
+	case TLS_CIPHER_CHACHA20_POLY1305:
+		tls12_sz = sizeof(tls12_crypto_info_chacha20_poly1305);
+		break;
+	case TLS_CIPHER_AES_GCM_128:
+		tls12_sz = sizeof(tls12_crypto_info_aes_gcm_128);
+		break;
+	default:
+		tls12_sz = 0;
+	}
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -156,7 +182,7 @@ FIXTURE_SETUP(tls)
 
 	if (!self->notls) {
 		ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
+				 tls12_sz);
 		ASSERT_EQ(ret, 0);
 	}
 
@@ -169,7 +195,7 @@ FIXTURE_SETUP(tls)
 		ASSERT_EQ(ret, 0);
 
 		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
+				 tls12_sz);
 		ASSERT_EQ(ret, 0);
 	}
 
-- 
cgit v1.2.3-70-g09d2


From e14038a7ead09faa180eb072adc4a2157a0b475f Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Thu, 26 Nov 2020 19:47:47 +0100
Subject: selftests: tc-testing: enable CONFIG_NET_SCH_RED as a module

a proper kernel configuration for running kselftest can be obtained with:

 $ yes | make kselftest-merge

enable compile support for the 'red' qdisc: otherwise, tdc kselftest fail
when trying to run tdc test items contained in red.json.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Link: https://lore.kernel.org/r/cfa23f2d4f672401e6cebca3a321dd1901a9ff07.1606416464.git.dcaratti@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/tc-testing/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index c33a7aac27ff..b71828df5a6d 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -59,6 +59,7 @@ CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
 CONFIG_NET_SCH_FIFO=y
 CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_RED=m
 
 #
 ## Network testing
-- 
cgit v1.2.3-70-g09d2


From 94b69c615e4e2b04d1392d1193c72406ff9fd73e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 27 Nov 2020 13:14:04 +0900
Subject: perf test: Add shadow stat test

It calculates IPC from the cycles and instruction counts and compares it
with the shadow stat for both global aggregation (default) and no
aggregation mode.

 $ perf stat -a -A -e cycles,instructions sleep 1

   Performance counter stats for 'system wide':

  CPU0   39,580,880      cycles
  CPU1   45,426,945      cycles
  CPU2   31,151,685      cycles
  CPU3   55,167,421      cycles
  CPU0   17,073,564      instructions      #    0.43  insn per cycle
  CPU1   34,955,764      instructions      #    0.77  insn per cycle
  CPU2   15,688,459      instructions      #    0.50  insn per cycle
  CPU3   34,699,217      instructions      #    0.63  insn per cycle

       1.003275495 seconds time elapsed

In this example, the 'insn per cycle' should be matched to the number
for each cpu.  For CPU2, 0.50 = 15,688,459 / 31,151,685 .

Committer testing:

  # perf test shadow
  78: perf stat metrics (shadow stat) test                            : Ok
  #

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201127041404.390276-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/stat+shadow_stat.sh | 80 ++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100755 tools/perf/tests/shell/stat+shadow_stat.sh

(limited to 'tools')

diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh
new file mode 100755
index 000000000000..249dfe48cf6a
--- /dev/null
+++ b/tools/perf/tests/shell/stat+shadow_stat.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+# perf stat metrics (shadow stat) test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# skip if system-wide mode is forbidden
+perf stat -a true > /dev/null 2>&1 || exit 2
+
+test_global_aggr()
+{
+	local cyc
+
+	perf stat -a --no-big-num -e cycles,instructions sleep 1  2>&1 | \
+	grep -e cycles -e instructions | \
+	while read num evt hash ipc rest
+	do
+		# skip not counted events
+		if [[ $num == "<not" ]]; then
+			continue
+		fi
+
+		# save cycles count
+		if [[ $evt == "cycles" ]]; then
+			cyc=$num
+			continue
+		fi
+
+		# skip if no cycles
+		if [[ -z $cyc ]]; then
+			continue
+		fi
+
+		# use printf for rounding and a leading zero
+		local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+		if [[ $ipc != $res ]]; then
+			echo "IPC is different: $res != $ipc  ($num / $cyc)"
+			exit 1
+		fi
+	done
+}
+
+test_no_aggr()
+{
+	declare -A results
+
+	perf stat -a -A --no-big-num -e cycles,instructions sleep 1  2>&1 | \
+	grep ^CPU | \
+	while read cpu num evt hash ipc rest
+	do
+		# skip not counted events
+		if [[ $num == "<not" ]]; then
+			continue
+		fi
+
+		# save cycles count
+		if [[ $evt == "cycles" ]]; then
+			results[$cpu]=$num
+			continue
+		fi
+
+		# skip if no cycles
+		local cyc=${results[$cpu]}
+		if [[ -z $cyc ]]; then
+			continue
+		fi
+
+		# use printf for rounding and a leading zero
+		local res=`printf "%.2f" $(echo "scale=6; $num / $cyc" | bc -q)`
+		if [[ $ipc != $res ]]; then
+			echo "IPC is different for $cpu: $res != $ipc  ($num / $cyc)"
+			exit 1
+		fi
+	done
+}
+
+test_global_aggr
+test_no_aggr
+
+exit 0
-- 
cgit v1.2.3-70-g09d2


From 56933029d00c5aaac8ccdc22f1b536dd272902be Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:08:24 -0300
Subject: perf evsel: Convert last 'struct evsel' methods to the right evsel__
 prefix

As 'perf_evsel__' means its a function in tools/lib/perf/.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/evsel-tp-sched.c | 25 ++++++++++++------------
 tools/perf/ui/browsers/hists.c    | 28 ++++++++++-----------------
 tools/perf/util/evsel.c           | 40 +++++++++++++++------------------------
 tools/perf/util/header.c          |  5 ++---
 tools/perf/util/stat.c            |  4 ++--
 5 files changed, 41 insertions(+), 61 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 0e224a0a55d9..f9e34bd26cf3 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -5,8 +5,7 @@
 #include "tests.h"
 #include "debug.h"
 
-static int perf_evsel__test_field(struct evsel *evsel, const char *name,
-				  int size, bool should_be_signed)
+static int evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed)
 {
 	struct tep_format_field *field = evsel__field(evsel, name);
 	int is_signed;
@@ -43,25 +42,25 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
 		return -1;
 	}
 
-	if (perf_evsel__test_field(evsel, "prev_comm", 16, false))
+	if (evsel__test_field(evsel, "prev_comm", 16, false))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "prev_pid", 4, true))
+	if (evsel__test_field(evsel, "prev_pid", 4, true))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "prev_prio", 4, true))
+	if (evsel__test_field(evsel, "prev_prio", 4, true))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true))
+	if (evsel__test_field(evsel, "prev_state", sizeof(long), true))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "next_comm", 16, false))
+	if (evsel__test_field(evsel, "next_comm", 16, false))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "next_pid", 4, true))
+	if (evsel__test_field(evsel, "next_pid", 4, true))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "next_prio", 4, true))
+	if (evsel__test_field(evsel, "next_prio", 4, true))
 		ret = -1;
 
 	evsel__delete(evsel);
@@ -73,16 +72,16 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
 		return -1;
 	}
 
-	if (perf_evsel__test_field(evsel, "comm", 16, false))
+	if (evsel__test_field(evsel, "comm", 16, false))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "pid", 4, true))
+	if (evsel__test_field(evsel, "pid", 4, true))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "prio", 4, true))
+	if (evsel__test_field(evsel, "prio", 4, true))
 		ret = -1;
 
-	if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
+	if (evsel__test_field(evsel, "target_cpu", 4, true))
 		ret = -1;
 
 	evsel__delete(evsel);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index b0e1880cf992..e735d7e37da9 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2946,14 +2946,10 @@ next:
 	}
 }
 
-static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
-				    const char *helpline,
-				    bool left_exits,
-				    struct hist_browser_timer *hbt,
-				    float min_pcnt,
-				    struct perf_env *env,
-				    bool warn_lost_event,
-				    struct annotation_options *annotation_opts)
+static int evsel__hists_browse(struct evsel *evsel, int nr_events, const char *helpline,
+			       bool left_exits, struct hist_browser_timer *hbt, float min_pcnt,
+			       struct perf_env *env, bool warn_lost_event,
+			       struct annotation_options *annotation_opts)
 {
 	struct hists *hists = evsel__hists(evsel);
 	struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts);
@@ -3505,12 +3501,10 @@ browse_hists:
 			 */
 			if (hbt)
 				hbt->timer(hbt->arg);
-			key = perf_evsel__hists_browse(pos, nr_events, help,
-						       true, hbt,
-						       menu->min_pcnt,
-						       menu->env,
-						       warn_lost_event,
-						       menu->annotation_opts);
+			key = evsel__hists_browse(pos, nr_events, help, true, hbt,
+						  menu->min_pcnt, menu->env,
+						  warn_lost_event,
+						  menu->annotation_opts);
 			ui_browser__show_title(&menu->b, title);
 			switch (key) {
 			case K_TAB:
@@ -3633,10 +3627,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
 single_entry: {
 		struct evsel *first = evlist__first(evlist);
 
-		return perf_evsel__hists_browse(first, nr_entries, help,
-						false, hbt, min_pcnt,
-						env, warn_lost_event,
-						annotation_opts);
+		return evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt,
+					   env, warn_lost_event, annotation_opts);
 	}
 	}
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1cad6051d8b0..9005cc974f43 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -497,7 +497,7 @@ static const char *__evsel__hw_name(u64 config)
 	return "unknown-hardware";
 }
 
-static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
+static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
 {
 	int colon = 0, r = 0;
 	struct perf_event_attr *attr = &evsel->core.attr;
@@ -536,7 +536,7 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
 static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
 {
 	int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
-	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+	return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
 const char *evsel__sw_names[PERF_COUNT_SW_MAX] = {
@@ -562,7 +562,7 @@ static const char *__evsel__sw_name(u64 config)
 static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
 {
 	int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
-	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+	return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
 static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
@@ -587,7 +587,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 	int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
-	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+	return r + evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
 const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
@@ -682,13 +682,13 @@ out_err:
 static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
 {
 	int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
-	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
 static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
 {
 	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
-	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+	return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
 }
 
 static int evsel__tool_name(char *bf, size_t size)
@@ -850,9 +850,7 @@ void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
 		return __evsel__config_callchain(evsel, opts, param);
 }
 
-static void
-perf_evsel__reset_callgraph(struct evsel *evsel,
-			    struct callchain_param *param)
+static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 
@@ -988,7 +986,7 @@ static void evsel__apply_config_terms(struct evsel *evsel,
 
 		/* If global callgraph set, clear it */
 		if (callchain_param.enabled)
-			perf_evsel__reset_callgraph(evsel, &callchain_param);
+			evsel__reset_callgraph(evsel, &callchain_param);
 
 		/* set perf-event callgraph */
 		if (param.enabled) {
@@ -1434,9 +1432,7 @@ static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
 	return perf_evsel__read(&evsel->core, cpu, thread, count);
 }
 
-static void
-perf_evsel__set_count(struct evsel *counter, int cpu, int thread,
-		      u64 val, u64 ena, u64 run)
+static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run)
 {
 	struct perf_counts_values *count;
 
@@ -1449,9 +1445,7 @@ perf_evsel__set_count(struct evsel *counter, int cpu, int thread,
 	perf_counts__set_loaded(counter->counts, cpu, thread, true);
 }
 
-static int
-perf_evsel__process_group_data(struct evsel *leader,
-			       int cpu, int thread, u64 *data)
+static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data)
 {
 	u64 read_format = leader->core.attr.read_format;
 	struct sample_read_value *v;
@@ -1470,8 +1464,7 @@ perf_evsel__process_group_data(struct evsel *leader,
 
 	v = (struct sample_read_value *) data;
 
-	perf_evsel__set_count(leader, cpu, thread,
-			      v[0].value, ena, run);
+	evsel__set_count(leader, cpu, thread, v[0].value, ena, run);
 
 	for (i = 1; i < nr; i++) {
 		struct evsel *counter;
@@ -1480,8 +1473,7 @@ perf_evsel__process_group_data(struct evsel *leader,
 		if (!counter)
 			return -EINVAL;
 
-		perf_evsel__set_count(counter, cpu, thread,
-				      v[i].value, ena, run);
+		evsel__set_count(counter, cpu, thread, v[i].value, ena, run);
 	}
 
 	return 0;
@@ -1514,7 +1506,7 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread)
 	if (readn(FD(leader, cpu, thread), data, size) <= 0)
 		return -errno;
 
-	return perf_evsel__process_group_data(leader, cpu, thread, data);
+	return evsel__process_group_data(leader, cpu, thread, data);
 }
 
 int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
@@ -1567,9 +1559,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
 	return fd;
 }
 
-static void perf_evsel__remove_fd(struct evsel *pos,
-				  int nr_cpus, int nr_threads,
-				  int thread_idx)
+static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
 {
 	for (int cpu = 0; cpu < nr_cpus; cpu++)
 		for (int thread = thread_idx; thread < nr_threads - 1; thread++)
@@ -1588,7 +1578,7 @@ static int update_fds(struct evsel *evsel,
 	evlist__for_each_entry(evsel->evlist, pos) {
 		nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
 
-		perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
+		evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
 
 		/*
 		 * Since fds for next evsel has not been created,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b9171bd11fe6..b65c8f7ce36a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3735,8 +3735,7 @@ static int read_attr(int fd, struct perf_header *ph,
 	return ret <= 0 ? -1 : 0;
 }
 
-static int perf_evsel__prepare_tracepoint_event(struct evsel *evsel,
-						struct tep_handle *pevent)
+static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent)
 {
 	struct tep_event *event;
 	char bf[128];
@@ -3774,7 +3773,7 @@ static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist,
 
 	evlist__for_each_entry(evlist, pos) {
 		if (pos->core.attr.type == PERF_TYPE_TRACEPOINT &&
-		    perf_evsel__prepare_tracepoint_event(pos, pevent))
+		    evsel__prepare_tracepoint_event(pos, pevent))
 			return -1;
 	}
 
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index bd0decd6d753..5e98d591cb38 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -229,7 +229,7 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
 		evsel__reset_prev_raw_counts(evsel);
 }
 
-static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
+static void evsel__copy_prev_raw_counts(struct evsel *evsel)
 {
 	int ncpus = evsel__nr_cpus(evsel);
 	int nthreads = perf_thread_map__nr(evsel->core.threads);
@@ -250,7 +250,7 @@ void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel)
-		perf_evsel__copy_prev_raw_counts(evsel);
+		evsel__copy_prev_raw_counts(evsel);
 }
 
 void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
-- 
cgit v1.2.3-70-g09d2


From a622eafa1a54043c2eaedfccdd1b1ee5ffeb9d06 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:22:07 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' methods:
 evlist__set_leader()

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      | 2 +-
 tools/perf/util/evlist.c       | 8 ++++----
 tools/perf/util/evlist.h       | 4 ++--
 tools/perf/util/parse-events.c | 2 +-
 tools/perf/util/python.c       | 2 +-
 tools/perf/util/record.c       | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f15b2f8aa14d..2dcfd6d3f879 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -733,7 +733,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	}
 
 	if (group)
-		perf_evlist__set_leader(evsel_list);
+		evlist__set_leader(evsel_list);
 
 	if (affinity__setup(&affinity) < 0)
 		return -1;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8bdf3d2c907c..655691eebe85 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -212,7 +212,7 @@ out:
 	return err;
 }
 
-void __perf_evlist__set_leader(struct list_head *list)
+void __evlist__set_leader(struct list_head *list)
 {
 	struct evsel *evsel, *leader;
 
@@ -226,11 +226,11 @@ void __perf_evlist__set_leader(struct list_head *list)
 	}
 }
 
-void perf_evlist__set_leader(struct evlist *evlist)
+void evlist__set_leader(struct evlist *evlist)
 {
 	if (evlist->core.nr_entries) {
 		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
-		__perf_evlist__set_leader(&evlist->core.entries);
+		__evlist__set_leader(&evlist->core.entries);
 	}
 }
 
@@ -1694,7 +1694,7 @@ void perf_evlist__force_leader(struct evlist *evlist)
 	if (!evlist->nr_groups) {
 		struct evsel *leader = evlist__first(evlist);
 
-		perf_evlist__set_leader(evlist);
+		evlist__set_leader(evlist);
 		leader->forced_leader = true;
 	}
 }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e1a450322bc5..c155f1364077 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -220,8 +220,8 @@ void perf_evlist__set_selected(struct evlist *evlist,
 int perf_evlist__create_maps(struct evlist *evlist, struct target *target);
 int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 
-void __perf_evlist__set_leader(struct list_head *list);
-void perf_evlist__set_leader(struct evlist *evlist);
+void __evlist__set_leader(struct list_head *list);
+void evlist__set_leader(struct evlist *evlist);
 
 u64 __evlist__combined_sample_type(struct evlist *evlist);
 u64 evlist__combined_sample_type(struct evlist *evlist);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3b581d7b3213..8fa87f60133f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1769,7 +1769,7 @@ void parse_events__set_leader(char *name, struct list_head *list,
 	if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
 		return;
 
-	__perf_evlist__set_leader(list);
+	__evlist__set_leader(list);
 	leader = list_entry(list->next, struct evsel, core.node);
 	leader->group_name = name ? strdup(name) : NULL;
 }
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index ae8edde7c50e..3d3b46514863 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -1089,7 +1089,7 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
 		return NULL;
 
 	if (group)
-		perf_evlist__set_leader(evlist);
+		evlist__set_leader(evlist);
 
 	if (evlist__open(evlist) < 0) {
 		PyErr_SetFromErrno(PyExc_OSError);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 07e4b96a6625..7330407db4ba 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -102,7 +102,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
 	 * since some might depend on this info.
 	 */
 	if (opts->group)
-		perf_evlist__set_leader(evlist);
+		evlist__set_leader(evlist);
 
 	if (evlist->core.cpus->map[0] < 0)
 		opts->no_inherit = true;
-- 
cgit v1.2.3-70-g09d2


From 7b392ef04ef570c15de8fc0d36171f9bc80dd539 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:26:54 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' 'workload'
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-ftrace.c    |  9 ++++-----
 tools/perf/builtin-record.c    | 11 +++++------
 tools/perf/builtin-stat.c      |  7 +++----
 tools/perf/builtin-trace.c     |  7 +++----
 tools/perf/tests/event-times.c |  4 ++--
 tools/perf/tests/perf-record.c |  8 ++++----
 tools/perf/tests/task-exit.c   |  9 ++++-----
 tools/perf/util/evlist.c       |  9 ++++-----
 tools/perf/util/evlist.h       | 10 ++++------
 9 files changed, 33 insertions(+), 41 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 9366fad591dc..bf4d8e9ab8f5 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -67,7 +67,7 @@ static void sig_handler(int sig __maybe_unused)
 }
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
+ * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
  * we asked by setting its exec_error to the function below,
  * ftrace__workload_exec_failed_signal.
  *
@@ -600,9 +600,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 	if (write_tracing_file("trace", "0") < 0)
 		goto out;
 
-	if (argc && perf_evlist__prepare_workload(ftrace->evlist,
-				&ftrace->target, argv, false,
-				ftrace__workload_exec_failed_signal) < 0) {
+	if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
+					     ftrace__workload_exec_failed_signal) < 0) {
 		goto out;
 	}
 
@@ -644,7 +643,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 		}
 	}
 
-	perf_evlist__start_workload(ftrace->evlist);
+	evlist__start_workload(ftrace->evlist);
 
 	if (ftrace->initial_delay) {
 		usleep(ftrace->initial_delay * 1000);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index adf311d15d3d..4302f32c80c5 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1333,7 +1333,7 @@ record__switch_output(struct record *rec, bool at_exit)
 static volatile int workload_exec_errno;
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1
+ * evlist__prepare_workload will send a SIGUSR1
  * if the fork fails, since we asked by setting its
  * want_signal to true.
  */
@@ -1689,9 +1689,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	record__init_features(rec);
 
 	if (forks) {
-		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
-						    argv, data->is_pipe,
-						    workload_exec_failed_signal);
+		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
+					       workload_exec_failed_signal);
 		if (err < 0) {
 			pr_err("Couldn't run the workload!\n");
 			status = err;
@@ -1835,7 +1834,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 						  machine);
 		free(event);
 
-		perf_evlist__start_workload(rec->evlist);
+		evlist__start_workload(rec->evlist);
 	}
 
 	if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
@@ -2413,7 +2412,7 @@ static bool dry_run;
  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
  * with it and switch to use the library functions in perf_evlist that came
  * from builtin-record.c, i.e. use record_opts,
- * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
+ * evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
  * using pipes, etc.
  */
 static struct option __record_options[] = {
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2dcfd6d3f879..460e9b206533 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -534,7 +534,7 @@ static void disable_counters(void)
 static volatile int workload_exec_errno;
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1
+ * evlist__prepare_workload will send a SIGUSR1
  * if the fork fails, since we asked by setting its
  * want_signal to true.
  */
@@ -724,8 +724,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	bool second_pass = false;
 
 	if (forks) {
-		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
-						  workload_exec_failed_signal) < 0) {
+		if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
 			perror("failed to prepare workload");
 			return -1;
 		}
@@ -876,7 +875,7 @@ try_again_reset:
 	clock_gettime(CLOCK_MONOTONIC, &ref_time);
 
 	if (forks) {
-		perf_evlist__start_workload(evsel_list);
+		evlist__start_workload(evsel_list);
 		enable_counters();
 
 		if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index de80534473af..80af0bf37d76 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3680,7 +3680,7 @@ static int trace__set_filter_pids(struct trace *trace)
 	 * Better not use !target__has_task() here because we need to cover the
 	 * case where no threads were specified in the command line, but a
 	 * workload was, and in that case we will fill in the thread_map when
-	 * we fork the workload in perf_evlist__prepare_workload.
+	 * we fork the workload in evlist__prepare_workload.
 	 */
 	if (trace->filter_pids.nr > 0) {
 		err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
@@ -3969,8 +3969,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	signal(SIGINT, sig_handler);
 
 	if (forks) {
-		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
-						    argv, false, NULL);
+		err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
 		if (err < 0) {
 			fprintf(trace->output, "Couldn't run the workload!\n");
 			goto out_delete_evlist;
@@ -4043,7 +4042,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		evlist__enable(evlist);
 
 	if (forks)
-		perf_evlist__start_workload(evlist);
+		evlist__start_workload(evlist);
 
 	if (trace->opts.initial_delay) {
 		usleep(trace->opts.initial_delay * 1000);
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index db68894a6f40..9da2d4f58b8e 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -32,7 +32,7 @@ static int attach__enable_on_exec(struct evlist *evlist)
 		return err;
 	}
 
-	err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
+	err = evlist__prepare_workload(evlist, &target, argv, false, NULL);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
 		return err;
@@ -47,7 +47,7 @@ static int attach__enable_on_exec(struct evlist *evlist)
 		return err;
 	}
 
-	return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL;
+	return evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL;
 }
 
 static int detach__enable_on_exec(struct evlist *evlist)
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 67d3f5aad016..8745bf5f93e6 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -81,7 +81,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	/*
 	 * Create maps of threads and cpus to monitor. In this case
 	 * we start with all threads and cpus (-1, -1) but then in
-	 * perf_evlist__prepare_workload we'll fill in the only thread
+	 * evlist__prepare_workload we'll fill in the only thread
 	 * we're monitoring, the one forked there.
 	 */
 	err = perf_evlist__create_maps(evlist, &opts.target);
@@ -92,11 +92,11 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 
 	/*
 	 * Prepare the workload in argv[] to run, it'll fork it, and then wait
-	 * for perf_evlist__start_workload() to exec it. This is done this way
+	 * for evlist__start_workload() to exec it. This is done this way
 	 * so that we have time to open the evlist (calling sys_perf_event_open
 	 * on all the fds) and then mmap them.
 	 */
-	err = perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
+	err = evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
 		goto out_delete_evlist;
@@ -161,7 +161,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	/*
 	 * Now!
 	 */
-	perf_evlist__start_workload(evlist);
+	evlist__start_workload(evlist);
 
 	while (1) {
 		int before = total_events;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index adaff9044331..452d51048cc1 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -23,7 +23,7 @@ static void sig_handler(int sig __maybe_unused)
 }
 
 /*
- * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
+ * evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
  * we asked by setting its exec_error to this handler.
  */
 static void workload_exec_failed_signal(int signo __maybe_unused,
@@ -67,7 +67,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 	/*
 	 * Create maps of threads and cpus to monitor. In this case
 	 * we start with all threads and cpus (-1, -1) but then in
-	 * perf_evlist__prepare_workload we'll fill in the only thread
+	 * evlist__prepare_workload we'll fill in the only thread
 	 * we're monitoring, the one forked there.
 	 */
 	cpus = perf_cpu_map__dummy_new();
@@ -83,8 +83,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 	cpus	= NULL;
 	threads = NULL;
 
-	err = perf_evlist__prepare_workload(evlist, &target, argv, false,
-					    workload_exec_failed_signal);
+	err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
 		goto out_delete_evlist;
@@ -116,7 +115,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 		goto out_delete_evlist;
 	}
 
-	perf_evlist__start_workload(evlist);
+	evlist__start_workload(evlist);
 
 retry:
 	md = &evlist->mmap[0];
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 655691eebe85..198d4ef03884 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1315,9 +1315,8 @@ out_err:
 	return err;
 }
 
-int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
-				  const char *argv[], bool pipe_output,
-				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
+int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[],
+			     bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
 {
 	int child_ready_pipe[2], go_pipe[2];
 	char bf;
@@ -1362,7 +1361,7 @@ int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
 		/*
 		 * The parent will ask for the execvp() to be performed by
 		 * writing exactly one byte, in workload.cork_fd, usually via
-		 * perf_evlist__start_workload().
+		 * evlist__start_workload().
 		 *
 		 * For cancelling the workload without actually running it,
 		 * the parent will just close workload.cork_fd, without writing
@@ -1429,7 +1428,7 @@ out_close_ready_pipe:
 	return -1;
 }
 
-int perf_evlist__start_workload(struct evlist *evlist)
+int evlist__start_workload(struct evlist *evlist)
 {
 	if (evlist->workload.cork_fd > 0) {
 		char bf = 0;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c155f1364077..a0a53f33a272 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -182,12 +182,10 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
 			 struct callchain_param *callchain);
 int record_opts__config(struct record_opts *opts);
 
-int perf_evlist__prepare_workload(struct evlist *evlist,
-				  struct target *target,
-				  const char *argv[], bool pipe_output,
-				  void (*exec_error)(int signo, siginfo_t *info,
-						     void *ucontext));
-int perf_evlist__start_workload(struct evlist *evlist);
+int evlist__prepare_workload(struct evlist *evlist, struct target *target,
+			     const char *argv[], bool pipe_output,
+			     void (*exec_error)(int signo, siginfo_t *info, void *ucontext));
+int evlist__start_workload(struct evlist *evlist);
 
 struct option;
 
-- 
cgit v1.2.3-70-g09d2


From 53f5e9084d0195209bfc7e5fa547fd35bbaadbee Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:31:04 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' stats methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c     |  6 +++---
 tools/perf/builtin-stat.c       | 16 ++++++++--------
 tools/perf/tests/parse-metric.c |  4 ++--
 tools/perf/util/stat.c          | 14 +++++++-------
 tools/perf/util/stat.h          | 12 ++++++------
 5 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 48588ccf902e..983c101965ab 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1847,7 +1847,7 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	u64 val;
 
 	if (!evsel->stats)
-		perf_evlist__alloc_stats(script->session->evlist, false);
+		evlist__alloc_stats(script->session->evlist, false);
 	if (evsel_script(evsel->leader)->gnum++ == 0)
 		perf_stat__reset_shadow_stats();
 	val = sample->period * evsel->scale;
@@ -3308,7 +3308,7 @@ static int set_maps(struct perf_script *script)
 
 	perf_evlist__set_maps(&evlist->core, script->cpus, script->threads);
 
-	if (perf_evlist__alloc_stats(evlist, true))
+	if (evlist__alloc_stats(evlist, true))
 		return -ENOMEM;
 
 	script->allocated = true;
@@ -3935,7 +3935,7 @@ out_delete:
 		zfree(&script.ptime_range);
 	}
 
-	perf_evlist__free_stats(session->evlist);
+	evlist__free_stats(session->evlist);
 	perf_session__delete(session);
 
 	if (script_started)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 460e9b206533..b81e64017808 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -270,7 +270,7 @@ static void perf_stat__reset_stats(void)
 {
 	int i;
 
-	perf_evlist__reset_stats(evsel_list);
+	evlist__reset_stats(evsel_list);
 	perf_stat__reset_shadow_stats();
 
 	for (i = 0; i < stat_config.stats_num; i++)
@@ -913,10 +913,10 @@ try_again_reset:
 		update_stats(&walltime_nsecs_stats, t1 - t0);
 
 		if (stat_config.aggr_mode == AGGR_GLOBAL)
-			perf_evlist__save_aggr_prev_raw_counts(evsel_list);
+			evlist__save_aggr_prev_raw_counts(evsel_list);
 
-		perf_evlist__copy_prev_raw_counts(evsel_list);
-		perf_evlist__reset_prev_raw_counts(evsel_list);
+		evlist__copy_prev_raw_counts(evsel_list);
+		evlist__reset_prev_raw_counts(evsel_list);
 		runtime_stat_reset(&stat_config);
 		perf_stat__reset_shadow_per_stat(&rt_stat);
 	} else
@@ -1907,7 +1907,7 @@ static int set_maps(struct perf_stat *st)
 
 	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
 
-	if (perf_evlist__alloc_stats(evsel_list, true))
+	if (evlist__alloc_stats(evsel_list, true))
 		return -ENOMEM;
 
 	st->maps_allocated = true;
@@ -2309,7 +2309,7 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 	}
 
-	if (perf_evlist__alloc_stats(evsel_list, interval))
+	if (evlist__alloc_stats(evsel_list, interval))
 		goto out;
 
 	if (perf_stat_init_aggr_mode())
@@ -2349,7 +2349,7 @@ int cmd_stat(int argc, const char **argv)
 				run_idx + 1);
 
 		if (run_idx != 0)
-			perf_evlist__reset_prev_raw_counts(evsel_list);
+			evlist__reset_prev_raw_counts(evsel_list);
 
 		status = run_perf_stat(argc, argv, run_idx);
 		if (forever && status != -1 && !interval) {
@@ -2400,7 +2400,7 @@ int cmd_stat(int argc, const char **argv)
 	}
 
 	perf_stat__exit_aggr_mode();
-	perf_evlist__free_stats(evsel_list);
+	evlist__free_stats(evsel_list);
 out:
 	zfree(&stat_config.walltime_run);
 
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index 7c1bde01cb50..ce7be37f0d88 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -166,7 +166,7 @@ static int __compute_metric(const char *name, struct value *vals,
 	if (err)
 		goto out;
 
-	err = perf_evlist__alloc_stats(evlist, false);
+	err = evlist__alloc_stats(evlist, false);
 	if (err)
 		goto out;
 
@@ -183,7 +183,7 @@ out:
 	/* ... clenup. */
 	metricgroup__rblist_exit(&metric_events);
 	runtime_stat__exit(&st);
-	perf_evlist__free_stats(evlist);
+	evlist__free_stats(evlist);
 	perf_cpu_map__put(cpus);
 	evlist__delete(evlist);
 	return err;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5e98d591cb38..8be9c3da9e56 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -184,7 +184,7 @@ static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
 	return 0;
 }
 
-int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
+int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
 {
 	struct evsel *evsel;
 
@@ -196,11 +196,11 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
 	return 0;
 
 out_free:
-	perf_evlist__free_stats(evlist);
+	evlist__free_stats(evlist);
 	return -1;
 }
 
-void perf_evlist__free_stats(struct evlist *evlist)
+void evlist__free_stats(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
@@ -211,7 +211,7 @@ void perf_evlist__free_stats(struct evlist *evlist)
 	}
 }
 
-void perf_evlist__reset_stats(struct evlist *evlist)
+void evlist__reset_stats(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
@@ -221,7 +221,7 @@ void perf_evlist__reset_stats(struct evlist *evlist)
 	}
 }
 
-void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
+void evlist__reset_prev_raw_counts(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
@@ -245,7 +245,7 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel)
 	evsel->counts->aggr = evsel->prev_raw_counts->aggr;
 }
 
-void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
+void evlist__copy_prev_raw_counts(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
@@ -253,7 +253,7 @@ void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
 		evsel__copy_prev_raw_counts(evsel);
 }
 
-void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 05adf8165025..4cba4b106e45 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -213,12 +213,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				   struct runtime_stat *st);
 void perf_stat__collect_metric_expr(struct evlist *);
 
-int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
-void perf_evlist__free_stats(struct evlist *evlist);
-void perf_evlist__reset_stats(struct evlist *evlist);
-void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
-void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
-void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
+int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
+void evlist__free_stats(struct evlist *evlist);
+void evlist__reset_stats(struct evlist *evlist);
+void evlist__reset_prev_raw_counts(struct evlist *evlist);
+void evlist__copy_prev_raw_counts(struct evlist *evlist);
+void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter);
-- 
cgit v1.2.3-70-g09d2


From ade9d208d6f054c0cd69af16c0a23af62b3da3b8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:33:55 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' 'toggle'
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c    | 8 ++++----
 tools/perf/builtin-top.c       | 6 +++---
 tools/perf/ui/browsers/hists.c | 2 +-
 tools/perf/util/evlist.c       | 7 +++----
 tools/perf/util/evlist.h       | 4 ++--
 5 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4302f32c80c5..d0e3dd57c108 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1166,7 +1166,7 @@ static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
 		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
 
 	if (overwrite)
-		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
 out:
 	return rc;
 }
@@ -1860,11 +1860,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		 * BKW_MMAP_EMPTY here: when done == true and
 		 * hits != rec->samples in previous round.
 		 *
-		 * perf_evlist__toggle_bkw_mmap ensure we never
+		 * evlist__toggle_bkw_mmap ensure we never
 		 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
 		 */
 		if (trigger_is_hit(&switch_output_trigger) || done || draining)
-			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
+			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
 
 		if (record__mmap_read_all(rec, false) < 0) {
 			trigger_error(&auxtrace_snapshot_trigger);
@@ -1903,7 +1903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 			 * record__mmap_read_all(): we should have collected
 			 * data from it.
 			 */
-			perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
+			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
 
 			if (!quiet)
 				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 7c64134472c7..b7da09376c72 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -918,14 +918,14 @@ static void perf_top__mmap_read(struct perf_top *top)
 	int i;
 
 	if (overwrite)
-		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
 
 	for (i = 0; i < top->evlist->core.nr_mmaps; i++)
 		perf_top__mmap_read_idx(top, i);
 
 	if (overwrite) {
-		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
-		perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
 	}
 }
 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e735d7e37da9..48814efb98e8 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3264,7 +3264,7 @@ do_hotkey:		 // key came straight from options ui__popup_menu()
 			if (!is_report_browser(hbt)) {
 				struct perf_top *top = hbt->arg;
 
-				perf_evlist__toggle_enable(top->evlist);
+				evlist__toggle_enable(top->evlist);
 				/*
 				 * No need to refresh, resort/decay histogram
 				 * entries if we are not collecting samples:
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 198d4ef03884..3ea3c649ab04 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -447,7 +447,7 @@ void evlist__enable(struct evlist *evlist)
 	evlist->enabled = true;
 }
 
-void perf_evlist__toggle_enable(struct evlist *evlist)
+void evlist__toggle_enable(struct evlist *evlist)
 {
 	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
 }
@@ -727,7 +727,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
 		if (overwrite) {
 			evlist->overwrite_mmap = maps;
 			if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
-				perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+				evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
 		} else {
 			evlist->mmap = maps;
 		}
@@ -1613,8 +1613,7 @@ perf_evlist__find_evsel_by_str(struct evlist *evlist,
 	return NULL;
 }
 
-void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
-				  enum bkw_mmap_state state)
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state)
 {
 	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
 	enum action {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a0a53f33a272..e5a4de1a8915 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -168,7 +168,7 @@ struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
 
 struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id);
 
-void perf_evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
+void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
 
 void evlist__mmap_consume(struct evlist *evlist, int idx);
 
@@ -207,7 +207,7 @@ size_t evlist__mmap_size(unsigned long pages);
 
 void evlist__disable(struct evlist *evlist);
 void evlist__enable(struct evlist *evlist);
-void perf_evlist__toggle_enable(struct evlist *evlist);
+void evlist__toggle_enable(struct evlist *evlist);
 
 int perf_evlist__enable_event_idx(struct evlist *evlist,
 				  struct evsel *evsel, int idx);
-- 
cgit v1.2.3-70-g09d2


From 24bf91a7540bc0d14c389dd4f612eea57c06dc93 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:38:02 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' 'filter'
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c |  2 +-
 tools/perf/builtin-stat.c   |  2 +-
 tools/perf/builtin-trace.c  |  8 ++++----
 tools/perf/util/evlist.c    | 22 +++++++++++-----------
 tools/perf/util/evlist.h    | 14 +++++++-------
 5 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d0e3dd57c108..d53396421a02 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -946,7 +946,7 @@ try_again:
 "even with a suitable vmlinux or kallsyms file.\n\n");
 	}
 
-	if (perf_evlist__apply_filters(evlist, &pos)) {
+	if (evlist__apply_filters(evlist, &pos)) {
 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 			pos->filter, evsel__name(pos), errno,
 			str_error_r(errno, msg, sizeof(msg)));
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b81e64017808..29f7d4752e84 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -842,7 +842,7 @@ try_again_reset:
 			return -1;
 	}
 
-	if (perf_evlist__apply_filters(evsel_list, &counter)) {
+	if (evlist__apply_filters(evsel_list, &counter)) {
 		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
 			counter->filter, evsel__name(counter), errno,
 			str_error_r(errno, msg, sizeof(msg)));
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 80af0bf37d76..9f6f0f873cbc 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3666,7 +3666,7 @@ static int trace__set_filter_loop_pids(struct trace *trace)
 		thread = parent;
 	}
 
-	err = perf_evlist__append_tp_filter_pids(trace->evlist, nr, pids);
+	err = evlist__append_tp_filter_pids(trace->evlist, nr, pids);
 	if (!err && trace->filter_pids.map)
 		err = bpf_map__set_filter_pids(trace->filter_pids.map, nr, pids);
 
@@ -3683,8 +3683,8 @@ static int trace__set_filter_pids(struct trace *trace)
 	 * we fork the workload in evlist__prepare_workload.
 	 */
 	if (trace->filter_pids.nr > 0) {
-		err = perf_evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
-							 trace->filter_pids.entries);
+		err = evlist__append_tp_filter_pids(trace->evlist, trace->filter_pids.nr,
+						    trace->filter_pids.entries);
 		if (!err && trace->filter_pids.map) {
 			err = bpf_map__set_filter_pids(trace->filter_pids.map, trace->filter_pids.nr,
 						       trace->filter_pids.entries);
@@ -4027,7 +4027,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	err = trace__expand_filters(trace, &evsel);
 	if (err)
 		goto out_delete_evlist;
-	err = perf_evlist__apply_filters(evlist, &evsel);
+	err = evlist__apply_filters(evlist, &evsel);
 	if (err < 0)
 		goto out_error_apply_filters;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3ea3c649ab04..725f9f58bdd1 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -975,7 +975,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist,
 		__evsel__reset_sample_bit(evsel, bit);
 }
 
-int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
 {
 	struct evsel *evsel;
 	int err = 0;
@@ -998,7 +998,7 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
 	return err;
 }
 
-int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter)
 {
 	struct evsel *evsel;
 	int err = 0;
@@ -1018,7 +1018,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
 	return err;
 }
 
-int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter)
 {
 	struct evsel *evsel;
 	int err = 0;
@@ -1064,32 +1064,32 @@ out_free:
 	return NULL;
 }
 
-int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
 {
 	char *filter = asprintf__tp_filter_pids(npids, pids);
-	int ret = perf_evlist__set_tp_filter(evlist, filter);
+	int ret = evlist__set_tp_filter(evlist, filter);
 
 	free(filter);
 	return ret;
 }
 
-int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
 {
-	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
+	return evlist__set_tp_filter_pids(evlist, 1, &pid);
 }
 
-int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
 {
 	char *filter = asprintf__tp_filter_pids(npids, pids);
-	int ret = perf_evlist__append_tp_filter(evlist, filter);
+	int ret = evlist__append_tp_filter(evlist, filter);
 
 	free(filter);
 	return ret;
 }
 
-int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
 {
-	return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
+	return evlist__append_tp_filter_pids(evlist, 1, &pid);
 }
 
 bool evlist__valid_sample_type(struct evlist *evlist)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e5a4de1a8915..3b1acf70b5dd 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -141,14 +141,14 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist,
 #define perf_evlist__reset_sample_bit(evlist, bit) \
 	__perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
 
-int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter);
-int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
-int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+int evlist__set_tp_filter(struct evlist *evlist, const char *filter);
+int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
 
-int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter);
+int evlist__append_tp_filter(struct evlist *evlist, const char *filter);
 
-int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
-int perf_evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
+int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
+int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
 
 struct evsel *
 perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
@@ -216,7 +216,7 @@ void perf_evlist__set_selected(struct evlist *evlist,
 			       struct evsel *evsel);
 
 int perf_evlist__create_maps(struct evlist *evlist, struct target *target);
-int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
+int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 
 void __evlist__set_leader(struct list_head *list);
 void evlist__set_leader(struct evlist *evlist);
-- 
cgit v1.2.3-70-g09d2


From 08c83997ca87f9e162563a59ea43eabadc9e4231 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:40:10 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' sideband thread
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c       |  4 ++--
 tools/perf/builtin-top.c          |  4 ++--
 tools/perf/util/bpf-event.c       |  2 +-
 tools/perf/util/evlist.h          | 11 ++++-------
 tools/perf/util/sideband_evlist.c |  8 ++++----
 5 files changed, 13 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d53396421a02..04946bd3d090 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1548,7 +1548,7 @@ static int record__setup_sb_evlist(struct record *rec)
 		}
 	}
 #endif
-	if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
+	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
 		opts->no_bpf_event = true;
 	}
@@ -2065,7 +2065,7 @@ out_delete_session:
 	perf_session__delete(session);
 
 	if (!opts->no_bpf_event)
-		perf_evlist__stop_sb_thread(rec->sb_evlist);
+		evlist__stop_sb_thread(rec->sb_evlist);
 	return status;
 }
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b7da09376c72..50dd42da66ee 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1762,7 +1762,7 @@ int cmd_top(int argc, const char **argv)
 	}
 #endif
 
-	if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
+	if (evlist__start_sb_thread(top.sb_evlist, target)) {
 		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
 		opts->no_bpf_event = true;
 	}
@@ -1770,7 +1770,7 @@ int cmd_top(int argc, const char **argv)
 	status = __cmd_top(&top);
 
 	if (!opts->no_bpf_event)
-		perf_evlist__stop_sb_thread(top.sb_evlist);
+		evlist__stop_sb_thread(top.sb_evlist);
 
 out_delete_evlist:
 	evlist__delete(top.evlist);
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 3742511a08d1..57d58c81a5f8 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -526,7 +526,7 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
 	 */
 	attr.wakeup_watermark = 1;
 
-	return perf_evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env);
+	return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env);
 }
 
 void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 3b1acf70b5dd..736cb639df84 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -112,14 +112,11 @@ int __evlist__add_default_attrs(struct evlist *evlist,
 
 int evlist__add_dummy(struct evlist *evlist);
 
-int perf_evlist__add_sb_event(struct evlist *evlist,
-			      struct perf_event_attr *attr,
-			      evsel__sb_cb_t cb,
-			      void *data);
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+			 evsel__sb_cb_t cb, void *data);
 void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
-int perf_evlist__start_sb_thread(struct evlist *evlist,
-				 struct target *target);
-void perf_evlist__stop_sb_thread(struct evlist *evlist);
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target);
+void evlist__stop_sb_thread(struct evlist *evlist);
 
 int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
 
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
index ded9ced02797..90ed016bb348 100644
--- a/tools/perf/util/sideband_evlist.c
+++ b/tools/perf/util/sideband_evlist.c
@@ -12,8 +12,8 @@
 #include <sched.h>
 #include <stdbool.h>
 
-int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
-			      evsel__sb_cb_t cb, void *data)
+int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+			 evsel__sb_cb_t cb, void *data)
 {
 	struct evsel *evsel;
 
@@ -94,7 +94,7 @@ void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data)
       }
 }
 
-int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target)
+int evlist__start_sb_thread(struct evlist *evlist, struct target *target)
 {
 	struct evsel *counter;
 
@@ -138,7 +138,7 @@ out_delete_evlist:
 	return -1;
 }
 
-void perf_evlist__stop_sb_thread(struct evlist *evlist)
+void evlist__stop_sb_thread(struct evlist *evlist)
 {
 	if (!evlist)
 		return;
-- 
cgit v1.2.3-70-g09d2


From 2a6599cd5e093b3c607a39288f14a618c03a0e24 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:43:07 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' sample parsing
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kvm.c                  | 2 +-
 tools/perf/builtin-top.c                  | 4 ++--
 tools/perf/builtin-trace.c                | 5 ++---
 tools/perf/tests/code-reading.c           | 4 ++--
 tools/perf/tests/mmap-basic.c             | 2 +-
 tools/perf/tests/parse-no-sample-id-all.c | 4 ++--
 tools/perf/tests/perf-record.c            | 2 +-
 tools/perf/tests/sw-clock.c               | 2 +-
 tools/perf/tests/switch-tracking.c        | 8 ++++----
 tools/perf/util/auxtrace.c                | 2 +-
 tools/perf/util/evlist.c                  | 7 ++-----
 tools/perf/util/evlist.h                  | 8 ++------
 tools/perf/util/s390-cpumsf.c             | 4 ++--
 tools/perf/util/session.c                 | 7 +++----
 14 files changed, 26 insertions(+), 35 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 460945ded6dd..e1b60aad13cb 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -764,7 +764,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 		return (err == -EAGAIN) ? 0 : -1;
 
 	while ((event = perf_mmap__read_event(&md->core)) != NULL) {
-		err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+		err = evlist__parse_sample_timestamp(evlist, event, &timestamp);
 		if (err) {
 			perf_mmap__consume(&md->core);
 			pr_err("Failed to parse sample\n");
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 50dd42da66ee..fff1d5ef5c49 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -890,7 +890,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
 	while ((event = perf_mmap__read_event(&md->core)) != NULL) {
 		int ret;
 
-		ret = perf_evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
+		ret = evlist__parse_sample_timestamp(evlist, event, &last_timestamp);
 		if (ret && ret != -1)
 			break;
 
@@ -1153,7 +1153,7 @@ static int deliver_event(struct ordered_events *qe,
 		return 0;
 	}
 
-	ret = perf_evlist__parse_sample(evlist, event, &sample);
+	ret = evlist__parse_sample(evlist, event, &sample);
 	if (ret) {
 		pr_err("Can't parse sample, err = %d\n", ret);
 		goto next_event;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 9f6f0f873cbc..5638eae36ec8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3700,9 +3700,8 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event)
 {
 	struct evlist *evlist = trace->evlist;
 	struct perf_sample sample;
-	int err;
+	int err = evlist__parse_sample(evlist, event, &sample);
 
-	err = perf_evlist__parse_sample(evlist, event, &sample);
 	if (err)
 		fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
 	else
@@ -3735,7 +3734,7 @@ static int trace__deliver_event(struct trace *trace, union perf_event *event)
 	if (!trace->sort_events)
 		return __trace__deliver_event(trace, event);
 
-	err = perf_evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
+	err = evlist__parse_sample_timestamp(trace->evlist, event, &trace->oe.last);
 	if (err && err != -1)
 		return err;
 
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 035c9123549a..339d432b1bf1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -378,8 +378,8 @@ static int process_sample_event(struct machine *machine,
 	struct thread *thread;
 	int ret;
 
-	if (perf_evlist__parse_sample(evlist, event, &sample)) {
-		pr_debug("perf_evlist__parse_sample failed\n");
+	if (evlist__parse_sample(evlist, event, &sample)) {
+		pr_debug("evlist__parse_sample failed\n");
 		return -1;
 	}
 
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 7b0dbfc0e17d..d826153adbf8 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -126,7 +126,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 			goto out_delete_evlist;
 		}
 
-		err = perf_evlist__parse_sample(evlist, event, &sample);
+		err = evlist__parse_sample(evlist, event, &sample);
 		if (err) {
 			pr_err("Can't parse sample, err = %d\n", err);
 			goto out_delete_evlist;
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index adf3c9c4a416..471273676701 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -27,8 +27,8 @@ static int process_event(struct evlist **pevlist, union perf_event *event)
 	if (!*pevlist)
 		return -1;
 
-	if (perf_evlist__parse_sample(*pevlist, event, &sample)) {
-		pr_debug("perf_evlist__parse_sample failed\n");
+	if (evlist__parse_sample(*pevlist, event, &sample)) {
+		pr_debug("evlist__parse_sample failed\n");
 		return -1;
 	}
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 8745bf5f93e6..f0b2066c7ea9 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -182,7 +182,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 				if (type < PERF_RECORD_MAX)
 					nr_events[type]++;
 
-				err = perf_evlist__parse_sample(evlist, event, &sample);
+				err = evlist__parse_sample(evlist, event, &sample);
 				if (err < 0) {
 					if (verbose > 0)
 						perf_event__fprintf(event, NULL, stderr);
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 4b9b731977c8..a49c9e23053b 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -109,7 +109,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 		if (event->header.type != PERF_RECORD_SAMPLE)
 			goto next_event;
 
-		err = perf_evlist__parse_sample(evlist, event, &sample);
+		err = evlist__parse_sample(evlist, event, &sample);
 		if (err < 0) {
 			pr_debug("Error during parse sample\n");
 			goto out_delete_evlist;
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index db5e1f70053a..b8cdbe6f324f 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -128,8 +128,8 @@ static int process_sample_event(struct evlist *evlist,
 	pid_t next_tid, prev_tid;
 	int cpu, err;
 
-	if (perf_evlist__parse_sample(evlist, event, &sample)) {
-		pr_debug("perf_evlist__parse_sample failed\n");
+	if (evlist__parse_sample(evlist, event, &sample)) {
+		pr_debug("evlist__parse_sample failed\n");
 		return -1;
 	}
 
@@ -223,8 +223,8 @@ static int add_event(struct evlist *evlist, struct list_head *events,
 	node->event = event;
 	list_add(&node->list, events);
 
-	if (perf_evlist__parse_sample(evlist, event, &sample)) {
-		pr_debug("perf_evlist__parse_sample failed\n");
+	if (evlist__parse_sample(evlist, event, &sample)) {
+		pr_debug("evlist__parse_sample failed\n");
 		return -1;
 	}
 
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 62e7f6c5f8b5..647afdcb2699 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1082,7 +1082,7 @@ static int auxtrace_queue_data_cb(struct perf_session *session,
 	if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE)
 		return 0;
 
-	err = perf_evlist__parse_sample(session->evlist, event, &sample);
+	err = evlist__parse_sample(session->evlist, event, &sample);
 	if (err)
 		return err;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 725f9f58bdd1..943262aa6505 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1447,8 +1447,7 @@ int evlist__start_workload(struct evlist *evlist)
 	return 0;
 }
 
-int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
-			      struct perf_sample *sample)
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
 	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
 
@@ -1457,9 +1456,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
 	return evsel__parse_sample(evsel, event, sample);
 }
 
-int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
-					union perf_event *event,
-					u64 *timestamp)
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp)
 {
 	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 736cb639df84..c59a08cf9f25 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -224,12 +224,8 @@ u64 evlist__combined_branch_type(struct evlist *evlist);
 bool evlist__sample_id_all(struct evlist *evlist);
 u16 perf_evlist__id_hdr_size(struct evlist *evlist);
 
-int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
-			      struct perf_sample *sample);
-
-int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
-					union perf_event *event,
-					u64 *timestamp);
+int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp);
 
 bool evlist__valid_sample_type(struct evlist *evlist);
 bool evlist__valid_sample_id_all(struct evlist *evlist);
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index f8861998e5bd..959e9890bccc 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -96,9 +96,9 @@
  * |    than PERF_RECORD_USER_TYPE_START) are handled by
  * |    perf_session__process_user_event(see below)
  * |  - Those generated by the kernel are handled by
- * |    perf_evlist__parse_sample_timestamp()
+ * |    evlist__parse_sample_timestamp()
  * |
- * perf_evlist__parse_sample_timestamp()
+ * evlist__parse_sample_timestamp()
  * |  Extract time stamp from sample data.
  * |
  * perf_session__queue_event()
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5cc722b6fe7c..61e4b3cc7313 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1523,9 +1523,8 @@ static int perf_session__deliver_event(struct perf_session *session,
 				       u64 file_offset)
 {
 	struct perf_sample sample;
-	int ret;
+	int ret = evlist__parse_sample(session->evlist, event, &sample);
 
-	ret = perf_evlist__parse_sample(session->evlist, event, &sample);
 	if (ret) {
 		pr_err("Can't parse sample, err = %d\n", ret);
 		return ret;
@@ -1697,7 +1696,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
 out_parse_sample:
 
 	if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
-	    perf_evlist__parse_sample(session->evlist, event, sample))
+	    evlist__parse_sample(session->evlist, event, sample))
 		return -1;
 
 	*event_ptr = event;
@@ -1754,7 +1753,7 @@ static s64 perf_session__process_event(struct perf_session *session,
 	if (tool->ordered_events) {
 		u64 timestamp = -1ULL;
 
-		ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+		ret = evlist__parse_sample_timestamp(evlist, event, &timestamp);
 		if (ret && ret != -1)
 			return ret;
 
-- 
cgit v1.2.3-70-g09d2


From b02736f776d5f50bb13ff85eb34efaed0c3f5ffa Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:48:07 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' 'find' methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c    |  7 ++-----
 tools/perf/builtin-sched.c   |  6 ++----
 tools/perf/builtin-trace.c   | 14 +++++---------
 tools/perf/util/bpf-loader.c |  3 +--
 tools/perf/util/evlist.c     | 14 ++++----------
 tools/perf/util/evlist.h     | 11 +++--------
 tools/perf/util/evswitch.c   |  4 ++--
 tools/perf/util/header.c     |  5 ++---
 8 files changed, 21 insertions(+), 43 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index a50dae2c4ae9..0062445e8ead 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1960,18 +1960,15 @@ int cmd_kmem(int argc, const char **argv)
 	ret = -1;
 
 	if (kmem_slab) {
-		if (!perf_evlist__find_tracepoint_by_name(session->evlist,
-							  "kmem:kmalloc")) {
+		if (!evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) {
 			pr_err(errmsg, "slab", "slab");
 			goto out_delete;
 		}
 	}
 
 	if (kmem_page) {
-		struct evsel *evsel;
+		struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc");
 
-		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-							     "kmem:mm_page_alloc");
 		if (evsel == NULL) {
 			pr_err(errmsg, "page", "page");
 			goto out_delete;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 0e16f9d5a947..69c769b04a61 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -3036,8 +3036,7 @@ static int perf_sched__timehist(struct perf_sched *sched)
 	setup_pager();
 
 	/* prefer sched_waking if it is captured */
-	if (perf_evlist__find_tracepoint_by_name(session->evlist,
-						  "sched:sched_waking"))
+	if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking"))
 		handlers[1].handler = timehist_sched_wakeup_ignore;
 
 	/* setup per-evsel handlers */
@@ -3045,8 +3044,7 @@ static int perf_sched__timehist(struct perf_sched *sched)
 		goto out;
 
 	/* sched_switch event at a minimum needs to exist */
-	if (!perf_evlist__find_tracepoint_by_name(session->evlist,
-						  "sched:sched_switch")) {
+	if (!evlist__find_tracepoint_by_name(session->evlist, "sched:sched_switch")) {
 		pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
 		goto out;
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5638eae36ec8..6e47b19a1dcb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4227,12 +4227,10 @@ static int trace__replay(struct trace *trace)
 	if (err)
 		goto out;
 
-	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-						     "raw_syscalls:sys_enter");
+	evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
 	/* older kernels have syscalls tp versus raw_syscalls */
 	if (evsel == NULL)
-		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-							     "syscalls:sys_enter");
+		evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
 
 	if (evsel &&
 	    (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
@@ -4241,11 +4239,9 @@ static int trace__replay(struct trace *trace)
 		goto out;
 	}
 
-	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-						     "raw_syscalls:sys_exit");
+	evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
 	if (evsel == NULL)
-		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
-							     "syscalls:sys_exit");
+		evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
 	if (evsel &&
 	    (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
 	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
@@ -4905,7 +4901,7 @@ int cmd_trace(int argc, const char **argv)
 	if (evsel) {
 		trace.syscalls.events.augmented = evsel;
 
-		evsel = perf_evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
+		evsel = evlist__find_tracepoint_by_name(trace.evlist, "raw_syscalls:sys_enter");
 		if (evsel == NULL) {
 			pr_err("ERROR: raw_syscalls:sys_enter not found in the augmented BPF object\n");
 			goto out;
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 0374adcb223c..9087f1bffd3d 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1058,12 +1058,11 @@ __bpf_map__config_event(struct bpf_map *map,
 			struct parse_events_term *term,
 			struct evlist *evlist)
 {
-	struct evsel *evsel;
 	const struct bpf_map_def *def;
 	struct bpf_map_op *op;
 	const char *map_name = bpf_map__name(map);
+	struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str);
 
-	evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
 	if (!evsel) {
 		pr_debug("Event (for '%s') '%s' doesn't exist\n",
 			 map_name, term->val.str);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 943262aa6505..e6b2715ad31d 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -191,13 +191,12 @@ void perf_evlist__splice_list_tail(struct evlist *evlist,
 int __evlist__set_tracepoints_handlers(struct evlist *evlist,
 				       const struct evsel_str_handler *assocs, size_t nr_assocs)
 {
-	struct evsel *evsel;
 	size_t i;
 	int err;
 
 	for (i = 0; i < nr_assocs; i++) {
 		// Adding a handler for an event not in this evlist, just ignore it.
-		evsel = perf_evlist__find_tracepoint_by_name(evlist, assocs[i].name);
+		struct evsel *evsel = evlist__find_tracepoint_by_name(evlist, assocs[i].name);
 		if (evsel == NULL)
 			continue;
 
@@ -294,8 +293,7 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a
 	return evlist__add_attrs(evlist, attrs, nr_attrs);
 }
 
-struct evsel *
-perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
 {
 	struct evsel *evsel;
 
@@ -308,9 +306,7 @@ perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
 	return NULL;
 }
 
-struct evsel *
-perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
-				     const char *name)
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name)
 {
 	struct evsel *evsel;
 
@@ -1594,9 +1590,7 @@ void perf_evlist__set_tracking_event(struct evlist *evlist,
 	tracking_evsel->tracking = true;
 }
 
-struct evsel *
-perf_evlist__find_evsel_by_str(struct evlist *evlist,
-			       const char *str)
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str)
 {
 	struct evsel *evsel;
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c59a08cf9f25..aa9b3f9431e7 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -147,12 +147,8 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter);
 int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid);
 int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
 
-struct evsel *
-perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
-
-struct evsel *
-perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
-				     const char *name);
+struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id);
+struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name);
 
 int evlist__add_pollfd(struct evlist *evlist, int fd);
 int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
@@ -338,8 +334,7 @@ void evlist__cpu_iter_start(struct evlist *evlist);
 bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
 bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
 
-struct evsel *
-perf_evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
+struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
 
 struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
 					    union perf_event *event);
diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c
index 3ba72f743d3c..40cb56a9347d 100644
--- a/tools/perf/util/evswitch.c
+++ b/tools/perf/util/evswitch.c
@@ -41,7 +41,7 @@ static int evswitch__fprintf_enoent(FILE *fp, const char *evtype, const char *ev
 int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp)
 {
 	if (evswitch->on_name) {
-		evswitch->on = perf_evlist__find_evsel_by_str(evlist, evswitch->on_name);
+		evswitch->on = evlist__find_evsel_by_str(evlist, evswitch->on_name);
 		if (evswitch->on == NULL) {
 			evswitch__fprintf_enoent(fp, "on", evswitch->on_name);
 			return -ENOENT;
@@ -50,7 +50,7 @@ int evswitch__init(struct evswitch *evswitch, struct evlist *evlist, FILE *fp)
 	}
 
 	if (evswitch->off_name) {
-		evswitch->off = perf_evlist__find_evsel_by_str(evlist, evswitch->off_name);
+		evswitch->off = evlist__find_evsel_by_str(evlist, evswitch->off_name);
 		if (evswitch->off == NULL) {
 			evswitch__fprintf_enoent(fp, "off", evswitch->off_name);
 			return -ENOENT;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b65c8f7ce36a..64a3b83b3090 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2263,8 +2263,7 @@ static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused)
 	return 0;
 }
 
-static struct evsel *
-perf_evlist__find_by_index(struct evlist *evlist, int idx)
+static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx)
 {
 	struct evsel *evsel;
 
@@ -2285,7 +2284,7 @@ perf_evlist__set_event_name(struct evlist *evlist,
 	if (!event->name)
 		return;
 
-	evsel = perf_evlist__find_by_index(evlist, event->idx);
+	evsel = evlist__find_by_index(evlist, event->idx);
 	if (!evsel)
 		return;
 
-- 
cgit v1.2.3-70-g09d2


From fd643db5a8797dde0fe8d6f2fd01f36971d43fe0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 09:54:08 -0300
Subject: perf evlist: Ditch unused set/reset sample_bit methods

Not used anymore, ditch them.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 18 ------------------
 tools/perf/util/evlist.h | 11 -----------
 2 files changed, 29 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e6b2715ad31d..86172ee394d0 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -953,24 +953,6 @@ out_delete_threads:
 	return -1;
 }
 
-void __perf_evlist__set_sample_bit(struct evlist *evlist,
-				   enum perf_event_sample_format bit)
-{
-	struct evsel *evsel;
-
-	evlist__for_each_entry(evlist, evsel)
-		__evsel__set_sample_bit(evsel, bit);
-}
-
-void __perf_evlist__reset_sample_bit(struct evlist *evlist,
-				     enum perf_event_sample_format bit)
-{
-	struct evsel *evsel;
-
-	evlist__for_each_entry(evlist, evsel)
-		__evsel__reset_sample_bit(evsel, bit);
-}
-
 int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
 {
 	struct evsel *evsel;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index aa9b3f9431e7..a1288b35717c 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -127,17 +127,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist,
 #define evlist__set_tracepoints_handlers(evlist, array) \
 	__evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
 
-void __perf_evlist__set_sample_bit(struct evlist *evlist,
-				   enum perf_event_sample_format bit);
-void __perf_evlist__reset_sample_bit(struct evlist *evlist,
-				     enum perf_event_sample_format bit);
-
-#define perf_evlist__set_sample_bit(evlist, bit) \
-	__perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit)
-
-#define perf_evlist__reset_sample_bit(evlist, bit) \
-	__perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
-
 int evlist__set_tp_filter(struct evlist *evlist, const char *filter);
 int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid);
 int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids);
-- 
cgit v1.2.3-70-g09d2


From 3ccf8a7b66b6bff69a7be62f2d5a2a61328ebe91 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:17:57 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' sample id
 lookup  methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c        |  2 +-
 tools/perf/builtin-top.c           |  2 +-
 tools/perf/builtin-trace.c         |  2 +-
 tools/perf/tests/mmap-basic.c      |  2 +-
 tools/perf/tests/switch-tracking.c |  2 +-
 tools/perf/util/auxtrace.c         |  4 ++--
 tools/perf/util/evlist.c           | 23 ++++++++++-------------
 tools/perf/util/evlist.h           | 10 ++++------
 tools/perf/util/evsel.c            |  2 +-
 tools/perf/util/header.c           |  2 +-
 tools/perf/util/intel-pt.c         |  3 +--
 tools/perf/util/python.c           |  2 +-
 tools/perf/util/s390-cpumsf.c      |  2 +-
 tools/perf/util/s390-sample-raw.c  |  2 +-
 tools/perf/util/session.c          |  6 +++---
 tools/perf/util/sideband_evlist.c  |  2 +-
 tools/perf/util/stat.c             |  2 +-
 tools/perf/util/synthetic-events.c |  2 +-
 18 files changed, 33 insertions(+), 39 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 983c101965ab..1c322c129185 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2224,7 +2224,7 @@ static int print_event_with_time(struct perf_tool *tool,
 {
 	struct perf_script *script = container_of(tool, struct perf_script, tool);
 	struct perf_session *session = script->session;
-	struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+	struct evsel *evsel = evlist__id2evsel(session->evlist, sample->id);
 	struct thread *thread = NULL;
 
 	if (evsel && !evsel->core.attr.sample_id_all) {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index fff1d5ef5c49..88bd1ac5c8cf 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1159,7 +1159,7 @@ static int deliver_event(struct ordered_events *qe,
 		goto next_event;
 	}
 
-	evsel = perf_evlist__id2evsel(session->evlist, sample.id);
+	evsel = evlist__id2evsel(session->evlist, sample.id);
 	assert(evsel != NULL);
 
 	if (event->header.type == PERF_RECORD_SAMPLE) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 6e47b19a1dcb..a463ce8fb8e0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3105,7 +3105,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 		return;
 	}
 
-	evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
+	evsel = evlist__id2evsel(trace->evlist, sample->id);
 	if (evsel == NULL) {
 		fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
 		return;
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index d826153adbf8..c01d7e12d241 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -133,7 +133,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 		}
 
 		err = -1;
-		evsel = perf_evlist__id2evsel(evlist, sample.id);
+		evsel = evlist__id2evsel(evlist, sample.id);
 		if (evsel == NULL) {
 			pr_debug("event with id %" PRIu64
 				 " doesn't map to an evsel\n", sample.id);
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index b8cdbe6f324f..3c0bc3f1fd0e 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -133,7 +133,7 @@ static int process_sample_event(struct evlist *evlist,
 		return -1;
 	}
 
-	evsel = perf_evlist__id2evsel(evlist, sample.id);
+	evsel = evlist__id2evsel(evlist, sample.id);
 	if (evsel == switch_tracking->switch_evsel) {
 		next_tid = evsel__intval(evsel, &sample, "next_pid");
 		prev_tid = evsel__intval(evsel, &sample, "prev_pid");
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 647afdcb2699..b538b9093567 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1017,7 +1017,7 @@ struct auxtrace_queue *auxtrace_queues__sample_queue(struct auxtrace_queues *que
 	if (!id)
 		return NULL;
 
-	sid = perf_evlist__id2sid(session->evlist, id);
+	sid = evlist__id2sid(session->evlist, id);
 	if (!sid)
 		return NULL;
 
@@ -1047,7 +1047,7 @@ int auxtrace_queues__add_sample(struct auxtrace_queues *queues,
 	if (!id)
 		return -EINVAL;
 
-	sid = perf_evlist__id2sid(session->evlist, id);
+	sid = evlist__id2sid(session->evlist, id);
 	if (!sid)
 		return -ENOENT;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 86172ee394d0..51775ff2979f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -509,7 +509,7 @@ int evlist__poll(struct evlist *evlist, int timeout)
 	return perf_evlist__poll(&evlist->core, timeout);
 }
 
-struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id)
 {
 	struct hlist_head *head;
 	struct perf_sample_id *sid;
@@ -525,14 +525,14 @@ struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
 	return NULL;
 }
 
-struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id)
 {
 	struct perf_sample_id *sid;
 
 	if (evlist->core.nr_entries == 1 || !id)
 		return evlist__first(evlist);
 
-	sid = perf_evlist__id2sid(evlist, id);
+	sid = evlist__id2sid(evlist, id);
 	if (sid)
 		return container_of(sid->evsel, struct evsel, core);
 
@@ -542,23 +542,21 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
 	return NULL;
 }
 
-struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
-						u64 id)
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id)
 {
 	struct perf_sample_id *sid;
 
 	if (!id)
 		return NULL;
 
-	sid = perf_evlist__id2sid(evlist, id);
+	sid = evlist__id2sid(evlist, id);
 	if (sid)
 		return container_of(sid->evsel, struct evsel, core);
 
 	return NULL;
 }
 
-static int perf_evlist__event2id(struct evlist *evlist,
-				 union perf_event *event, u64 *id)
+static int evlist__event2id(struct evlist *evlist, union perf_event *event, u64 *id)
 {
 	const __u64 *array = event->sample.array;
 	ssize_t n;
@@ -578,8 +576,7 @@ static int perf_evlist__event2id(struct evlist *evlist,
 	return 0;
 }
 
-struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
-					    union perf_event *event)
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event)
 {
 	struct evsel *first = evlist__first(evlist);
 	struct hlist_head *head;
@@ -594,7 +591,7 @@ struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
 	    event->header.type != PERF_RECORD_SAMPLE)
 		return first;
 
-	if (perf_evlist__event2id(evlist, event, &id))
+	if (evlist__event2id(evlist, event, &id))
 		return NULL;
 
 	/* Synthesized events have an id of zero */
@@ -1427,7 +1424,7 @@ int evlist__start_workload(struct evlist *evlist)
 
 int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
-	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+	struct evsel *evsel = evlist__event2evsel(evlist, event);
 
 	if (!evsel)
 		return -EFAULT;
@@ -1436,7 +1433,7 @@ int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct
 
 int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp)
 {
-	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+	struct evsel *evsel = evlist__event2evsel(evlist, event);
 
 	if (!evsel)
 		return -EFAULT;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a1288b35717c..90f1127fecf6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -144,11 +144,10 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask);
 
 int evlist__poll(struct evlist *evlist, int timeout);
 
-struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id);
-struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
-						u64 id);
+struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id);
+struct evsel *evlist__id2evsel_strict(struct evlist *evlist, u64 id);
 
-struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id);
+struct perf_sample_id *evlist__id2sid(struct evlist *evlist, u64 id);
 
 void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state);
 
@@ -325,8 +324,7 @@ bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
 
 struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
 
-struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
-					    union perf_event *event);
+struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
 
 bool perf_evlist__exclude_kernel(struct evlist *evlist);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 9005cc974f43..3dd0eae9810d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1469,7 +1469,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread,
 	for (i = 1; i < nr; i++) {
 		struct evsel *counter;
 
-		counter = perf_evlist__id2evsel(leader->evlist, v[i].id);
+		counter = evlist__id2evsel(leader->evlist, v[i].id);
 		if (!counter)
 			return -EINVAL;
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 64a3b83b3090..be219051119c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -4030,7 +4030,7 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
 
 	evlist = *pevlist;
 
-	evsel = perf_evlist__id2evsel(evlist, ev->id);
+	evsel = evlist__id2evsel(evlist, ev->id);
 	if (evsel == NULL)
 		return -EINVAL;
 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 3a0348caec7d..60214de42f31 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2520,11 +2520,10 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
 static int intel_pt_process_switch(struct intel_pt *pt,
 				   struct perf_sample *sample)
 {
-	struct evsel *evsel;
 	pid_t tid;
 	int cpu, ret;
+	struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id);
 
-	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
 	if (evsel != pt->switch_evsel)
 		return 0;
 
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 3d3b46514863..cc5ade85a33f 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -1055,7 +1055,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
 		if (pyevent == NULL)
 			return PyErr_NoMemory();
 
-		evsel = perf_evlist__event2evsel(evlist, event);
+		evsel = evlist__event2evsel(evlist, event);
 		if (!evsel) {
 			Py_INCREF(Py_None);
 			return Py_None;
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 959e9890bccc..078a71773565 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -932,7 +932,7 @@ s390_cpumsf_process_event(struct perf_session *session,
 	if (event->header.type == PERF_RECORD_SAMPLE &&
 	    sample->raw_size) {
 		/* Handle event with raw data */
-		ev_bc000 = perf_evlist__event2evsel(session->evlist, event);
+		ev_bc000 = evlist__event2evsel(session->evlist, event);
 		if (ev_bc000 &&
 		    ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
 			err = s390_cpumcf_dumpctr(sf, sample);
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
index 05b43ab4eeef..d177e6179839 100644
--- a/tools/perf/util/s390-sample-raw.c
+++ b/tools/perf/util/s390-sample-raw.c
@@ -205,7 +205,7 @@ void perf_evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event
 	if (event->header.type != PERF_RECORD_SAMPLE)
 		return;
 
-	ev_bc000 = perf_evlist__event2evsel(evlist, event);
+	ev_bc000 = evlist__event2evsel(evlist, event);
 	if (ev_bc000 == NULL ||
 	    ev_bc000->core.attr.config != PERF_EVENT_CPUM_CF_DIAG)
 		return;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 61e4b3cc7313..ce381b3dca06 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1364,7 +1364,7 @@ static int deliver_sample_value(struct evlist *evlist,
 				struct sample_read_value *v,
 				struct machine *machine)
 {
-	struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
+	struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
 	struct evsel *evsel;
 
 	if (sid) {
@@ -1445,7 +1445,7 @@ static int machines__deliver_event(struct machines *machines,
 
 	dump_event(evlist, event, file_offset, sample);
 
-	evsel = perf_evlist__id2evsel(evlist, sample->id);
+	evsel = evlist__id2evsel(evlist, sample->id);
 
 	machine = machines__find_for_cpumode(machines, event, sample);
 
@@ -2476,7 +2476,7 @@ int perf_event__process_id_index(struct perf_session *session,
 			fprintf(stdout,	"  tid: %"PRI_ld64"\n", e->tid);
 		}
 
-		sid = perf_evlist__id2sid(evlist, e->id);
+		sid = evlist__id2sid(evlist, e->id);
 		if (!sid)
 			return -ENOENT;
 		sid->idx = e->idx;
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
index 90ed016bb348..9c04c71dbf51 100644
--- a/tools/perf/util/sideband_evlist.c
+++ b/tools/perf/util/sideband_evlist.c
@@ -62,7 +62,7 @@ static void *perf_evlist__poll_thread(void *arg)
 			if (perf_mmap__read_init(&map->core))
 				continue;
 			while ((event = perf_mmap__read_event(&map->core)) != NULL) {
-				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+				struct evsel *evsel = evlist__event2evsel(evlist, event);
 
 				if (evsel && evsel->side_band.cb)
 					evsel->side_band.cb(event, evsel->side_band.data);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 8be9c3da9e56..1e125e39ff84 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -458,7 +458,7 @@ int perf_event__process_stat_event(struct perf_session *session,
 	count.ena = st->ena;
 	count.run = st->run;
 
-	counter = perf_evlist__id2evsel(session->evlist, st->id);
+	counter = evlist__id2evsel(session->evlist, st->id);
 	if (!counter) {
 		pr_err("Failed to resolve counter for stat event.\n");
 		return -EINVAL;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index d9c624377da7..297987c6960b 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1643,7 +1643,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_
 
 			e->id = evsel->core.id[j];
 
-			sid = perf_evlist__id2sid(evlist, e->id);
+			sid = evlist__id2sid(evlist, e->id);
 			if (!sid) {
 				free(ev);
 				return -ENOENT;
-- 
cgit v1.2.3-70-g09d2


From f4bd0b4a9b21c609ede28cee2dcd16824c0489a8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:23:35 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' browser methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c    | 19 +++++++------------
 tools/perf/builtin-top.c       |  9 +++------
 tools/perf/ui/browsers/hists.c | 25 ++++++++-----------------
 tools/perf/ui/gtk/gtk.h        |  5 ++---
 tools/perf/ui/gtk/hists.c      |  6 ++----
 tools/perf/util/hist.h         | 23 ++++++++++-------------
 6 files changed, 32 insertions(+), 55 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3c74c9c0f3c3..11f13aafde44 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -493,8 +493,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
 	return ret + fprintf(fp, "\n#\n");
 }
 
-static int perf_evlist__tui_block_hists_browse(struct evlist *evlist,
-					       struct report *rep)
+static int evlist__tui_block_hists_browse(struct evlist *evlist, struct report *rep)
 {
 	struct evsel *pos;
 	int i = 0, ret;
@@ -511,9 +510,7 @@ static int perf_evlist__tui_block_hists_browse(struct evlist *evlist,
 	return 0;
 }
 
-static int perf_evlist__tty_browse_hists(struct evlist *evlist,
-					 struct report *rep,
-					 const char *help)
+static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, const char *help)
 {
 	struct evsel *pos;
 	int i = 0;
@@ -595,7 +592,7 @@ static int report__gtk_browse_hists(struct report *rep, const char *help)
 	int (*hist_browser)(struct evlist *evlist, const char *help,
 			    struct hist_browser_timer *timer, float min_pcnt);
 
-	hist_browser = dlsym(perf_gtk_handle, "perf_evlist__gtk_browse_hists");
+	hist_browser = dlsym(perf_gtk_handle, "evlist__gtk_browse_hists");
 
 	if (hist_browser == NULL) {
 		ui__error("GTK browser not found!\n");
@@ -622,14 +619,12 @@ static int report__browse_hists(struct report *rep)
 	switch (use_browser) {
 	case 1:
 		if (rep->total_cycles_mode) {
-			ret = perf_evlist__tui_block_hists_browse(evlist, rep);
+			ret = evlist__tui_block_hists_browse(evlist, rep);
 			break;
 		}
 
-		ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
-						    rep->min_percent,
-						    &session->header.env,
-						    true, &rep->annotation_opts);
+		ret = evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent,
+					       &session->header.env, true, &rep->annotation_opts);
 		/*
 		 * Usually "ret" is the last pressed key, and we only
 		 * care if the key notifies us to switch data file.
@@ -641,7 +636,7 @@ static int report__browse_hists(struct report *rep)
 		ret = report__gtk_browse_hists(rep, help);
 		break;
 	default:
-		ret = perf_evlist__tty_browse_hists(evlist, rep, help);
+		ret = evlist__tty_browse_hists(evlist, rep, help);
 		break;
 	}
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 88bd1ac5c8cf..5601a35a5ba4 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -641,12 +641,9 @@ repeat:
 		hists->uid_filter_str = top->record_opts.target.uid_str;
 	}
 
-	ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
-				      top->min_percent,
-				      &top->session->header.env,
-				      !top->record_opts.overwrite,
-				      &top->annotation_opts);
-
+	ret = evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
+				       &top->session->header.env, !top->record_opts.overwrite,
+				       &top->annotation_opts);
 	if (ret == K_RELOAD) {
 		top->zero = true;
 		goto repeat;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 48814efb98e8..2f3ce505eec5 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3559,13 +3559,9 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
 	return false;
 }
 
-static int __perf_evlist__tui_browse_hists(struct evlist *evlist,
-					   int nr_entries, const char *help,
-					   struct hist_browser_timer *hbt,
-					   float min_pcnt,
-					   struct perf_env *env,
-					   bool warn_lost_event,
-					   struct annotation_options *annotation_opts)
+static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, const char *help,
+				      struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env,
+				      bool warn_lost_event, struct annotation_options *annotation_opts)
 {
 	struct evsel *pos;
 	struct evsel_menu menu = {
@@ -3614,12 +3610,9 @@ static bool perf_evlist__single_entry(struct evlist *evlist)
 	return false;
 }
 
-int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
-				  struct hist_browser_timer *hbt,
-				  float min_pcnt,
-				  struct perf_env *env,
-				  bool warn_lost_event,
-				  struct annotation_options *annotation_opts)
+int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
+			     float min_pcnt, struct perf_env *env, bool warn_lost_event,
+			     struct annotation_options *annotation_opts)
 {
 	int nr_entries = evlist->core.nr_entries;
 
@@ -3645,10 +3638,8 @@ single_entry: {
 			goto single_entry;
 	}
 
-	return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
-					       hbt, min_pcnt, env,
-					       warn_lost_event,
-					       annotation_opts);
+	return __evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env,
+					  warn_lost_event, annotation_opts);
 }
 
 static int block_hists_browser__title(struct hist_browser *browser, char *bf,
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
index a9563932fa04..a2b497f03fd6 100644
--- a/tools/perf/ui/gtk/gtk.h
+++ b/tools/perf/ui/gtk/gtk.h
@@ -57,9 +57,8 @@ struct evlist;
 struct hist_entry;
 struct hist_browser_timer;
 
-int perf_evlist__gtk_browse_hists(struct evlist *evlist, const char *help,
-				  struct hist_browser_timer *hbt,
-				  float min_pcnt);
+int evlist__gtk_browse_hists(struct evlist *evlist, const char *help,
+			     struct hist_browser_timer *hbt, float min_pcnt);
 int hist_entry__gtk_annotate(struct hist_entry *he,
 			     struct evsel *evsel,
 			     struct hist_browser_timer *hbt);
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 53ef71a1b15d..c83be2d57f7e 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -590,10 +590,8 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
 	gtk_container_add(GTK_CONTAINER(window), view);
 }
 
-int perf_evlist__gtk_browse_hists(struct evlist *evlist,
-				  const char *help,
-				  struct hist_browser_timer *hbt __maybe_unused,
-				  float min_pcnt)
+int evlist__gtk_browse_hists(struct evlist *evlist, const char *help,
+			     struct hist_browser_timer *hbt __maybe_unused, float min_pcnt)
 {
 	struct evsel *pos;
 	GtkWidget *vbox;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 96b1c13bbccc..278b49e915b0 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -464,12 +464,9 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel,
 			     struct hist_browser_timer *hbt,
 			     struct annotation_options *annotation_opts);
 
-int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
-				  struct hist_browser_timer *hbt,
-				  float min_pcnt,
-				  struct perf_env *env,
-				  bool warn_lost_event,
-				  struct annotation_options *annotation_options);
+int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt,
+			     float min_pcnt, struct perf_env *env, bool warn_lost_event,
+			     struct annotation_options *annotation_options);
 
 int script_browse(const char *script_opt, struct evsel *evsel);
 
@@ -483,13 +480,13 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel,
 			   struct annotation_options *annotation_opts);
 #else
 static inline
-int perf_evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
-				  const char *help __maybe_unused,
-				  struct hist_browser_timer *hbt __maybe_unused,
-				  float min_pcnt __maybe_unused,
-				  struct perf_env *env __maybe_unused,
-				  bool warn_lost_event __maybe_unused,
-				  struct annotation_options *annotation_options __maybe_unused)
+int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused,
+			     const char *help __maybe_unused,
+			     struct hist_browser_timer *hbt __maybe_unused,
+			     float min_pcnt __maybe_unused,
+			     struct perf_env *env __maybe_unused,
+			     bool warn_lost_event __maybe_unused,
+			     struct annotation_options *annotation_options __maybe_unused)
 {
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From e80db255525a014a78af414b346413142e9142da Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:39:41 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' tracking event
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm/util/cs-etm.c    | 2 +-
 tools/perf/arch/arm64/util/arm-spe.c | 2 +-
 tools/perf/arch/x86/util/intel-bts.c | 2 +-
 tools/perf/arch/x86/util/intel-pt.c  | 2 +-
 tools/perf/builtin-record.c          | 4 ++--
 tools/perf/tests/switch-tracking.c   | 2 +-
 tools/perf/util/evlist.c             | 5 ++---
 tools/perf/util/evlist.h             | 5 ++---
 8 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index cad7bf783413..bc0afeb903c2 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -420,7 +420,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 			goto out;
 
 		tracking_evsel = evlist__last(evlist);
-		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+		evlist__set_tracking_event(evlist, tracking_evsel);
 
 		tracking_evsel->core.attr.freq = 0;
 		tracking_evsel->core.attr.sample_period = 1;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index e3593063b3d1..beccf1b36654 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -130,7 +130,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 		return err;
 
 	tracking_evsel = evlist__last(evlist);
-	perf_evlist__set_tracking_event(evlist, tracking_evsel);
+	evlist__set_tracking_event(evlist, tracking_evsel);
 
 	tracking_evsel->core.attr.freq = 0;
 	tracking_evsel->core.attr.sample_period = 1;
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 0dc09b5809c1..66ca98df48b0 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -238,7 +238,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
 
 		tracking_evsel = evlist__last(evlist);
 
-		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+		evlist__set_tracking_event(evlist, tracking_evsel);
 
 		tracking_evsel->core.attr.freq = 0;
 		tracking_evsel->core.attr.sample_period = 1;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 082e5f2a415a..f1b5380a7401 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -865,7 +865,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 
 		tracking_evsel = evlist__last(evlist);
 
-		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+		evlist__set_tracking_event(evlist, tracking_evsel);
 
 		tracking_evsel->core.attr.freq = 0;
 		tracking_evsel->core.attr.sample_period = 1;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 04946bd3d090..2d74a410d008 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -891,13 +891,13 @@ static int record__open(struct record *rec)
 	 * event synthesis.
 	 */
 	if (opts->initial_delay || target__has_cpu(&opts->target)) {
-		pos = perf_evlist__get_tracking_event(evlist);
+		pos = evlist__get_tracking_event(evlist);
 		if (!evsel__is_dummy_event(pos)) {
 			/* Set up dummy event. */
 			if (evlist__add_dummy(evlist))
 				return -ENOMEM;
 			pos = evlist__last(evlist);
-			perf_evlist__set_tracking_event(evlist, pos);
+			evlist__set_tracking_event(evlist, pos);
 		}
 
 		/*
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 3c0bc3f1fd0e..8bf484a0f35d 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -424,7 +424,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
 
 	tracking_evsel = evlist__last(evlist);
 
-	perf_evlist__set_tracking_event(evlist, tracking_evsel);
+	evlist__set_tracking_event(evlist, tracking_evsel);
 
 	tracking_evsel->core.attr.freq = 0;
 	tracking_evsel->core.attr.sample_period = 1;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 51775ff2979f..b5a53e118564 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1541,7 +1541,7 @@ void perf_evlist__to_front(struct evlist *evlist,
 	list_splice(&move, &evlist->core.entries);
 }
 
-struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist)
+struct evsel *evlist__get_tracking_event(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
@@ -1553,8 +1553,7 @@ struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist)
 	return evlist__first(evlist);
 }
 
-void perf_evlist__set_tracking_event(struct evlist *evlist,
-				     struct evsel *tracking_evsel)
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel)
 {
 	struct evsel *evsel;
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 90f1127fecf6..14bf7e2f9c0e 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -314,9 +314,8 @@ void perf_evlist__to_front(struct evlist *evlist,
 	evlist__cpu_iter_start(evlist);			\
 	perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
 
-struct evsel *perf_evlist__get_tracking_event(struct evlist *evlist);
-void perf_evlist__set_tracking_event(struct evlist *evlist,
-				     struct evsel *tracking_evsel);
+struct evsel *evlist__get_tracking_event(struct evlist *evlist);
+void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel);
 
 void evlist__cpu_iter_start(struct evlist *evlist);
 bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
-- 
cgit v1.2.3-70-g09d2


From 2a99ff822dfa4a88d54b2c4f17d33748bcedd899 Mon Sep 17 00:00:00 2001
From: Alexandre Truong <alexandre.truong@arm.com>
Date: Fri, 27 Nov 2020 15:39:23 +0000
Subject: perf tools: Add aarch64 registers to --user-regs

Previously, this command returns no help message on aarch64:

  -> ./perf record --user-regs=?

  available registers:
  Usage: perf record [<options>] [<command>]
      or: perf record [<options>] -- <command> [<options>]

With this change, the registers are listed.

  -> ./perf record --user-regs=?

  available registers: x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 lr sp pc

It's also now possible to record subsets of registers on aarch64:

  -> ./perf record --user-regs=x4,x5 ls
  -> ./perf report --dump-raw-trace

  12801163749305260 0xc70 [0x40]: PERF_RECORD_SAMPLE(IP, 0x2): 51956/51956: 0xffffaa6571f0 period: 145785 addr: 0
  ... user regs: mask 0x30 ABI 64-bit
  .... x4    0x000000000000006c
  .... x5    0x0000001001000001
   ... thread: ls:51956
    ...... dso: /usr/lib64/ld-2.17.so

Signed-off-by: Alexandre Truong <alexandre.truong@arm.com>
Tested-by: James Clark <james.clark@arm.com>
Acked-by: John Garry <john.garry@huawei.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lore.kernel.org/lkml/20201127153923.26717-1-alexandre.truong@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/util/perf_regs.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 2833e101a7c6..54efa12fdbea 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -2,5 +2,38 @@
 #include "../../../util/perf_regs.h"
 
 const struct sample_reg sample_reg_masks[] = {
+	SMPL_REG(x0, PERF_REG_ARM64_X0),
+	SMPL_REG(x1, PERF_REG_ARM64_X1),
+	SMPL_REG(x2, PERF_REG_ARM64_X2),
+	SMPL_REG(x3, PERF_REG_ARM64_X3),
+	SMPL_REG(x4, PERF_REG_ARM64_X4),
+	SMPL_REG(x5, PERF_REG_ARM64_X5),
+	SMPL_REG(x6, PERF_REG_ARM64_X6),
+	SMPL_REG(x7, PERF_REG_ARM64_X7),
+	SMPL_REG(x8, PERF_REG_ARM64_X8),
+	SMPL_REG(x9, PERF_REG_ARM64_X9),
+	SMPL_REG(x10, PERF_REG_ARM64_X10),
+	SMPL_REG(x11, PERF_REG_ARM64_X11),
+	SMPL_REG(x12, PERF_REG_ARM64_X12),
+	SMPL_REG(x13, PERF_REG_ARM64_X13),
+	SMPL_REG(x14, PERF_REG_ARM64_X14),
+	SMPL_REG(x15, PERF_REG_ARM64_X15),
+	SMPL_REG(x16, PERF_REG_ARM64_X16),
+	SMPL_REG(x17, PERF_REG_ARM64_X17),
+	SMPL_REG(x18, PERF_REG_ARM64_X18),
+	SMPL_REG(x19, PERF_REG_ARM64_X19),
+	SMPL_REG(x20, PERF_REG_ARM64_X20),
+	SMPL_REG(x21, PERF_REG_ARM64_X21),
+	SMPL_REG(x22, PERF_REG_ARM64_X22),
+	SMPL_REG(x23, PERF_REG_ARM64_X23),
+	SMPL_REG(x24, PERF_REG_ARM64_X24),
+	SMPL_REG(x25, PERF_REG_ARM64_X25),
+	SMPL_REG(x26, PERF_REG_ARM64_X26),
+	SMPL_REG(x27, PERF_REG_ARM64_X27),
+	SMPL_REG(x28, PERF_REG_ARM64_X28),
+	SMPL_REG(x29, PERF_REG_ARM64_X29),
+	SMPL_REG(lr, PERF_REG_ARM64_LR),
+	SMPL_REG(sp, PERF_REG_ARM64_SP),
+	SMPL_REG(pc, PERF_REG_ARM64_PC),
 	SMPL_REG_END
 };
-- 
cgit v1.2.3-70-g09d2


From 0a7e7ec90e601d98cc5914626b78fd043598b85b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:44:40 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' id_pos methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c          | 12 ++++++------
 tools/perf/util/evlist.h          |  2 +-
 tools/perf/util/record.c          |  2 +-
 tools/perf/util/sideband_evlist.c |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index b5a53e118564..26c436abac77 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -103,13 +103,13 @@ struct evlist *perf_evlist__new_dummy(void)
 }
 
 /**
- * perf_evlist__set_id_pos - set the positions of event ids.
+ * evlist__set_id_pos - set the positions of event ids.
  * @evlist: selected event list
  *
  * Events with compatible sample types all have the same id_pos
  * and is_pos.  For convenience, put a copy on evlist.
  */
-void perf_evlist__set_id_pos(struct evlist *evlist)
+void evlist__set_id_pos(struct evlist *evlist)
 {
 	struct evsel *first = evlist__first(evlist);
 
@@ -117,14 +117,14 @@ void perf_evlist__set_id_pos(struct evlist *evlist)
 	evlist->is_pos = first->is_pos;
 }
 
-static void perf_evlist__update_id_pos(struct evlist *evlist)
+static void evlist__update_id_pos(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel)
 		evsel__calc_id_pos(evsel);
 
-	perf_evlist__set_id_pos(evlist);
+	evlist__set_id_pos(evlist);
 }
 
 static void evlist__purge(struct evlist *evlist)
@@ -168,7 +168,7 @@ void evlist__add(struct evlist *evlist, struct evsel *entry)
 	perf_evlist__add(&evlist->core, &entry->core);
 
 	if (evlist->core.nr_entries == 1)
-		perf_evlist__set_id_pos(evlist);
+		evlist__set_id_pos(evlist);
 }
 
 void evlist__remove(struct evlist *evlist, struct evsel *evsel)
@@ -1275,7 +1275,7 @@ int evlist__open(struct evlist *evlist)
 			goto out_err;
 	}
 
-	perf_evlist__update_id_pos(evlist);
+	evlist__update_id_pos(evlist);
 
 	evlist__for_each_entry(evlist, evsel) {
 		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 14bf7e2f9c0e..85da0c443ae6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -158,7 +158,7 @@ void evlist__close(struct evlist *evlist);
 
 struct callchain_param;
 
-void perf_evlist__set_id_pos(struct evlist *evlist);
+void evlist__set_id_pos(struct evlist *evlist);
 void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
 			 struct callchain_param *callchain);
 int record_opts__config(struct record_opts *opts);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 7330407db4ba..220b9910427b 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -144,7 +144,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
 			evsel__set_sample_id(evsel, use_sample_identifier);
 	}
 
-	perf_evlist__set_id_pos(evlist);
+	evlist__set_id_pos(evlist);
 }
 
 static int get_max_rate(unsigned int *rate)
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
index 9c04c71dbf51..13f387b76456 100644
--- a/tools/perf/util/sideband_evlist.c
+++ b/tools/perf/util/sideband_evlist.c
@@ -110,7 +110,7 @@ int evlist__start_sb_thread(struct evlist *evlist, struct target *target)
 		evlist__for_each_entry(evlist, counter)
 			evsel__set_sample_id(counter, can_sample_identifier);
 
-		perf_evlist__set_id_pos(evlist);
+		evlist__set_id_pos(evlist);
 	}
 
 	evlist__for_each_entry(evlist, counter) {
-- 
cgit v1.2.3-70-g09d2


From 37b01abe2a63db1b6a69af32257cb50795c725f8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:47:05 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' enable event
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c |  3 +--
 tools/perf/util/evlist.c   | 16 ++++++----------
 tools/perf/util/evlist.h   |  3 +--
 3 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index b538b9093567..8c94e21db2aa 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -658,8 +658,7 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx)
 		if (evsel->core.attr.type == itr->pmu->type) {
 			if (evsel->disabled)
 				return 0;
-			return perf_evlist__enable_event_idx(itr->evlist, evsel,
-							     idx);
+			return evlist__enable_event_idx(itr->evlist, evsel, idx);
 		}
 	}
 	return -EINVAL;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 26c436abac77..1c7b6d4a76be 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -448,8 +448,7 @@ void evlist__toggle_enable(struct evlist *evlist)
 	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
 }
 
-static int perf_evlist__enable_event_cpu(struct evlist *evlist,
-					 struct evsel *evsel, int cpu)
+static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu)
 {
 	int thread;
 	int nr_threads = perf_evlist__nr_threads(evlist, evsel);
@@ -465,9 +464,7 @@ static int perf_evlist__enable_event_cpu(struct evlist *evlist,
 	return 0;
 }
 
-static int perf_evlist__enable_event_thread(struct evlist *evlist,
-					    struct evsel *evsel,
-					    int thread)
+static int evlist__enable_event_thread(struct evlist *evlist, struct evsel *evsel, int thread)
 {
 	int cpu;
 	int nr_cpus = perf_cpu_map__nr(evlist->core.cpus);
@@ -483,15 +480,14 @@ static int perf_evlist__enable_event_thread(struct evlist *evlist,
 	return 0;
 }
 
-int perf_evlist__enable_event_idx(struct evlist *evlist,
-				  struct evsel *evsel, int idx)
+int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx)
 {
 	bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.cpus);
 
 	if (per_cpu_mmaps)
-		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
-	else
-		return perf_evlist__enable_event_thread(evlist, evsel, idx);
+		return evlist__enable_event_cpu(evlist, evsel, idx);
+
+	return evlist__enable_event_thread(evlist, evsel, idx);
 }
 
 int evlist__add_pollfd(struct evlist *evlist, int fd)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 85da0c443ae6..f563e546b055 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -190,8 +190,7 @@ void evlist__disable(struct evlist *evlist);
 void evlist__enable(struct evlist *evlist);
 void evlist__toggle_enable(struct evlist *evlist);
 
-int perf_evlist__enable_event_idx(struct evlist *evlist,
-				  struct evsel *evsel, int idx);
+int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx);
 
 void perf_evlist__set_selected(struct evlist *evlist,
 			       struct evsel *evsel);
-- 
cgit v1.2.3-70-g09d2


From 0a60b339475970213f2685d0da55a26d5f4f22f9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:49:05 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' pause/resume
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1c7b6d4a76be..3ca211e533ba 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -604,7 +604,7 @@ struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event
 	return NULL;
 }
 
-static int perf_evlist__set_paused(struct evlist *evlist, bool value)
+static int evlist__set_paused(struct evlist *evlist, bool value)
 {
 	int i;
 
@@ -624,14 +624,14 @@ static int perf_evlist__set_paused(struct evlist *evlist, bool value)
 	return 0;
 }
 
-static int perf_evlist__pause(struct evlist *evlist)
+static int evlist__pause(struct evlist *evlist)
 {
-	return perf_evlist__set_paused(evlist, true);
+	return evlist__set_paused(evlist, true);
 }
 
-static int perf_evlist__resume(struct evlist *evlist)
+static int evlist__resume(struct evlist *evlist)
 {
-	return perf_evlist__set_paused(evlist, false);
+	return evlist__set_paused(evlist, false);
 }
 
 static void evlist__munmap_nofree(struct evlist *evlist)
@@ -1621,10 +1621,10 @@ void evlist__toggle_bkw_mmap(struct evlist *evlist, enum bkw_mmap_state state)
 
 	switch (action) {
 	case PAUSE:
-		perf_evlist__pause(evlist);
+		evlist__pause(evlist);
 		break;
 	case RESUME:
-		perf_evlist__resume(evlist);
+		evlist__resume(evlist);
 		break;
 	case NONE:
 	default:
-- 
cgit v1.2.3-70-g09d2


From e414fd1a3f709984a03f0fa287e39df6a7218e22 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:52:44 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' evsel list
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm/util/cs-etm.c    | 2 +-
 tools/perf/arch/arm64/util/arm-spe.c | 2 +-
 tools/perf/arch/x86/util/intel-bts.c | 2 +-
 tools/perf/arch/x86/util/intel-pt.c  | 2 +-
 tools/perf/tests/bpf.c               | 2 +-
 tools/perf/tests/expand-cgroup.c     | 4 ++--
 tools/perf/tests/switch-tracking.c   | 2 +-
 tools/perf/ui/browsers/hists.c       | 4 ++--
 tools/perf/util/cgroup.c             | 4 ++--
 tools/perf/util/evlist.c             | 8 +++-----
 tools/perf/util/evlist.h             | 8 +++-----
 tools/perf/util/parse-events.c       | 2 +-
 tools/perf/util/sort.c               | 2 +-
 13 files changed, 20 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index bc0afeb903c2..bd446aba64f7 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -395,7 +395,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 	 * To obtain the auxtrace buffer file descriptor, the auxtrace
 	 * event must come first.
 	 */
-	perf_evlist__to_front(evlist, cs_etm_evsel);
+	evlist__to_front(evlist, cs_etm_evsel);
 
 	/*
 	 * In the case of per-cpu mmaps, we need the CPU on the
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index beccf1b36654..414c8a5584b1 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -118,7 +118,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
 	 * must come first.
 	 */
-	perf_evlist__to_front(evlist, arm_spe_evsel);
+	evlist__to_front(evlist, arm_spe_evsel);
 
 	evsel__set_sample_bit(arm_spe_evsel, CPU);
 	evsel__set_sample_bit(arm_spe_evsel, TIME);
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 66ca98df48b0..4a76d49d25d6 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -218,7 +218,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
 		 * To obtain the auxtrace buffer file descriptor, the auxtrace event
 		 * must come first.
 		 */
-		perf_evlist__to_front(evlist, intel_bts_evsel);
+		evlist__to_front(evlist, intel_bts_evsel);
 		/*
 		 * In the case of per-cpu mmaps, we need the CPU on the
 		 * AUX event.
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index f1b5380a7401..cb9c0c85dc8f 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -846,7 +846,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		 * To obtain the auxtrace buffer file descriptor, the auxtrace
 		 * event must come first.
 		 */
-		perf_evlist__to_front(evlist, intel_pt_evsel);
+		evlist__to_front(evlist, intel_pt_evsel);
 		/*
 		 * In the case of per-cpu mmaps, we need the CPU on the
 		 * AUX event.
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index d880c588a951..7ed284a00ea2 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -157,7 +157,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 		goto out_delete_evlist;
 	}
 
-	perf_evlist__splice_list_tail(evlist, &parse_state.list);
+	evlist__splice_list_tail(evlist, &parse_state.list);
 	evlist->nr_groups = parse_state.nr_groups;
 
 	perf_evlist__config(evlist, &opts, NULL);
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index 4c59f3ae438f..e8187f37fe1e 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -26,7 +26,7 @@ static int test_expand_events(struct evlist *evlist,
 	char **ev_name;
 	struct evsel *evsel;
 
-	TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist));
+	TEST_ASSERT_VAL("evlist is empty", !evlist__empty(evlist));
 
 	nr_events = evlist->core.nr_entries;
 	ev_name = calloc(nr_events, sizeof(*ev_name));
@@ -161,7 +161,7 @@ static int expand_libpfm_events(void)
 			 event_str, ret);
 		goto out;
 	}
-	if (perf_evlist__empty(evlist)) {
+	if (evlist__empty(evlist)) {
 		pr_debug("libpfm was not enabled\n");
 		goto out;
 	}
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 8bf484a0f35d..e07fe84bb304 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -406,7 +406,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
 		pr_debug("cycles event already at front");
 		goto out_err;
 	}
-	perf_evlist__to_front(evlist, cycles_evsel);
+	evlist__to_front(evlist, cycles_evsel);
 	if (cycles_evsel != evlist__first(evlist)) {
 		pr_debug("Failed to move cycles event to front");
 		goto out_err;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 2f3ce505eec5..766be504abca 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3593,7 +3593,7 @@ static int __evlist__tui_browse_hists(struct evlist *evlist, int nr_entries, con
 				    hbt, warn_lost_event);
 }
 
-static bool perf_evlist__single_entry(struct evlist *evlist)
+static bool evlist__single_entry(struct evlist *evlist)
 {
 	int nr_entries = evlist->core.nr_entries;
 
@@ -3616,7 +3616,7 @@ int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct his
 {
 	int nr_entries = evlist->core.nr_entries;
 
-	if (perf_evlist__single_entry(evlist)) {
+	if (evlist__single_entry(evlist)) {
 single_entry: {
 		struct evsel *first = evlist__first(evlist);
 
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 704333748549..5dff7e489921 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -371,7 +371,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
 	}
 
 	/* save original events and init evlist */
-	perf_evlist__splice_list_tail(orig_list, &evlist->core.entries);
+	evlist__splice_list_tail(orig_list, &evlist->core.entries);
 	evlist->core.nr_entries = 0;
 
 	if (metric_events) {
@@ -430,7 +430,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
 				goto out_err;
 		}
 
-		perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries);
+		evlist__splice_list_tail(evlist, &tmp_list->core.entries);
 		tmp_list->core.nr_entries = 0;
 	}
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 3ca211e533ba..28b4d347d4ba 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -177,8 +177,7 @@ void evlist__remove(struct evlist *evlist, struct evsel *evsel)
 	perf_evlist__remove(&evlist->core, &evsel->core);
 }
 
-void perf_evlist__splice_list_tail(struct evlist *evlist,
-				   struct list_head *list)
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list)
 {
 	struct evsel *evsel, *temp;
 
@@ -273,7 +272,7 @@ static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attr
 		list_add_tail(&evsel->core.node, &head);
 	}
 
-	perf_evlist__splice_list_tail(evlist, &head);
+	evlist__splice_list_tail(evlist, &head);
 
 	return 0;
 
@@ -1520,8 +1519,7 @@ int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size
 	return 0;
 }
 
-void perf_evlist__to_front(struct evlist *evlist,
-			   struct evsel *move_evsel)
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel)
 {
 	struct evsel *evsel, *n;
 	LIST_HEAD(move);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index f563e546b055..8d3fdeab2f22 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -214,10 +214,9 @@ bool evlist__valid_sample_type(struct evlist *evlist);
 bool evlist__valid_sample_id_all(struct evlist *evlist);
 bool perf_evlist__valid_read_format(struct evlist *evlist);
 
-void perf_evlist__splice_list_tail(struct evlist *evlist,
-				   struct list_head *list);
+void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list);
 
-static inline bool perf_evlist__empty(struct evlist *evlist)
+static inline bool evlist__empty(struct evlist *evlist)
 {
 	return list_empty(&evlist->core.entries);
 }
@@ -240,8 +239,7 @@ int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size
 int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
 
 bool perf_evlist__can_select_event(struct evlist *evlist, const char *str);
-void perf_evlist__to_front(struct evlist *evlist,
-			   struct evsel *move_evsel);
+void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
 
 /**
  * __evlist__for_each_entry - iterate thru all the evsels
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8fa87f60133f..42c84adeb2fb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2183,7 +2183,7 @@ int __parse_events(struct evlist *evlist, const char *str,
 	/*
 	 * Add list to the evlist even with errors to allow callers to clean up.
 	 */
-	perf_evlist__splice_list_tail(evlist, &parse_state.list);
+	evlist__splice_list_tail(evlist, &parse_state.list);
 
 	if (!ret) {
 		struct evsel *last;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index d42339df20f8..7d87bfcffb3f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2756,7 +2756,7 @@ static const char *get_default_sort_order(struct evlist *evlist)
 
 	BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
 
-	if (evlist == NULL || perf_evlist__empty(evlist))
+	if (evlist == NULL || evlist__empty(evlist))
 		goto out_no_evlist;
 
 	evlist__for_each_entry(evlist, evsel) {
-- 
cgit v1.2.3-70-g09d2


From 712737241980476a277a4108e3121240a29de968 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:55:12 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' print methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-annotate.c  | 2 +-
 tools/perf/builtin-report.c    | 2 +-
 tools/perf/builtin-stat.c      | 3 +--
 tools/perf/util/hist.c         | 2 +-
 tools/perf/util/hist.h         | 2 +-
 tools/perf/util/session.c      | 6 ++----
 tools/perf/util/stat-display.c | 8 ++------
 tools/perf/util/stat.h         | 8 ++------
 8 files changed, 11 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 4940d10074c3..d6a81a6e1041 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -412,7 +412,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
 
 	if (dump_trace) {
 		perf_session__fprintf_nr_events(session, stdout);
-		perf_evlist__fprintf_nr_events(session->evlist, stdout);
+		evlist__fprintf_nr_events(session->evlist, stdout);
 		goto out;
 	}
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 11f13aafde44..0a335bef64d0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -928,7 +928,7 @@ static int __cmd_report(struct report *rep)
 
 		if (dump_trace) {
 			perf_session__fprintf_nr_events(session, stdout);
-			perf_evlist__fprintf_nr_events(session->evlist, stdout);
+			evlist__fprintf_nr_events(session->evlist, stdout);
 			return 0;
 		}
 	}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 29f7d4752e84..d69378f135e6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -974,8 +974,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	if (stat_config.quiet)
 		return;
 
-	perf_evlist__print_counters(evsel_list, &stat_config, &target,
-				    ts, argc, argv);
+	evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
 }
 
 static volatile int signr = -1;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 8a793e4c9400..7feeaa07c777 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -2654,7 +2654,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 	}
 }
 
-size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
 {
 	struct evsel *pos;
 	size_t ret = 0;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 278b49e915b0..df6c6eea0960 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -196,7 +196,7 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered);
 size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
 		      int max_cols, float min_pcnt, FILE *fp,
 		      bool ignore_callchains);
-size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp);
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp);
 
 void hists__filter_by_dso(struct hists *hists);
 void hists__filter_by_thread(struct hists *hists);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ce381b3dca06..6707a01b7ef8 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1191,9 +1191,7 @@ static void stack_user__printf(struct stack_dump *dump)
 	       dump->size, dump->offset);
 }
 
-static void perf_evlist__print_tstamp(struct evlist *evlist,
-				       union perf_event *event,
-				       struct perf_sample *sample)
+static void evlist__print_tstamp(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
 	u64 sample_type = __evlist__combined_sample_type(evlist);
 
@@ -1254,7 +1252,7 @@ static void dump_event(struct evlist *evlist, union perf_event *event,
 		evlist->trace_event_sample_raw(evlist, event, sample);
 
 	if (sample)
-		perf_evlist__print_tstamp(evlist, event, sample);
+		evlist__print_tstamp(evlist, event, sample);
 
 	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
 	       event->header.size, perf_event__name(event->header.type));
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a963b5b8eb72..fee7543843a8 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1184,12 +1184,8 @@ static void print_percore(struct perf_stat_config *config,
 		fputc('\n', output);
 }
 
-void
-perf_evlist__print_counters(struct evlist *evlist,
-			    struct perf_stat_config *config,
-			    struct target *_target,
-			    struct timespec *ts,
-			    int argc, const char **argv)
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+			    struct target *_target, struct timespec *ts, int argc, const char **argv)
 {
 	bool metric_only = config->metric_only;
 	int interval = config->interval;
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 4cba4b106e45..9979b4b100f2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -238,12 +238,8 @@ int create_perf_stat_counter(struct evsel *evsel,
 			     struct perf_stat_config *config,
 			     struct target *target,
 			     int cpu);
-void
-perf_evlist__print_counters(struct evlist *evlist,
-			    struct perf_stat_config *config,
-			    struct target *_target,
-			    struct timespec *ts,
-			    int argc, const char **argv);
+void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
+			    struct target *_target, struct timespec *ts, int argc, const char **argv);
 
 struct metric_expr;
 double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
-- 
cgit v1.2.3-70-g09d2


From 7748bb7175ccad5ee29e7355134b0061d8edf3d2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:56:52 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' create maps
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-ftrace.c                 | 2 +-
 tools/perf/builtin-kvm.c                    | 2 +-
 tools/perf/builtin-record.c                 | 2 +-
 tools/perf/builtin-stat.c                   | 2 +-
 tools/perf/builtin-top.c                    | 2 +-
 tools/perf/builtin-trace.c                  | 2 +-
 tools/perf/tests/backward-ring-buffer.c     | 2 +-
 tools/perf/tests/bpf.c                      | 2 +-
 tools/perf/tests/event-times.c              | 2 +-
 tools/perf/tests/openat-syscall-tp-fields.c | 4 ++--
 tools/perf/tests/perf-record.c              | 2 +-
 tools/perf/util/evlist.c                    | 6 +++---
 tools/perf/util/evlist.h                    | 2 +-
 tools/perf/util/sideband_evlist.c           | 2 +-
 14 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index bf4d8e9ab8f5..d49448a1060c 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -957,7 +957,7 @@ int cmd_ftrace(int argc, const char **argv)
 		goto out_delete_filters;
 	}
 
-	ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target);
+	ret = evlist__create_maps(ftrace.evlist, &ftrace.target);
 	if (ret < 0)
 		goto out_delete_evlist;
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index e1b60aad13cb..7560d6b1d6a3 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1442,7 +1442,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 		goto out;
 	}
 
-	if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
+	if (evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
 		usage_with_options(live_usage, live_options);
 
 	/*
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2d74a410d008..e956348c9081 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2792,7 +2792,7 @@ int cmd_record(int argc, const char **argv)
 	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
 
 	err = -ENOMEM;
-	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
+	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
 
 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d69378f135e6..1e967c32db7c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2249,7 +2249,7 @@ int cmd_stat(int argc, const char **argv)
 	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
 		target.per_thread = true;
 
-	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
+	if (evlist__create_maps(evsel_list, &target) < 0) {
 		if (target__has_task(&target)) {
 			pr_err("Problems finding threads of monitor\n");
 			parse_options_usage(stat_usage, stat_options, "p", 1);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5601a35a5ba4..51600f656771 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1694,7 +1694,7 @@ int cmd_top(int argc, const char **argv)
 	if (target__none(target))
 		target->system_wide = true;
 
-	if (perf_evlist__create_maps(top.evlist, target) < 0) {
+	if (evlist__create_maps(top.evlist, target) < 0) {
 		ui__error("Couldn't create thread/CPU maps: %s\n",
 			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
 		goto out_delete_evlist;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a463ce8fb8e0..ebf4fe944525 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3950,7 +3950,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (trace->cgroup)
 		evlist__set_default_cgroup(trace->evlist, trace->cgroup);
 
-	err = perf_evlist__create_maps(evlist, &trace->opts.target);
+	err = evlist__create_maps(evlist, &trace->opts.target);
 	if (err < 0) {
 		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
 		goto out_delete_evlist;
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 15cea518f5ad..f00f7f34efbd 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -109,7 +109,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m
 		return TEST_FAIL;
 	}
 
-	err = perf_evlist__create_maps(evlist, &opts.target);
+	err = evlist__create_maps(evlist, &opts.target);
 	if (err < 0) {
 		pr_debug("Not enough memory to create thread/cpu maps\n");
 		goto out_delete_evlist;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 7ed284a00ea2..d4b232fe9448 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -151,7 +151,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 		return TEST_FAIL;
 	}
 
-	err = perf_evlist__create_maps(evlist, &opts.target);
+	err = evlist__create_maps(evlist, &opts.target);
 	if (err < 0) {
 		pr_debug("Not enough memory to create thread/cpu maps\n");
 		goto out_delete_evlist;
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 9da2d4f58b8e..04ce4401f775 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -26,7 +26,7 @@ static int attach__enable_on_exec(struct evlist *evlist)
 
 	pr_debug("attaching to spawned child, enable on exec\n");
 
-	err = perf_evlist__create_maps(evlist, &target);
+	err = evlist__create_maps(evlist, &target);
 	if (err < 0) {
 		pr_debug("Not enough memory to create thread/cpu maps\n");
 		return err;
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 1f5f5e79ae25..78367b17f1bb 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -54,9 +54,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 
 	evlist__add(evlist, evsel);
 
-	err = perf_evlist__create_maps(evlist, &opts.target);
+	err = evlist__create_maps(evlist, &opts.target);
 	if (err < 0) {
-		pr_debug("%s: perf_evlist__create_maps\n", __func__);
+		pr_debug("%s: evlist__create_maps\n", __func__);
 		goto out_delete_evlist;
 	}
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index f0b2066c7ea9..01c9282c7a3b 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -84,7 +84,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	 * evlist__prepare_workload we'll fill in the only thread
 	 * we're monitoring, the one forked there.
 	 */
-	err = perf_evlist__create_maps(evlist, &opts.target);
+	err = evlist__create_maps(evlist, &opts.target);
 	if (err < 0) {
 		pr_debug("Not enough memory to create thread/cpu maps\n");
 		goto out_delete_evlist;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 28b4d347d4ba..e68e5a40b49a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -892,7 +892,7 @@ int evlist__mmap(struct evlist *evlist, unsigned int pages)
 	return evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
 }
 
-int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
+int evlist__create_maps(struct evlist *evlist, struct target *target)
 {
 	bool all_threads = (target->per_thread && target->system_wide);
 	struct perf_cpu_map *cpus;
@@ -1223,7 +1223,7 @@ void evlist__close(struct evlist *evlist)
 	}
 }
 
-static int perf_evlist__create_syswide_maps(struct evlist *evlist)
+static int evlist__create_syswide_maps(struct evlist *evlist)
 {
 	struct perf_cpu_map *cpus;
 	struct perf_thread_map *threads;
@@ -1265,7 +1265,7 @@ int evlist__open(struct evlist *evlist)
 	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
 	 */
 	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
-		err = perf_evlist__create_syswide_maps(evlist);
+		err = evlist__create_syswide_maps(evlist);
 		if (err < 0)
 			goto out_err;
 	}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8d3fdeab2f22..3b3bf3d28204 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -195,7 +195,7 @@ int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx
 void perf_evlist__set_selected(struct evlist *evlist,
 			       struct evsel *evsel);
 
-int perf_evlist__create_maps(struct evlist *evlist, struct target *target);
+int evlist__create_maps(struct evlist *evlist, struct target *target);
 int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 
 void __evlist__set_leader(struct list_head *list);
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
index 13f387b76456..748371ac22be 100644
--- a/tools/perf/util/sideband_evlist.c
+++ b/tools/perf/util/sideband_evlist.c
@@ -101,7 +101,7 @@ int evlist__start_sb_thread(struct evlist *evlist, struct target *target)
 	if (!evlist)
 		return 0;
 
-	if (perf_evlist__create_maps(evlist, target))
+	if (evlist__create_maps(evlist, target))
 		goto out_delete_evlist;
 
 	if (evlist->core.nr_entries > 1) {
-- 
cgit v1.2.3-70-g09d2


From 64b4778b863b6fa84e36e043fb34bde6b847fa96 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 14:58:32 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' event group
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-annotate.c | 2 +-
 tools/perf/builtin-record.c   | 2 +-
 tools/perf/builtin-report.c   | 2 +-
 tools/perf/builtin-stat.c     | 2 +-
 tools/perf/util/auxtrace.c    | 6 ++----
 tools/perf/util/evlist.c      | 6 ++----
 tools/perf/util/evlist.h      | 7 +++----
 7 files changed, 11 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index d6a81a6e1041..a23ba6bb99b6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -598,7 +598,7 @@ int cmd_annotate(int argc, const char **argv)
 						      HEADER_BRANCH_STACK);
 
 	if (annotate.group_set)
-		perf_evlist__force_leader(annotate.session->evlist);
+		evlist__force_leader(annotate.session->evlist);
 
 	ret = symbol__annotation_init();
 	if (ret < 0)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e956348c9081..412717406dc9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -923,7 +923,7 @@ try_again:
 			if ((errno == EINVAL || errno == EBADF) &&
 			    pos->leader != pos &&
 			    pos->weak_group) {
-			        pos = perf_evlist__reset_weak_group(evlist, pos, true);
+			        pos = evlist__reset_weak_group(evlist, pos, true);
 				goto try_again;
 			}
 			rc = -errno;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0a335bef64d0..e78614ab7a16 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -211,7 +211,7 @@ static void setup_forced_leader(struct report *report,
 				struct evlist *evlist)
 {
 	if (report->group_set)
-		perf_evlist__force_leader(evlist);
+		evlist__force_leader(evlist);
 }
 
 static int process_feature_event(struct perf_session *session,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1e967c32db7c..89c32692f40c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -759,7 +759,7 @@ try_again:
 				if ((errno == EINVAL || errno == EBADF) &&
 				    counter->leader != counter &&
 				    counter->weak_group) {
-					perf_evlist__reset_weak_group(evsel_list, counter, false);
+					evlist__reset_weak_group(evsel_list, counter, false);
 					assert(counter->reset_group);
 					second_pass = true;
 					continue;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 8c94e21db2aa..a60878498139 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -62,9 +62,7 @@
  * Make a group from 'leader' to 'last', requiring that the events were not
  * already grouped to a different leader.
  */
-static int perf_evlist__regroup(struct evlist *evlist,
-				struct evsel *leader,
-				struct evsel *last)
+static int evlist__regroup(struct evlist *evlist, struct evsel *leader, struct evsel *last)
 {
 	struct evsel *evsel;
 	bool grp;
@@ -775,7 +773,7 @@ no_opt:
 			evsel->core.attr.aux_sample_size = term->val.aux_sample_size;
 			/* If possible, group with the AUX event */
 			if (aux_evsel && evsel->core.attr.aux_sample_size)
-				perf_evlist__regroup(evlist, aux_evsel, evsel);
+				evlist__regroup(evlist, aux_evsel, evsel);
 		}
 	}
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e68e5a40b49a..869195b6f21c 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1650,7 +1650,7 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist)
  * the group display. Set the artificial group and set the leader's
  * forced_leader flag to notify the display code.
  */
-void perf_evlist__force_leader(struct evlist *evlist)
+void evlist__force_leader(struct evlist *evlist)
 {
 	if (!evlist->nr_groups) {
 		struct evsel *leader = evlist__first(evlist);
@@ -1660,9 +1660,7 @@ void perf_evlist__force_leader(struct evlist *evlist)
 	}
 }
 
-struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
-						 struct evsel *evsel,
-						bool close)
+struct evsel *evlist__reset_weak_group(struct evlist *evsel_list, struct evsel *evsel, bool close)
 {
 	struct evsel *c2, *leader;
 	bool is_open = true;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 3b3bf3d28204..d09970e7c838 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -324,11 +324,10 @@ struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event
 
 bool perf_evlist__exclude_kernel(struct evlist *evlist);
 
-void perf_evlist__force_leader(struct evlist *evlist);
+void evlist__force_leader(struct evlist *evlist);
+
+struct evsel *evlist__reset_weak_group(struct evlist *evlist, struct evsel *evsel, bool close);
 
-struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
-						 struct evsel *evsel,
-						bool close);
 #define EVLIST_CTL_CMD_ENABLE_TAG  "enable"
 #define EVLIST_CTL_CMD_DISABLE_TAG "disable"
 #define EVLIST_CTL_CMD_ACK_TAG     "ack\n"
-- 
cgit v1.2.3-70-g09d2


From 900c8ead5b0b21d73236ffbc4bc2f47a506d8297 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:01:08 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' event selection
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/util/intel-pt.c | 2 +-
 tools/perf/tests/switch-tracking.c  | 2 +-
 tools/perf/ui/browsers/hists.c      | 2 +-
 tools/perf/util/evlist.c            | 3 +--
 tools/perf/util/evlist.h            | 5 ++---
 tools/perf/util/record.c            | 2 +-
 6 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index cb9c0c85dc8f..a6420c647959 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -416,7 +416,7 @@ static int intel_pt_track_switches(struct evlist *evlist)
 	struct evsel *evsel;
 	int err;
 
-	if (!perf_evlist__can_select_event(evlist, sched_switch))
+	if (!evlist__can_select_event(evlist, sched_switch))
 		return -EPERM;
 
 	err = parse_events(evlist, sched_switch, NULL);
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index e07fe84bb304..a4cf5ee22c28 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -380,7 +380,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
 	cycles_evsel = evlist__last(evlist);
 
 	/* Third event */
-	if (!perf_evlist__can_select_event(evlist, sched_switch)) {
+	if (!evlist__can_select_event(evlist, sched_switch)) {
 		pr_debug("No sched_switch\n");
 		err = 0;
 		goto out;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 766be504abca..3b9818ee9546 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3494,7 +3494,7 @@ static int perf_evsel_menu__run(struct evsel_menu *menu,
 				continue;
 			pos = menu->selection;
 browse_hists:
-			perf_evlist__set_selected(evlist, pos);
+			evlist__set_selected(evlist, pos);
 			/*
 			 * Give the calling tool a chance to populate the non
 			 * default evsel resorted hists tree.
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 869195b6f21c..a11364d94720 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1183,8 +1183,7 @@ bool evlist__sample_id_all(struct evlist *evlist)
 	return first->core.attr.sample_id_all;
 }
 
-void perf_evlist__set_selected(struct evlist *evlist,
-			       struct evsel *evsel)
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
 {
 	evlist->selected = evsel;
 }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d09970e7c838..e858a351b014 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -192,8 +192,7 @@ void evlist__toggle_enable(struct evlist *evlist);
 
 int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx);
 
-void perf_evlist__set_selected(struct evlist *evlist,
-			       struct evsel *evsel);
+void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
 
 int evlist__create_maps(struct evlist *evlist, struct target *target);
 int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
@@ -238,7 +237,7 @@ static inline struct evsel *evlist__last(struct evlist *evlist)
 int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
 int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
 
-bool perf_evlist__can_select_event(struct evlist *evlist, const char *str);
+bool evlist__can_select_event(struct evlist *evlist, const char *str);
 void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel);
 
 /**
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 220b9910427b..f72c8e05e15c 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -217,7 +217,7 @@ int record_opts__config(struct record_opts *opts)
 	return record_opts__config_freq(opts);
 }
 
-bool perf_evlist__can_select_event(struct evlist *evlist, const char *str)
+bool evlist__can_select_event(struct evlist *evlist, const char *str)
 {
 	struct evlist *temp_evlist;
 	struct evsel *evsel;
-- 
cgit v1.2.3-70-g09d2


From 606e2c29334556797e1639115bd198aedb331f07 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:04:05 -0300
Subject: perf evlist: Use the right prefix for alternative 'struct evlist'
 constructors

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/tests/intel-cqm.c       | 2 +-
 tools/perf/tests/bpf.c                      | 2 +-
 tools/perf/tests/code-reading.c             | 2 +-
 tools/perf/tests/event_update.c             | 3 +--
 tools/perf/tests/expand-cgroup.c            | 3 +--
 tools/perf/tests/mmap-basic.c               | 2 +-
 tools/perf/tests/openat-syscall-tp-fields.c | 2 +-
 tools/perf/tests/perf-record.c              | 4 ++--
 tools/perf/tests/task-exit.c                | 4 ++--
 tools/perf/tests/topology.c                 | 2 +-
 tools/perf/util/evlist.c                    | 4 ++--
 tools/perf/util/evlist.h                    | 4 ++--
 12 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index 3ec562a2aaba..27dd8cf9e060 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -52,7 +52,7 @@ int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subt
 
 	evlist = evlist__new();
 	if (!evlist) {
-		pr_debug("perf_evlist__new failed\n");
+		pr_debug("evlist__new failed\n");
 		return TEST_FAIL;
 	}
 
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index d4b232fe9448..22d7f567c41c 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -144,7 +144,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 	pid[sizeof(pid) - 1] = '\0';
 	opts.target.tid = opts.target.pid = pid;
 
-	/* Instead of perf_evlist__new_default, don't add default events */
+	/* Instead of evlist__new_default, don't add default events */
 	evlist = evlist__new();
 	if (!evlist) {
 		pr_debug("Not enough memory to create evlist\n");
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 339d432b1bf1..0c494aa90a8d 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -637,7 +637,7 @@ static int do_test_code_reading(bool try_kcore)
 
 		evlist = evlist__new();
 		if (!evlist) {
-			pr_debug("perf_evlist__new failed\n");
+			pr_debug("evlist__new failed\n");
 			goto out_put;
 		}
 
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index bdcf032f8516..656218179222 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -85,11 +85,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 
 int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
-	struct evlist *evlist;
 	struct evsel *evsel;
 	struct event_name tmp;
+	struct evlist *evlist = evlist__new_default();
 
-	evlist = perf_evlist__new_default();
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
 	evsel = evlist__first(evlist);
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index e8187f37fe1e..0e46aeb843ce 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -100,10 +100,9 @@ out:	for (i = 0; i < nr_events; i++)
 static int expand_default_events(void)
 {
 	int ret;
-	struct evlist *evlist;
 	struct rblist metric_events;
+	struct evlist *evlist = evlist__new_default();
 
-	evlist = perf_evlist__new_default();
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
 	rblist__init(&metric_events);
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index c01d7e12d241..57093aeacc6f 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -69,7 +69,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 
 	evlist = evlist__new();
 	if (evlist == NULL) {
-		pr_debug("perf_evlist__new\n");
+		pr_debug("evlist__new\n");
 		goto out_free_cpus;
 	}
 
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 78367b17f1bb..5e4af2f0f14a 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -42,7 +42,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 	char sbuf[STRERR_BUFSIZE];
 
 	if (evlist == NULL) {
-		pr_debug("%s: perf_evlist__new\n", __func__);
+		pr_debug("%s: evlist__new\n", __func__);
 		goto out;
 	}
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 01c9282c7a3b..61d1ff30253c 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -53,7 +53,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	};
 	cpu_set_t cpu_mask;
 	size_t cpu_mask_size = sizeof(cpu_mask);
-	struct evlist *evlist = perf_evlist__new_dummy();
+	struct evlist *evlist = evlist__new_dummy();
 	struct evsel *evsel;
 	struct perf_sample sample;
 	const char *cmd = "sleep";
@@ -71,7 +71,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	char sbuf[STRERR_BUFSIZE];
 
 	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
-		evlist = perf_evlist__new_default();
+		evlist = evlist__new_default();
 
 	if (evlist == NULL) {
 		pr_debug("Not enough memory to create evlist\n");
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 452d51048cc1..bbf94e4aa145 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -58,9 +58,9 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
 
 	signal(SIGCHLD, sig_handler);
 
-	evlist = perf_evlist__new_default();
+	evlist = evlist__new_default();
 	if (evlist == NULL) {
-		pr_debug("perf_evlist__new_default\n");
+		pr_debug("evlist__new_default\n");
 		return -1;
 	}
 
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 22daf2bdf5fa..165feedc7863 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -40,7 +40,7 @@ static int session_write_header(char *path)
 	session = perf_session__new(&data, false, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
-	session->evlist = perf_evlist__new_default();
+	session->evlist = evlist__new_default();
 	TEST_ASSERT_VAL("can't get evlist", session->evlist);
 
 	perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a11364d94720..70b5b6e506c8 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -78,7 +78,7 @@ struct evlist *evlist__new(void)
 	return evlist;
 }
 
-struct evlist *perf_evlist__new_default(void)
+struct evlist *evlist__new_default(void)
 {
 	struct evlist *evlist = evlist__new();
 
@@ -90,7 +90,7 @@ struct evlist *perf_evlist__new_default(void)
 	return evlist;
 }
 
-struct evlist *perf_evlist__new_dummy(void)
+struct evlist *evlist__new_dummy(void)
 {
 	struct evlist *evlist = evlist__new();
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e858a351b014..9e73d4ae16d9 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -87,8 +87,8 @@ struct evsel_str_handler {
 };
 
 struct evlist *evlist__new(void);
-struct evlist *perf_evlist__new_default(void);
-struct evlist *perf_evlist__new_dummy(void);
+struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
 		  struct perf_thread_map *threads);
 void evlist__exit(struct evlist *evlist);
-- 
cgit v1.2.3-70-g09d2


From 78e1bc25786656c490befc6d44d265f263cb8861 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:07:49 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' event attribute
 config methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kvm.c                |  2 +-
 tools/perf/builtin-record.c             |  6 +++---
 tools/perf/builtin-report.c             |  2 +-
 tools/perf/builtin-top.c                |  4 ++--
 tools/perf/builtin-trace.c              |  2 +-
 tools/perf/tests/backward-ring-buffer.c |  2 +-
 tools/perf/tests/bpf.c                  |  2 +-
 tools/perf/tests/code-reading.c         |  2 +-
 tools/perf/tests/keep-tracking.c        |  2 +-
 tools/perf/tests/parse-events.c         | 10 ++++------
 tools/perf/tests/perf-record.c          |  2 +-
 tools/perf/tests/perf-time-to-tsc.c     |  2 +-
 tools/perf/tests/switch-tracking.c      |  2 +-
 tools/perf/util/evlist.c                |  4 ++--
 tools/perf/util/evlist.h                |  7 +++----
 tools/perf/util/record.c                |  3 +--
 tools/perf/util/session.c               |  2 +-
 17 files changed, 26 insertions(+), 30 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 7560d6b1d6a3..616125160b58 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1022,7 +1022,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
 	struct evlist *evlist = kvm->evlist;
 	char sbuf[STRERR_BUFSIZE];
 
-	perf_evlist__config(evlist, &kvm->opts, NULL);
+	evlist__config(evlist, &kvm->opts, NULL);
 
 	/*
 	 * Note: exclude_{guest,host} do not apply here.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 412717406dc9..2a732414ae99 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -910,7 +910,7 @@ static int record__open(struct record *rec)
 			pos->immediate = 1;
 	}
 
-	perf_evlist__config(evlist, opts, &callchain_param);
+	evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, pos) {
 try_again:
@@ -935,7 +935,7 @@ try_again:
 		pos->supported = true;
 	}
 
-	if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(evlist)) {
+	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
 		pr_warning(
 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
@@ -1444,7 +1444,7 @@ static int record__synthesize(struct record *rec, bool tail)
 			goto out;
 	}
 
-	if (!perf_evlist__exclude_kernel(rec->evlist)) {
+	if (!evlist__exclude_kernel(rec->evlist)) {
 		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 							 machine);
 		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index e78614ab7a16..5efbd0602f17 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -563,7 +563,7 @@ static void report__warn_kptr_restrict(const struct report *rep)
 	struct map *kernel_map = machine__kernel_map(&rep->session->machines.host);
 	struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL;
 
-	if (perf_evlist__exclude_kernel(rep->session->evlist))
+	if (evlist__exclude_kernel(rep->session->evlist))
 		return;
 
 	if (kernel_map == NULL ||
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 51600f656771..f65aa1c22c2a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -779,7 +779,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	if (!machine->kptr_restrict_warned &&
 	    symbol_conf.kptr_restrict &&
 	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
-		if (!perf_evlist__exclude_kernel(top->session->evlist)) {
+		if (!evlist__exclude_kernel(top->session->evlist)) {
 			ui__warning(
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
@@ -1022,7 +1022,7 @@ static int perf_top__start_counters(struct perf_top *top)
 		goto out_err;
 	}
 
-	perf_evlist__config(evlist, opts, &callchain_param);
+	evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each_entry(evlist, counter) {
 try_again:
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ebf4fe944525..fa7d57fc53da 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3962,7 +3962,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
-	perf_evlist__config(evlist, &trace->opts, &callchain_param);
+	evlist__config(evlist, &trace->opts, &callchain_param);
 
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index f00f7f34efbd..b4b9a9488d51 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -127,7 +127,7 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m
 		goto out_delete_evlist;
 	}
 
-	perf_evlist__config(evlist, &opts, NULL);
+	evlist__config(evlist, &opts, NULL);
 
 	err = evlist__open(evlist);
 	if (err < 0) {
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 22d7f567c41c..f57e075b0ed2 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -160,7 +160,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
 	evlist__splice_list_tail(evlist, &parse_state.list);
 	evlist->nr_groups = parse_state.nr_groups;
 
-	perf_evlist__config(evlist, &opts, NULL);
+	evlist__config(evlist, &opts, NULL);
 
 	err = evlist__open(evlist);
 	if (err < 0) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 0c494aa90a8d..7c098d49c77e 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -651,7 +651,7 @@ static int do_test_code_reading(bool try_kcore)
 			goto out_put;
 		}
 
-		perf_evlist__config(evlist, &opts, NULL);
+		evlist__config(evlist, &opts, NULL);
 
 		evsel = evlist__first(evlist);
 
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 50a0c9fcde7d..e6f1b2a38e03 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -92,7 +92,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un
 	CHECK__(parse_events(evlist, "dummy:u", NULL));
 	CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-	perf_evlist__config(evlist, &opts, NULL);
+	evlist__config(evlist, &opts, NULL);
 
 	evsel = evlist__first(evlist);
 
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 611512f22b34..a7f6661e6112 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -115,7 +115,7 @@ static int test__checkevent_symbolic_name_config(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong config",
 			PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
 	/*
-	 * The period value gets configured within perf_evlist__config,
+	 * The period value gets configured within evlist__config,
 	 * while this test executes only parse events method.
 	 */
 	TEST_ASSERT_VAL("wrong period",
@@ -443,7 +443,7 @@ static int test__checkevent_pmu(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong config1",    1 == evsel->core.attr.config1);
 	TEST_ASSERT_VAL("wrong config2",    3 == evsel->core.attr.config2);
 	/*
-	 * The period value gets configured within perf_evlist__config,
+	 * The period value gets configured within evlist__config,
 	 * while this test executes only parse events method.
 	 */
 	TEST_ASSERT_VAL("wrong period",     0 == evsel->core.attr.sample_period);
@@ -520,8 +520,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
 	TEST_ASSERT_VAL("wrong config",  1 == evsel->core.attr.config);
 	/*
-	 * The period, time and callgraph value gets configured
-	 * within perf_evlist__config,
+	 * The period, time and callgraph value gets configured within evlist__config,
 	 * while this test executes only parse events method.
 	 */
 	TEST_ASSERT_VAL("wrong period",     0 == evsel->core.attr.sample_period);
@@ -533,8 +532,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
 	TEST_ASSERT_VAL("wrong config",  2 == evsel->core.attr.config);
 	/*
-	 * The period, time and callgraph value gets configured
-	 * within perf_evlist__config,
+	 * The period, time and callgraph value gets configured within evlist__config,
 	 * while this test executes only parse events method.
 	 */
 	TEST_ASSERT_VAL("wrong period",     0 == evsel->core.attr.sample_period);
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 61d1ff30253c..0df471bf1590 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -109,7 +109,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
 	evsel__set_sample_bit(evsel, CPU);
 	evsel__set_sample_bit(evsel, TID);
 	evsel__set_sample_bit(evsel, TIME);
-	perf_evlist__config(evlist, &opts, NULL);
+	evlist__config(evlist, &opts, NULL);
 
 	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
 	if (err < 0) {
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index a9560e0f6360..7cff02664d0e 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -80,7 +80,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
 
 	CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-	perf_evlist__config(evlist, &opts, NULL);
+	evlist__config(evlist, &opts, NULL);
 
 	evsel = evlist__first(evlist);
 
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index a4cf5ee22c28..15a2ab765d89 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -432,7 +432,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
 	evsel__set_sample_bit(tracking_evsel, TIME);
 
 	/* Config events */
-	perf_evlist__config(evlist, &opts, NULL);
+	evlist__config(evlist, &opts, NULL);
 
 	/* Check moved event is still at the front */
 	if (cycles_evsel != evlist__first(evlist)) {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 70b5b6e506c8..7ca92a5d3206 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1110,7 +1110,7 @@ u64 evlist__combined_branch_type(struct evlist *evlist)
 	return branch_type;
 }
 
-bool perf_evlist__valid_read_format(struct evlist *evlist)
+bool evlist__valid_read_format(struct evlist *evlist)
 {
 	struct evsel *first = evlist__first(evlist), *pos = first;
 	u64 read_format = first->core.attr.read_format;
@@ -1632,7 +1632,7 @@ state_err:
 	return;
 }
 
-bool perf_evlist__exclude_kernel(struct evlist *evlist)
+bool evlist__exclude_kernel(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 9e73d4ae16d9..ebc4550870a1 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -159,8 +159,7 @@ void evlist__close(struct evlist *evlist);
 struct callchain_param;
 
 void evlist__set_id_pos(struct evlist *evlist);
-void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
-			 struct callchain_param *callchain);
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain);
 int record_opts__config(struct record_opts *opts);
 
 int evlist__prepare_workload(struct evlist *evlist, struct target *target,
@@ -211,7 +210,7 @@ int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *even
 
 bool evlist__valid_sample_type(struct evlist *evlist);
 bool evlist__valid_sample_id_all(struct evlist *evlist);
-bool perf_evlist__valid_read_format(struct evlist *evlist);
+bool evlist__valid_read_format(struct evlist *evlist);
 
 void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list);
 
@@ -321,7 +320,7 @@ struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
 
 struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event);
 
-bool perf_evlist__exclude_kernel(struct evlist *evlist);
+bool evlist__exclude_kernel(struct evlist *evlist);
 
 void evlist__force_leader(struct evlist *evlist);
 
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index f72c8e05e15c..e70c9dd04567 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -89,8 +89,7 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev
 			    leader->core.attr.sample_type;
 }
 
-void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
-			 struct callchain_param *callchain)
+void evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain)
 {
 	struct evsel *evsel;
 	bool use_sample_identifier = false;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6707a01b7ef8..bf10576d257a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -125,7 +125,7 @@ static int perf_session__open(struct perf_session *session)
 		return -1;
 	}
 
-	if (!perf_evlist__valid_read_format(session->evlist)) {
+	if (!evlist__valid_read_format(session->evlist)) {
 		pr_err("non matching read_format\n");
 		return -1;
 	}
-- 
cgit v1.2.3-70-g09d2


From 25f84702f3590ce6caa3e5bb98e001692f3a2b9e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:09:45 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' mmap pages
 parsing method

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kvm.c    | 3 +--
 tools/perf/builtin-record.c | 4 ++--
 tools/perf/builtin-top.c    | 3 +--
 tools/perf/builtin-trace.c  | 3 +--
 tools/perf/util/evlist.c    | 7 +++----
 tools/perf/util/evlist.h    | 6 ++----
 6 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 616125160b58..1105c9e40a80 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1349,8 +1349,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 		OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
 			"record events on existing process id"),
 		OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
-			"number of mmap data pages",
-			perf_evlist__parse_mmap_pages),
+			"number of mmap data pages", evlist__parse_mmap_pages),
 		OPT_INCR('v', "verbose", &verbose,
 			"be more verbose (show counter open errors, etc)"),
 		OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2a732414ae99..92039fbf6337 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2221,7 +2221,7 @@ static int record__parse_mmap_pages(const struct option *opt,
 		*p = '\0';
 
 	if (*s) {
-		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
+		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
 		if (ret)
 			goto out_free;
 		opts->mmap_pages = mmap_pages;
@@ -2232,7 +2232,7 @@ static int record__parse_mmap_pages(const struct option *opt,
 		goto out_free;
 	}
 
-	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
+	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
 	if (ret)
 		goto out_free;
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f65aa1c22c2a..3673c04d16b6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1466,8 +1466,7 @@ int cmd_top(int argc, const char **argv)
 	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
 		    "hide kernel symbols"),
 	OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
-		     "number of mmap data pages",
-		     perf_evlist__parse_mmap_pages),
+		     "number of mmap data pages", evlist__parse_mmap_pages),
 	OPT_INTEGER('r', "realtime", &top.realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
 	OPT_INTEGER('d', "delay", &top.delay_secs,
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index fa7d57fc53da..85b6a46e85b6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4763,8 +4763,7 @@ int cmd_trace(int argc, const char **argv)
 	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
 		    "child tasks do not inherit counters"),
 	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
-		     "number of mmap data pages",
-		     perf_evlist__parse_mmap_pages),
+		     "number of mmap data pages", evlist__parse_mmap_pages),
 	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
 		   "user to profile"),
 	OPT_CALLBACK(0, "duration", &trace, "float",
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7ca92a5d3206..59264a7a858a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -815,7 +815,7 @@ static long parse_pages_arg(const char *str, unsigned long min,
 	return pages;
 }
 
-int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
 {
 	unsigned long max = UINT_MAX;
 	long pages;
@@ -833,10 +833,9 @@ int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
 	return 0;
 }
 
-int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
-				  int unset __maybe_unused)
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset __maybe_unused)
 {
-	return __perf_evlist__parse_mmap_pages(opt->value, str);
+	return __evlist__parse_mmap_pages(opt->value, str);
 }
 
 /**
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index ebc4550870a1..8a5b515193b3 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -169,10 +169,8 @@ int evlist__start_workload(struct evlist *evlist);
 
 struct option;
 
-int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
-int perf_evlist__parse_mmap_pages(const struct option *opt,
-				  const char *str,
-				  int unset);
+int __evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
+int evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset);
 
 unsigned long perf_event_mlock_kb_in_pages(void);
 
-- 
cgit v1.2.3-70-g09d2


From 44d2a5573665ab5dfb72572e43184388d15d695e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:11:10 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' raw samples
 methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/s390-sample-raw.c | 3 +--
 tools/perf/util/sample-raw.c      | 4 ++--
 tools/perf/util/sample-raw.h      | 7 ++-----
 tools/perf/util/session.c         | 2 +-
 4 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
index d177e6179839..cfcf8d534d76 100644
--- a/tools/perf/util/s390-sample-raw.c
+++ b/tools/perf/util/s390-sample-raw.c
@@ -197,8 +197,7 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample)
  * its raw data.
  * The function is only invoked when the dump flag -D is set.
  */
-void perf_evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
-				  struct perf_sample *sample)
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample)
 {
 	struct evsel *ev_bc000;
 
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c
index e84bbe0e441a..cde5cd3ce49b 100644
--- a/tools/perf/util/sample-raw.c
+++ b/tools/perf/util/sample-raw.c
@@ -9,10 +9,10 @@
  * Check platform the perf data file was created on and perform platform
  * specific interpretation.
  */
-void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist)
+void evlist__init_trace_event_sample_raw(struct evlist *evlist)
 {
 	const char *arch_pf = perf_env__arch(evlist->env);
 
 	if (arch_pf && !strcmp("s390", arch_pf))
-		evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw;
+		evlist->trace_event_sample_raw = evlist__s390_sample_raw;
 }
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h
index afe1491a117e..4be84a5510cf 100644
--- a/tools/perf/util/sample-raw.h
+++ b/tools/perf/util/sample-raw.h
@@ -6,9 +6,6 @@ struct evlist;
 union perf_event;
 struct perf_sample;
 
-void perf_evlist__s390_sample_raw(struct evlist *evlist,
-				  union perf_event *event,
-				  struct perf_sample *sample);
-
-void perf_evlist__init_trace_event_sample_raw(struct evlist *evlist);
+void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
+void evlist__init_trace_event_sample_raw(struct evlist *evlist);
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index bf10576d257a..83de32090008 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -221,7 +221,7 @@ struct perf_session *perf_session__new(struct perf_data *data,
 				perf_session__set_comm_exec(session);
 			}
 
-			perf_evlist__init_trace_event_sample_raw(session->evlist);
+			evlist__init_trace_event_sample_raw(session->evlist);
 
 			/* Open the directory data. */
 			if (data->is_dir) {
-- 
cgit v1.2.3-70-g09d2


From 1420ba2f6250270c4143d96af86f654f9f4d9997 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:13:12 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' header methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c  |  2 +-
 tools/perf/util/evlist.h  |  2 +-
 tools/perf/util/header.c  | 15 +++++----------
 tools/perf/util/session.c |  2 +-
 4 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 59264a7a858a..23eef3cedc2f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1131,7 +1131,7 @@ bool evlist__valid_read_format(struct evlist *evlist)
 	return true;
 }
 
-u16 perf_evlist__id_hdr_size(struct evlist *evlist)
+u16 evlist__id_hdr_size(struct evlist *evlist)
 {
 	struct evsel *first = evlist__first(evlist);
 	struct perf_sample *data;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8a5b515193b3..9b0c795736bb 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -201,7 +201,7 @@ u64 __evlist__combined_sample_type(struct evlist *evlist);
 u64 evlist__combined_sample_type(struct evlist *evlist);
 u64 evlist__combined_branch_type(struct evlist *evlist);
 bool evlist__sample_id_all(struct evlist *evlist);
-u16 perf_evlist__id_hdr_size(struct evlist *evlist);
+u16 evlist__id_hdr_size(struct evlist *evlist);
 
 int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample);
 int evlist__parse_sample_timestamp(struct evlist *evlist, union perf_event *event, u64 *timestamp);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index be219051119c..062383e225a3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -2275,9 +2275,7 @@ static struct evsel *evlist__find_by_index(struct evlist *evlist, int idx)
 	return NULL;
 }
 
-static void
-perf_evlist__set_event_name(struct evlist *evlist,
-			    struct evsel *event)
+static void evlist__set_event_name(struct evlist *evlist, struct evsel *event)
 {
 	struct evsel *evsel;
 
@@ -2312,7 +2310,7 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
 	}
 
 	for (evsel = events; evsel->core.attr.size; evsel++)
-		perf_evlist__set_event_name(session->evlist, evsel);
+		evlist__set_event_name(session->evlist, evsel);
 
 	if (!session->data->is_pipe)
 		free_event_desc(events);
@@ -3765,8 +3763,7 @@ static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handl
 	return 0;
 }
 
-static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist,
-						  struct tep_handle *pevent)
+static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_handle *pevent)
 {
 	struct evsel *pos;
 
@@ -3881,8 +3878,7 @@ int perf_session__read_header(struct perf_session *session)
 	perf_header__process_sections(header, fd, &session->tevent,
 				      perf_file_section__process);
 
-	if (perf_evlist__prepare_tracepoint_events(session->evlist,
-						   session->tevent.pevent))
+	if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent))
 		goto out_delete_evlist;
 
 	return 0;
@@ -4103,8 +4099,7 @@ int perf_event__process_tracing_data(struct perf_session *session,
 		return -1;
 	}
 
-	perf_evlist__prepare_tracepoint_events(session->evlist,
-					       session->tevent.pevent);
+	evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent);
 
 	return size_read + padding;
 }
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 83de32090008..69799e747bf0 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -135,7 +135,7 @@ static int perf_session__open(struct perf_session *session)
 
 void perf_session__set_id_hdr_size(struct perf_session *session)
 {
-	u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);
+	u16 id_hdr_size = evlist__id_hdr_size(session->evlist);
 
 	machines__set_id_hdr_size(&session->machines, id_hdr_size);
 }
-- 
cgit v1.2.3-70-g09d2


From 515ea461c26e19ebca4351266480306979a113fc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:16:29 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' deliver event
 method

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 69799e747bf0..3b3c50b12791 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1407,13 +1407,9 @@ static int deliver_sample_group(struct evlist *evlist,
 	return ret;
 }
 
-static int
- perf_evlist__deliver_sample(struct evlist *evlist,
-			     struct perf_tool *tool,
-			     union  perf_event *event,
-			     struct perf_sample *sample,
-			     struct evsel *evsel,
-			     struct machine *machine)
+static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool,
+				  union  perf_event *event, struct perf_sample *sample,
+				  struct evsel *evsel, struct machine *machine)
 {
 	/* We know evsel != NULL. */
 	u64 sample_type = evsel->core.attr.sample_type;
@@ -1458,7 +1454,7 @@ static int machines__deliver_event(struct machines *machines,
 			++evlist->stats.nr_unprocessable_samples;
 			return 0;
 		}
-		return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
+		return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
 	case PERF_RECORD_MMAP2:
-- 
cgit v1.2.3-70-g09d2


From f63c2f5a8b0eb4a7a8d5d19c8e0ccbbd0ee41d14 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:17:20 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' nr_threads
 method

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 23eef3cedc2f..493819173a8e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -330,8 +330,7 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name,
 	return 0;
 }
 
-static int perf_evlist__nr_threads(struct evlist *evlist,
-				   struct evsel *evsel)
+static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
 {
 	if (evsel->core.system_wide)
 		return 1;
@@ -450,7 +449,7 @@ void evlist__toggle_enable(struct evlist *evlist)
 static int evlist__enable_event_cpu(struct evlist *evlist, struct evsel *evsel, int cpu)
 {
 	int thread;
-	int nr_threads = perf_evlist__nr_threads(evlist, evsel);
+	int nr_threads = evlist__nr_threads(evlist, evsel);
 
 	if (!evsel->core.fd)
 		return -EINVAL;
-- 
cgit v1.2.3-70-g09d2


From b979a2f13b1b98c26b8f94d9401cd5255f75f978 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:18:48 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' diff methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-diff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index cefc71506409..8f6c784ce629 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -494,7 +494,7 @@ static struct evsel *evsel_match(struct evsel *evsel,
 	return NULL;
 }
 
-static void perf_evlist__collapse_resort(struct evlist *evlist)
+static void evlist__collapse_resort(struct evlist *evlist)
 {
 	struct evsel *evsel;
 
@@ -1214,7 +1214,7 @@ static int __cmd_diff(void)
 			goto out_delete;
 		}
 
-		perf_evlist__collapse_resort(d->session->evlist);
+		evlist__collapse_resort(d->session->evlist);
 
 		if (pdiff.ptime_range)
 			zfree(&pdiff.ptime_range);
-- 
cgit v1.2.3-70-g09d2


From db0ea13cc741e7c93f26bf5b3d313f48d00f15a4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 30 Nov 2020 15:19:40 -0300
Subject: perf evlist: Use the right prefix for 'struct evlist' record methods

perf_evlist__ is for 'struct perf_evlist' methods, in tools/lib/perf/,
go on completing this split.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 92039fbf6337..d832c108a1ca 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1349,8 +1349,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 static void snapshot_sig_handler(int sig);
 static void alarm_sig_handler(int sig);
 
-static const struct perf_event_mmap_page *
-perf_evlist__pick_pc(struct evlist *evlist)
+static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
 {
 	if (evlist) {
 		if (evlist->mmap && evlist->mmap[0].core.base)
@@ -1363,9 +1362,7 @@ perf_evlist__pick_pc(struct evlist *evlist)
 
 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
 {
-	const struct perf_event_mmap_page *pc;
-
-	pc = perf_evlist__pick_pc(rec->evlist);
+	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
 	if (pc)
 		return pc;
 	return NULL;
-- 
cgit v1.2.3-70-g09d2


From f3ed003e64fe7faecbe4c34bd2a1f5571a23f05a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 26 Oct 2020 18:59:27 +0200
Subject: kunit: Introduce get_file_path() helper

Helper allows to derive file names depending on --build_dir argument.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Tested-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_kernel.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index 2e3cc0fac726..57c1724b7e5d 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -23,6 +23,11 @@ DEFAULT_KUNITCONFIG_PATH = 'arch/um/configs/kunit_defconfig'
 BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
 OUTFILE_PATH = 'test.log'
 
+def get_file_path(build_dir, default):
+	if build_dir:
+		default = os.path.join(build_dir, default)
+	return default
+
 class ConfigError(Exception):
 	"""Represents an error trying to configure the Linux kernel."""
 
@@ -97,9 +102,7 @@ class LinuxSourceTreeOperations(object):
 
 	def linux_bin(self, params, timeout, build_dir):
 		"""Runs the Linux UML binary. Must be named 'linux'."""
-		linux_bin = './linux'
-		if build_dir:
-			linux_bin = os.path.join(build_dir, 'linux')
+		linux_bin = get_file_path(build_dir, 'linux')
 		outfile = get_outfile_path(build_dir)
 		with open(outfile, 'w') as output:
 			process = subprocess.Popen([linux_bin] + params,
@@ -108,22 +111,13 @@ class LinuxSourceTreeOperations(object):
 			process.wait(timeout)
 
 def get_kconfig_path(build_dir):
-	kconfig_path = KCONFIG_PATH
-	if build_dir:
-		kconfig_path = os.path.join(build_dir, KCONFIG_PATH)
-	return kconfig_path
+	return get_file_path(build_dir, KCONFIG_PATH)
 
 def get_kunitconfig_path(build_dir):
-	kunitconfig_path = KUNITCONFIG_PATH
-	if build_dir:
-		kunitconfig_path = os.path.join(build_dir, KUNITCONFIG_PATH)
-	return kunitconfig_path
+	return get_file_path(build_dir, KUNITCONFIG_PATH)
 
 def get_outfile_path(build_dir):
-	outfile_path = OUTFILE_PATH
-	if build_dir:
-		outfile_path = os.path.join(build_dir, OUTFILE_PATH)
-	return outfile_path
+	return get_file_path(build_dir, OUTFILE_PATH)
 
 class LinuxSourceTree(object):
 	"""Represents a Linux kernel source tree with KUnit tests."""
-- 
cgit v1.2.3-70-g09d2


From 271e0c9dce1b02a825b3cc1a7aa1fab7c381d44b Mon Sep 17 00:00:00 2001
From: Libo Chen <libo.chen@oracle.com>
Date: Fri, 20 Nov 2020 18:12:43 -0800
Subject: ktest.pl: Fix incorrect reboot for grub2bls

This issue was first noticed when I was testing different kernels on
Oracle Linux 8 which as Fedora 30+ adopts BLS as default. Even though a
kernel entry was added successfully and the index of that kernel entry was
retrieved correctly, ktest still wouldn't reboot the system into
user-specified kernel.

The bug was spotted in subroutine reboot_to where the if-statement never
checks for REBOOT_TYPE "grub2bls", therefore the desired entry will not be
set for the next boot.

Add a check for "grub2bls" so that $grub_reboot $grub_number can
be run before a reboot if REBOOT_TYPE is "grub2bls" then we can boot to
the correct kernel.

Link: https://lkml.kernel.org/r/20201121021243.1532477-1-libo.chen@oracle.com

Cc: stable@vger.kernel.org
Fixes: ac2466456eaa ("ktest: introduce grub2bls REBOOT_TYPE option")
Signed-off-by: Libo Chen <libo.chen@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index cb16d2aac51c..54188ee16c48 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -2040,7 +2040,7 @@ sub reboot_to {
 
     if ($reboot_type eq "grub") {
 	run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
-    } elsif ($reboot_type eq "grub2") {
+    } elsif (($reboot_type eq "grub2") or ($reboot_type eq "grub2bls")) {
 	run_ssh "$grub_reboot $grub_number";
     } elsif ($reboot_type eq "syslinux") {
 	run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
-- 
cgit v1.2.3-70-g09d2


From 8cd6bc0359deebd8500e6de95899a8a78d3ec4ba Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 30 Nov 2020 16:32:55 -0500
Subject: ktest.pl: If size of log is too big to email, email error message

If the size of the error log is too big to send via email, and the sending
fails, it wont email any result. This can be confusing for the user who is
waiting for an email on the completion of the tests.

If it fails to send email, then try again without the log file stating that
it failed to send an email. Obviously this will not be of use if the sending
of email failed for some other reasons, but it will at least give the user
some information when it fails for the most common reason.

Cc: stable@vger.kernel.org
Fixes: c2d84ddb338c8 ("ktest.pl: Add MAIL_COMMAND option to define how to send email")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 54188ee16c48..54f7d008e840 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -4253,7 +4253,12 @@ sub do_send_mail {
     $mail_command =~ s/\$SUBJECT/$subject/g;
     $mail_command =~ s/\$MESSAGE/$message/g;
 
-    run_command $mail_command;
+    my $ret = run_command $mail_command;
+    if (!$ret && defined($file)) {
+	# try again without the file
+	$message .= "\n\n*** FAILED TO SEND LOG ***\n\n";
+	do_send_email($subject, $message);
+    }
 }
 
 sub send_email {
-- 
cgit v1.2.3-70-g09d2


From 170f4869e66275f498ae4736106fb54c0fdcd036 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 30 Nov 2020 16:38:41 -0500
Subject: ktest.pl: Fix the logic for truncating the size of the log file for
 email

The logic for truncating the log file for emailing based on the
MAIL_MAX_SIZE option is confusing and incorrect. Simplify it and have the
tail of the log file truncated to the max size specified in the config.

Cc: stable@vger.kernel.org
Fixes: 855d8abd2e8ff ("ktest.pl: Change the logic to control the size of the log file emailed")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 54f7d008e840..4e2450964517 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1499,17 +1499,16 @@ sub dodie {
 	my $log_file;
 
 	if (defined($opt{"LOG_FILE"})) {
-	    my $whence = 0; # beginning of file
-	    my $pos = $test_log_start;
+	    my $whence = 2; # End of file
+	    my $log_size = tell LOG;
+	    my $size = $log_size - $test_log_start;
 
 	    if (defined($mail_max_size)) {
-		my $log_size = tell LOG;
-		$log_size -= $test_log_start;
-		if ($log_size > $mail_max_size) {
-		    $whence = 2; # end of file
-		    $pos = - $mail_max_size;
+		if ($size > $mail_max_size) {
+		    $size = $mail_max_size;
 		}
 	    }
+	    my $pos = - $size;
 	    $log_file = "$tmpdir/log";
 	    open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)";
 	    open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n";
-- 
cgit v1.2.3-70-g09d2


From 854055c0cf30d732b3514ce7956976f60496b1a1 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 26 Nov 2020 18:49:46 +0000
Subject: selftests/bpf: Fix flavored variants of test_ima

Flavored variants of test_progs (e.g. test_progs-no_alu32) change their
working directory to the corresponding subdirectory (e.g. no_alu32).
Since the setup script required by test_ima (ima_setup.sh) is not
mentioned in the dependencies, it does not get copied to these
subdirectories and causes flavored variants of test_ima to fail.

Adding the script to TRUNNER_EXTRA_FILES ensures that the file is also
copied to the subdirectories for the flavored variants of test_progs.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Yonghong Song <yhs@fb.com>
Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201126184946.1708213-1-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3d5940cd110d..894192c319fb 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -389,6 +389,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
 			 btf_helpers.c	flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
+		       ima_setup.sh					\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-- 
cgit v1.2.3-70-g09d2


From e86843580d1bb1ce12544bca3115cf11d51603ff Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 20 Nov 2020 11:29:13 +0900
Subject: tools/bootconfig: Store size and checksum in footer as le32

Store the size and the checksum fields in the footer as le32
instead of u32. This will allow us to apply bootconfig to the
cross build initrd without caring the endianness.

Link: https://lkml.kernel.org/r/160583935332.547349.5897811300636587426.stgit@devnote2

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 4a445b6304bb..7362bef1a368 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <errno.h>
+#include <endian.h>
 
 #include <linux/kernel.h>
 #include <linux/bootconfig.h>
@@ -183,9 +184,11 @@ static int load_xbc_from_initrd(int fd, char **buf)
 
 	if (read(fd, &size, sizeof(u32)) < 0)
 		return pr_errno("Failed to read size", -errno);
+	size = le32toh(size);
 
 	if (read(fd, &csum, sizeof(u32)) < 0)
 		return pr_errno("Failed to read checksum", -errno);
+	csum = le32toh(csum);
 
 	/* Wrong size error  */
 	if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) {
@@ -407,10 +410,10 @@ static int apply_xbc(const char *path, const char *xbc_path)
 
 	/* Add a footer */
 	p = data + size;
-	*(u32 *)p = size;
+	*(u32 *)p = htole32(size);
 	p += sizeof(u32);
 
-	*(u32 *)p = csum;
+	*(u32 *)p = htole32(csum);
 	p += sizeof(u32);
 
 	memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
-- 
cgit v1.2.3-70-g09d2


From 0c7a7e1a8ff3fb6d01467b43c62760e6bf0afab3 Mon Sep 17 00:00:00 2001
From: David Gow <davidgow@google.com>
Date: Mon, 9 Nov 2020 23:29:36 -0800
Subject: kunit: kunit_tool: Correctly parse diagnostic messages

Currently, kunit_tool expects all diagnostic lines in test results to
contain ": " somewhere, as both the subtest header and the crash report
do. Fix this to accept any line starting with (minus indent) "# " as
being a valid diagnostic line.

This matches what the TAP spec[1] and the draft KTAP spec[2] are
expecting.

[1]: http://testanything.org/tap-specification.html
[2]: https://lore.kernel.org/linux-kselftest/CY4PR13MB1175B804E31E502221BC8163FD830@CY4PR13MB1175.namprd13.prod.outlook.com/T/

Signed-off-by: David Gow <davidgow@google.com>
Acked-by: Marco Elver <elver@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/kunit/kunit_parser.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index bbfe1b4e4c1c..6614ec4d0898 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -135,8 +135,8 @@ def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
 	else:
 		return False
 
-SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# .*?: (.*)$')
-DIAGNOSTIC_CRASH_MESSAGE = 'kunit test case crashed!'
+SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$')
+DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$')
 
 def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
 	save_non_diagnositic(lines, test_case)
@@ -146,7 +146,8 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
 	match = SUBTEST_DIAGNOSTIC.match(line)
 	if match:
 		test_case.log.append(lines.pop(0))
-		if match.group(1) == DIAGNOSTIC_CRASH_MESSAGE:
+		crash_match = DIAGNOSTIC_CRASH_MESSAGE.match(line)
+		if crash_match:
 			test_case.status = TestStatus.TEST_CRASHED
 		return True
 	else:
-- 
cgit v1.2.3-70-g09d2


From 008cb2ec4354fa1c4a166eca8e5eec15112847b3 Mon Sep 17 00:00:00 2001
From: Danielle Ratson <danieller@nvidia.com>
Date: Sun, 29 Nov 2020 14:54:07 +0200
Subject: selftests: forwarding: Add QinQ veto testing

Test that each veto that was added in the previous patch, is indeed
vetoed.

$ ./q_in_q_veto.sh

TEST: create 802.1ad vlan upper on top of a front panel             [ OK ]
TEST: create 802.1ad vlan upper on top of a bridge port             [ OK ]
TEST: create 802.1ad vlan upper on top of a lag                     [ OK ]
TEST: create 802.1ad vlan upper on top 802.1q bridge                [ OK ]
TEST: create 802.1ad vlan upper on top 802.1ad bridge               [ OK ]
TEST: create 802.1q vlan upper on top 802.1ad bridge                [ OK ]
TEST: create vlan upper on top of front panel enslaved to 802.1ad bridge
[ OK ]
TEST: create vlan upper on top of lag enslaved to 802.1ad bridge    [ OK ]
TEST: enslave front panel with vlan upper to 802.1ad bridge         [ OK ]
TEST: enslave lag with vlan upper to 802.1ad bridge                 [ OK ]
TEST: IP address addition to 802.1ad bridge                         [ OK ]
TEST: switch bridge protocol                                        [ OK ]

Signed-off-by: Danielle Ratson <danieller@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/q_in_q_veto.sh     | 296 +++++++++++++++++++++
 1 file changed, 296 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
new file mode 100755
index 000000000000..7edaed8eb86a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
@@ -0,0 +1,296 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	create_8021ad_vlan_upper_on_top_front_panel_port
+	create_8021ad_vlan_upper_on_top_bridge_port
+	create_8021ad_vlan_upper_on_top_lag
+	create_8021ad_vlan_upper_on_top_bridge
+	create_8021ad_vlan_upper_on_top_8021ad_bridge
+	create_vlan_upper_on_top_8021ad_bridge
+	create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge
+	create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge
+	enslave_front_panel_with_vlan_upper_to_8021ad_bridge
+	enslave_lag_with_vlan_upper_to_8021ad_bridge
+	add_ip_address_to_8021ad_bridge
+	switch_bridge_protocol_from_8021q_to_8021ad
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	sleep 10
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_vlan_upper_on_top_of_bridge()
+{
+	RET=0
+
+	local bridge_proto=$1; shift
+	local netdev_proto=$1; shift
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip link add name br0.100 link br0 type vlan \
+		protocol $netdev_proto id 100 2>/dev/null
+	check_fail $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge not rejected"
+
+	ip link add name br0.100 link br0 type vlan \
+		protocol $netdev_proto id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge rejected without extack"
+
+	log_test "create $netdev_proto vlan upper on top $bridge_proto bridge"
+
+	ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_front_panel_port()
+{
+	RET=0
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a front panel not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a front panel rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a front panel"
+}
+
+create_8021ad_vlan_upper_on_top_bridge_port()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev br0 up
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a bridge port not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a bridge port rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a bridge port"
+
+	ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_lag()
+{
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+
+	ip link add name bond1.100 link bond1 type vlan \
+		protocol 802.1ad id 100 2>/dev/null
+	check_fail $? "802.1ad vlan upper creation on top of a lag not rejected"
+
+	ip link add name bond1.100 link bond1 type vlan \
+		protocol 802.1ad id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad vlan upper creation on top of a lag rejected without extack"
+
+	log_test "create 802.1ad vlan upper on top of a lag"
+
+	ip link del dev bond1
+}
+
+create_8021ad_vlan_upper_on_top_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1q" "802.1ad"
+}
+
+create_8021ad_vlan_upper_on_top_8021ad_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1ad" "802.1ad"
+}
+
+create_vlan_upper_on_top_8021ad_bridge()
+{
+	RET=0
+
+	create_vlan_upper_on_top_of_bridge "802.1ad" "802.1q"
+}
+
+create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100 2>/dev/null
+	check_fail $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge not rejected"
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge rejected without extack"
+
+	log_test "create vlan upper on top of front panel enslaved to 802.1ad bridge"
+
+	ip link del dev br0
+}
+
+create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+	ip link set dev bond1 master br0
+
+	ip link add name bond1.100 link bond1 type vlan id 100 2>/dev/null
+	check_fail $? "vlan upper creation on top of lag enslaved to 802.1ad bridge not rejected"
+
+	ip link add name bond1.100 link bond1 type vlan id 100 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "vlan upper creation on top of lag enslaved to 802.1ad bridge rejected without extack"
+
+	log_test "create vlan upper on top of lag enslaved to 802.1ad bridge"
+
+	ip link del dev bond1
+	ip link del dev br0
+}
+
+enslave_front_panel_with_vlan_upper_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name $swp1.100 link $swp1 type vlan id 100
+
+	ip link set dev $swp1 master br0 2>/dev/null
+	check_fail $? "front panel with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+	ip link set dev $swp1 master br0 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "front panel with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+	log_test "enslave front panel with vlan upper to 802.1ad bridge"
+
+	ip link del dev $swp1.100
+	ip link del dev br0
+}
+
+enslave_lag_with_vlan_upper_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp1 master bond1
+	ip link add name bond1.100 link bond1 type vlan id 100
+
+	ip link set dev bond1 master br0 2>/dev/null
+	check_fail $? "lag with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+	ip link set dev bond1 master br0 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "lag with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+	log_test "enslave lag with vlan upper to 802.1ad bridge"
+
+	ip link del dev bond1
+	ip link del dev br0
+}
+
+
+add_ip_address_to_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip addr add dev br0 192.0.2.17/28 2>/dev/null
+	check_fail $? "IP address addition to 802.1ad bridge not rejected"
+
+	ip addr add dev br0 192.0.2.17/28 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "IP address addition to 802.1ad bridge rejected without extack"
+
+	log_test "IP address addition to 802.1ad bridge"
+
+	ip link del dev br0
+}
+
+switch_bridge_protocol_from_8021q_to_8021ad()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 \
+		vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 master br0
+
+	ip link set dev br0 type bridge vlan_protocol 802.1q 2>/dev/null
+	check_fail $? "switching bridge protocol from 802.1q to 802.1ad not rejected"
+
+	log_test "switch bridge protocol"
+
+	ip link del dev br0
+}
+
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From f6a8250ea1e42ad1f4f3bab01c851ec5fd48f0e7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 30 Nov 2020 14:33:35 -0800
Subject: libbpf: Fix ring_buffer__poll() to return number of consumed samples

Fix ring_buffer__poll() to return the number of non-discarded records
consumed, just like its documentation states. It's also consistent with
ring_buffer__consume() return. Fix up selftests with wrong expected results.

Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support")
Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201130223336.904192-1-andrii@kernel.org
---
 tools/lib/bpf/ringbuf.c                                | 2 +-
 tools/testing/selftests/bpf/prog_tests/ringbuf.c       | 2 +-
 tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 5c6522c89af1..98537ff2679e 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -278,7 +278,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 		err = ringbuf_process_ring(ring);
 		if (err < 0)
 			return err;
-		res += cnt;
+		res += err;
 	}
 	return cnt < 0 ? -errno : res;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index c1650548433c..1a48c6f7f54e 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -217,7 +217,7 @@ void test_ringbuf(void)
 	if (CHECK(err, "join_bg", "err %d\n", err))
 		goto cleanup;
 
-	if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret))
+	if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret))
 		goto cleanup;
 
 	/* 3 rounds, 2 samples each */
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index 78e450609803..d37161e59bb2 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -81,7 +81,7 @@ void test_ringbuf_multi(void)
 
 	/* poll for samples, should get 2 ringbufs back */
 	err = ring_buffer__poll(ringbuf, -1);
-	if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+	if (CHECK(err != 2, "poll_res", "expected 2 records, got %d\n", err))
 		goto cleanup;
 
 	/* expect extra polling to return nothing */
-- 
cgit v1.2.3-70-g09d2


From 156c9b70dbfb83eeeff39e9202eb5f8bb6d0fd04 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 30 Nov 2020 14:33:36 -0800
Subject: selftests/bpf: Drain ringbuf samples at the end of test

Avoid occasional test failures due to the last sample being delayed to
another ring_buffer__poll() call. Instead, drain samples completely with
ring_buffer__consume(). This is supposed to fix a rare and non-deterministic
test failure in libbpf CI.

Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201130223336.904192-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/ringbuf.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 1a48c6f7f54e..fddbc5db5d6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -220,6 +220,12 @@ void test_ringbuf(void)
 	if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret))
 		goto cleanup;
 
+	/* due to timing variations, there could still be non-notified
+	 * samples, so consume them here to collect all the samples
+	 */
+	err = ring_buffer__consume(ringbuf);
+	CHECK(err < 0, "rb_consume", "failed: %d\b", err);
+
 	/* 3 rounds, 2 samples each */
 	cnt = atomic_xchg(&sample_cnt, 0);
 	CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
-- 
cgit v1.2.3-70-g09d2


From 179ef035992e89646e17138b18b130bb874b86bb Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Fri, 27 Nov 2020 14:32:36 -0500
Subject: selftests: Add kselftest for syscall user dispatch

Implement functionality tests for syscall user dispatch.  In order to
make the test portable, refrain from open coding syscall dispatchers and
calculating glibc memory ranges.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20201127193238.821364-6-krisman@collabora.com
---
 tools/testing/selftests/Makefile                   |   1 +
 .../selftests/syscall_user_dispatch/.gitignore     |   3 +
 .../selftests/syscall_user_dispatch/Makefile       |   9 +
 .../testing/selftests/syscall_user_dispatch/config |   1 +
 .../selftests/syscall_user_dispatch/sud_test.c     | 310 +++++++++++++++++++++
 5 files changed, 324 insertions(+)
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/.gitignore
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/Makefile
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/config
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/sud_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d9c283503159..96d5682fb9d8 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -56,6 +56,7 @@ TARGETS += sparc64
 TARGETS += splice
 TARGETS += static_keys
 TARGETS += sync
+TARGETS += syscall_user_dispatch
 TARGETS += sysctl
 TARGETS += tc-testing
 TARGETS += timens
diff --git a/tools/testing/selftests/syscall_user_dispatch/.gitignore b/tools/testing/selftests/syscall_user_dispatch/.gitignore
new file mode 100644
index 000000000000..f539615ad5da
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sud_test
+sud_benchmark
diff --git a/tools/testing/selftests/syscall_user_dispatch/Makefile b/tools/testing/selftests/syscall_user_dispatch/Makefile
new file mode 100644
index 000000000000..8e15fa42bcda
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+top_srcdir = ../../../..
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+
+CFLAGS += -Wall -I$(LINUX_HDR_PATH)
+
+TEST_GEN_PROGS := sud_test
+include ../lib.mk
diff --git a/tools/testing/selftests/syscall_user_dispatch/config b/tools/testing/selftests/syscall_user_dispatch/config
new file mode 100644
index 000000000000..039e303e59d7
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/config
@@ -0,0 +1 @@
+CONFIG_GENERIC_ENTRY=y
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
new file mode 100644
index 000000000000..6498b050ef89
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Test code for syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <sys/prctl.h>
+#include <sys/sysinfo.h>
+#include <sys/syscall.h>
+#include <signal.h>
+
+#include <asm/unistd.h>
+#include "../kselftest_harness.h"
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH	59
+# define PR_SYS_DISPATCH_OFF	0
+# define PR_SYS_DISPATCH_ON	1
+#endif
+
+#ifndef SYS_USER_DISPATCH
+# define SYS_USER_DISPATCH	2
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
+#endif
+
+#define SYSCALL_DISPATCH_ON(x) ((x) = 1)
+#define SYSCALL_DISPATCH_OFF(x) ((x) = 0)
+
+/* Test Summary:
+ *
+ * - dispatch_trigger_sigsys: Verify if PR_SET_SYSCALL_USER_DISPATCH is
+ *   able to trigger SIGSYS on a syscall.
+ *
+ * - bad_selector: Test that a bad selector value triggers SIGSYS with
+ *   si_errno EINVAL.
+ *
+ * - bad_prctl_param: Test that the API correctly rejects invalid
+ *   parameters on prctl
+ *
+ * - dispatch_and_return: Test that a syscall is selectively dispatched
+ *   to userspace depending on the value of selector.
+ *
+ * - disable_dispatch: Test that the PR_SYS_DISPATCH_OFF correctly
+ *   disables the dispatcher
+ *
+ * - direct_dispatch_range: Test that a syscall within the allowed range
+ *   can bypass the dispatcher.
+ */
+
+TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
+{
+	char sel = 0;
+	struct sysinfo info;
+	int ret;
+
+	ret = sysinfo(&info);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	SYSCALL_DISPATCH_ON(sel);
+
+	sysinfo(&info);
+
+	EXPECT_FALSE(true) {
+		TH_LOG("Unreachable!");
+	}
+}
+
+TEST(bad_prctl_param)
+{
+	char sel = 0;
+	int op;
+
+	/* Invalid op */
+	op = -1;
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel);
+	ASSERT_EQ(EINVAL, errno);
+
+	/* PR_SYS_DISPATCH_OFF */
+	op = PR_SYS_DISPATCH_OFF;
+
+	/* offset != 0 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* len != 0 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* sel != NULL */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* Valid parameter */
+	errno = 0;
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0);
+	EXPECT_EQ(0, errno);
+
+	/* PR_SYS_DISPATCH_ON */
+	op = PR_SYS_DISPATCH_ON;
+
+	/* Dispatcher region is bad (offset > 0 && len == 0) */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel);
+	EXPECT_EQ(EINVAL, errno);
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel);
+	EXPECT_EQ(EINVAL, errno);
+
+	/* Invalid selector */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1);
+	ASSERT_EQ(EFAULT, errno);
+
+	/*
+	 * Dispatcher range overflows unsigned long
+	 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel);
+	ASSERT_EQ(EINVAL, errno) {
+		TH_LOG("Should reject bad syscall range");
+	}
+
+	/*
+	 * Allowed range overflows usigned long
+	 */
+	prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel);
+	ASSERT_EQ(EINVAL, errno) {
+		TH_LOG("Should reject bad syscall range");
+	}
+}
+
+/*
+ * Use global selector for handle_sigsys tests, to avoid passing
+ * selector to signal handler
+ */
+char glob_sel;
+int nr_syscalls_emulated;
+int si_code;
+int si_errno;
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+	si_code = info->si_code;
+	si_errno = info->si_errno;
+
+	if (info->si_syscall == MAGIC_SYSCALL_1)
+		nr_syscalls_emulated++;
+
+	/* In preparation for sigreturn. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+}
+
+TEST(dispatch_and_return)
+{
+	long ret;
+	struct sigaction act;
+	sigset_t mask;
+
+	glob_sel = 0;
+	nr_syscalls_emulated = 0;
+	si_code = 0;
+	si_errno = 0;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+
+	act.sa_sigaction = handle_sigsys;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_mask = mask;
+
+	ret = sigaction(SIGSYS, &act, NULL);
+	ASSERT_EQ(0, ret);
+
+	/* Make sure selector is good prior to prctl. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	/* MAGIC_SYSCALL_1 doesn't exist. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+	ret = syscall(MAGIC_SYSCALL_1);
+	EXPECT_EQ(-1, ret) {
+		TH_LOG("Dispatch triggered unexpectedly");
+	}
+
+	/* MAGIC_SYSCALL_1 should be emulated. */
+	nr_syscalls_emulated = 0;
+	SYSCALL_DISPATCH_ON(glob_sel);
+
+	ret = syscall(MAGIC_SYSCALL_1);
+	EXPECT_EQ(MAGIC_SYSCALL_1, ret) {
+		TH_LOG("Failed to intercept syscall");
+	}
+	EXPECT_EQ(1, nr_syscalls_emulated) {
+		TH_LOG("Failed to emulate syscall");
+	}
+	ASSERT_EQ(SYS_USER_DISPATCH, si_code) {
+		TH_LOG("Bad si_code in SIGSYS");
+	}
+	ASSERT_EQ(0, si_errno) {
+		TH_LOG("Bad si_errno in SIGSYS");
+	}
+}
+
+TEST_SIGNAL(bad_selector, SIGSYS)
+{
+	long ret;
+	struct sigaction act;
+	sigset_t mask;
+	struct sysinfo info;
+
+	glob_sel = 0;
+	nr_syscalls_emulated = 0;
+	si_code = 0;
+	si_errno = 0;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+
+	act.sa_sigaction = handle_sigsys;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_mask = mask;
+
+	ret = sigaction(SIGSYS, &act, NULL);
+	ASSERT_EQ(0, ret);
+
+	/* Make sure selector is good prior to prctl. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	glob_sel = -1;
+
+	sysinfo(&info);
+
+	/* Even though it is ready to catch SIGSYS, the signal is
+	 * supposed to be uncatchable.
+	 */
+
+	EXPECT_FALSE(true) {
+		TH_LOG("Unreachable!");
+	}
+}
+
+TEST(disable_dispatch)
+{
+	int ret;
+	struct sysinfo info;
+	char sel = 0;
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	/* MAGIC_SYSCALL_1 doesn't exist. */
+	SYSCALL_DISPATCH_OFF(glob_sel);
+
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF, 0, 0, 0);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Failed to unset syscall user dispatch");
+	}
+
+	/* Shouldn't have any effect... */
+	SYSCALL_DISPATCH_ON(glob_sel);
+
+	ret = syscall(__NR_sysinfo, &info);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Dispatch triggered unexpectedly");
+	}
+}
+
+TEST(direct_dispatch_range)
+{
+	int ret = 0;
+	struct sysinfo info;
+	char sel = 0;
+
+	/*
+	 * Instead of calculating libc addresses; allow the entire
+	 * memory map and lock the selector.
+	 */
+	ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+	}
+
+	SYSCALL_DISPATCH_ON(sel);
+
+	ret = sysinfo(&info);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Dispatch triggered unexpectedly");
+	}
+}
+
+TEST_HARNESS_MAIN
-- 
cgit v1.2.3-70-g09d2


From d87ae0fa21c26db2d7c66f22dee9c27ecda48ce2 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Fri, 27 Nov 2020 14:32:37 -0500
Subject: selftests: Add benchmark for syscall user dispatch

This is the patch I'm using to evaluate the impact syscall user dispatch
has on native syscall (syscalls not redirected to userspace) when
enabled for the process and submiting syscalls though the unblocked
dispatch selector. It works by running a step to define a baseline of
the cost of executing sysinfo, then enabling SUD, and rerunning that
step.

On my test machine, an AMD Ryzen 5 1500X, I have the following results
with the latest version of syscall user dispatch patches.

root@olga:~# syscall_user_dispatch/sud_benchmark
  Calibrating test set to last ~5 seconds...
  test iterations = 37500000
  Avg syscall time 134ns.
  Caught sys_ff00
  trapped_call_count 1, native_call_count 0.
  Avg syscall time 147ns.
  Interception overhead: 9.7% (+13ns).

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20201127193238.821364-7-krisman@collabora.com
---
 .../selftests/syscall_user_dispatch/Makefile       |   2 +-
 .../syscall_user_dispatch/sud_benchmark.c          | 200 +++++++++++++++++++++
 2 files changed, 201 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c

(limited to 'tools')

diff --git a/tools/testing/selftests/syscall_user_dispatch/Makefile b/tools/testing/selftests/syscall_user_dispatch/Makefile
index 8e15fa42bcda..03c120270953 100644
--- a/tools/testing/selftests/syscall_user_dispatch/Makefile
+++ b/tools/testing/selftests/syscall_user_dispatch/Makefile
@@ -5,5 +5,5 @@ LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
 
 CFLAGS += -Wall -I$(LINUX_HDR_PATH)
 
-TEST_GEN_PROGS := sud_test
+TEST_GEN_PROGS := sud_test sud_benchmark
 include ../lib.mk
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
new file mode 100644
index 000000000000..6689f1183dbf
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Benchmark and test syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/sysinfo.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH	59
+# define PR_SYS_DISPATCH_OFF	0
+# define PR_SYS_DISPATCH_ON	1
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00)  /* Bad Linux syscall number */
+#endif
+
+/*
+ * To test returning from a sigsys with selector blocked, the test
+ * requires some per-architecture support (i.e. knowledge about the
+ * signal trampoline address).  On i386, we know it is on the vdso, and
+ * a small trampoline is open-coded for x86_64.  Other architectures
+ * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
+ * out of the box, but don't enable them until they support syscall user
+ * dispatch.
+ */
+#if defined(__x86_64__) || defined(__i386__)
+#define TEST_BLOCKED_RETURN
+#endif
+
+#ifdef __x86_64__
+void* (syscall_dispatcher_start)(void);
+void* (syscall_dispatcher_end)(void);
+#else
+unsigned long syscall_dispatcher_start = 0;
+unsigned long syscall_dispatcher_end = 0;
+#endif
+
+unsigned long trapped_call_count = 0;
+unsigned long native_call_count = 0;
+
+char selector;
+#define SYSCALL_BLOCK   (selector = PR_SYS_DISPATCH_ON)
+#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)
+
+#define CALIBRATION_STEP 100000
+#define CALIBRATE_TO_SECS 5
+int factor;
+
+static double one_sysinfo_step(void)
+{
+	struct timespec t1, t2;
+	int i;
+	struct sysinfo info;
+
+	clock_gettime(CLOCK_MONOTONIC, &t1);
+	for (i = 0; i < CALIBRATION_STEP; i++)
+		sysinfo(&info);
+	clock_gettime(CLOCK_MONOTONIC, &t2);
+	return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
+}
+
+static void calibrate_set(void)
+{
+	double elapsed = 0;
+
+	printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
+
+	while (elapsed < 1) {
+		elapsed += one_sysinfo_step();
+		factor += CALIBRATE_TO_SECS;
+	}
+
+	printf("test iterations = %d\n", CALIBRATION_STEP * factor);
+}
+
+static double perf_syscall(void)
+{
+	unsigned int i;
+	double partial = 0;
+
+	for (i = 0; i < factor; ++i)
+		partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
+	return partial;
+}
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+	char buf[1024];
+	int len;
+
+	SYSCALL_UNBLOCK;
+
+	/* printf and friends are not signal-safe. */
+	len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
+	write(1, buf, len);
+
+	if (info->si_syscall == MAGIC_SYSCALL_1)
+		trapped_call_count++;
+	else
+		native_call_count++;
+
+#ifdef TEST_BLOCKED_RETURN
+	SYSCALL_BLOCK;
+#endif
+
+#ifdef __x86_64__
+	__asm__ volatile("movq $0xf, %rax");
+	__asm__ volatile("leaveq");
+	__asm__ volatile("add $0x8, %rsp");
+	__asm__ volatile("syscall_dispatcher_start:");
+	__asm__ volatile("syscall");
+	__asm__ volatile("nop"); /* Landing pad within dispatcher area */
+	__asm__ volatile("syscall_dispatcher_end:");
+#endif
+
+}
+
+int main(void)
+{
+	struct sigaction act;
+	double time1, time2;
+	int ret;
+	sigset_t mask;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+
+	act.sa_sigaction = handle_sigsys;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_mask = mask;
+
+	calibrate_set();
+
+	time1 = perf_syscall();
+	printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
+
+	ret = sigaction(SIGSYS, &act, NULL);
+	if (ret) {
+		perror("Error sigaction:");
+		exit(-1);
+	}
+
+	fprintf(stderr, "Enabling syscall trapping.\n");
+
+	if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
+		  syscall_dispatcher_start,
+		  (syscall_dispatcher_end - syscall_dispatcher_start + 1),
+		  &selector)) {
+		perror("prctl failed\n");
+		exit(-1);
+	}
+
+	SYSCALL_BLOCK;
+	syscall(MAGIC_SYSCALL_1);
+
+#ifdef TEST_BLOCKED_RETURN
+	if (selector == PR_SYS_DISPATCH_OFF) {
+		fprintf(stderr, "Failed to return with selector blocked.\n");
+		exit(-1);
+	}
+#endif
+
+	SYSCALL_UNBLOCK;
+
+	if (!trapped_call_count) {
+		fprintf(stderr, "syscall trapping does not work.\n");
+		exit(-1);
+	}
+
+	time2 = perf_syscall();
+
+	if (native_call_count) {
+		perror("syscall trapping intercepted more syscalls than expected\n");
+		exit(-1);
+	}
+
+	printf("trapped_call_count %lu, native_call_count %lu.\n",
+	       trapped_call_count, native_call_count);
+	printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
+	printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
+	       100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
+	return 0;
+
+}
-- 
cgit v1.2.3-70-g09d2


From 2c07343abd8932200a45ff7b10950e71081e9e77 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@linux.microsoft.com>
Date: Wed, 2 Dec 2020 17:26:43 +0100
Subject: selftests/seccomp: Update kernel config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

seccomp_bpf.c uses unshare(CLONE_NEWPID), which requires CONFIG_PID_NS
to be set.

Cc: Kees Cook <keescook@chromium.org>
Cc: Shuah Khan <shuah@kernel.org>
Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201202162643.249276-1-mic@digikod.net
---
 tools/testing/selftests/seccomp/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
index 64c19d8eba79..ad431a5178fb 100644
--- a/tools/testing/selftests/seccomp/config
+++ b/tools/testing/selftests/seccomp/config
@@ -1,3 +1,4 @@
+CONFIG_PID_NS=y
 CONFIG_SECCOMP=y
 CONFIG_SECCOMP_FILTER=y
 CONFIG_USER_NS=y
-- 
cgit v1.2.3-70-g09d2


From a999696c547f1a8ef2ddbb9b0e77abc3f6db4ff1 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 09:25:14 -0800
Subject: selftests/bpf: Rewrite test_sock_addr bind bpf into C

I'm planning to extend it in the next patches. It's much easier to
work with C than BPF assembly.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201202172516.3483656-2-sdf@google.com
---
 tools/testing/selftests/bpf/progs/bind4_prog.c |  71 +++++++++
 tools/testing/selftests/bpf/progs/bind6_prog.c |  88 +++++++++++
 tools/testing/selftests/bpf/test_sock_addr.c   | 196 ++-----------------------
 3 files changed, 171 insertions(+), 184 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bind4_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/bind6_prog.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
new file mode 100644
index 000000000000..0951302a984a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV4_IP		0xc0a801feU /* 192.168.1.254 */
+#define SERV4_PORT		4040
+#define SERV4_REWRITE_IP	0x7f000001U /* 127.0.0.1 */
+#define SERV4_REWRITE_PORT	4444
+
+SEC("cgroup/bind4")
+int bind_v4_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock *sk;
+	__u32 user_ip4;
+	__u16 user_port;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != AF_INET)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+
+	if (ctx->user_ip4 != bpf_htonl(SERV4_IP) ||
+	    ctx->user_port != bpf_htons(SERV4_PORT))
+		return 0;
+
+	// u8 narrow loads:
+	user_ip4 = 0;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24;
+	if (ctx->user_ip4 != user_ip4)
+		return 0;
+
+	user_port = 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+	if (ctx->user_port != user_port)
+		return 0;
+
+	// u16 narrow loads:
+	user_ip4 = 0;
+	user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0;
+	user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16;
+	if (ctx->user_ip4 != user_ip4)
+		return 0;
+
+	ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
+	ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
new file mode 100644
index 000000000000..16da1cf85418
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV6_IP_0		0xfaceb00c /* face:b00c:1234:5678::abcd */
+#define SERV6_IP_1		0x12345678
+#define SERV6_IP_2		0x00000000
+#define SERV6_IP_3		0x0000abcd
+#define SERV6_PORT		6060
+#define SERV6_REWRITE_IP_0	0x00000000
+#define SERV6_REWRITE_IP_1	0x00000000
+#define SERV6_REWRITE_IP_2	0x00000000
+#define SERV6_REWRITE_IP_3	0x00000001
+#define SERV6_REWRITE_PORT	6666
+
+SEC("cgroup/bind6")
+int bind_v6_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock *sk;
+	__u32 user_ip6;
+	__u16 user_port;
+	int i;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != AF_INET6)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+
+	if (ctx->user_ip6[0] != bpf_htonl(SERV6_IP_0) ||
+	    ctx->user_ip6[1] != bpf_htonl(SERV6_IP_1) ||
+	    ctx->user_ip6[2] != bpf_htonl(SERV6_IP_2) ||
+	    ctx->user_ip6[3] != bpf_htonl(SERV6_IP_3) ||
+	    ctx->user_port != bpf_htons(SERV6_PORT))
+		return 0;
+
+	// u8 narrow loads:
+	for (i = 0; i < 4; i++) {
+		user_ip6 = 0;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24;
+		if (ctx->user_ip6[i] != user_ip6)
+			return 0;
+	}
+
+	user_port = 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+	if (ctx->user_port != user_port)
+		return 0;
+
+	// u16 narrow loads:
+	for (i = 0; i < 4; i++) {
+		user_ip6 = 0;
+		user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0;
+		user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16;
+		if (ctx->user_ip6[i] != user_ip6)
+			return 0;
+	}
+
+	ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
+	ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
+	ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
+	ctx->user_ip6[3] = bpf_htonl(SERV6_REWRITE_IP_3);
+	ctx->user_port = bpf_htons(SERV6_REWRITE_PORT);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index b8c72c1d9cf7..dcb83ab02919 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -31,6 +31,8 @@
 #define CONNECT6_PROG_PATH	"./connect6_prog.o"
 #define SENDMSG4_PROG_PATH	"./sendmsg4_prog.o"
 #define SENDMSG6_PROG_PATH	"./sendmsg6_prog.o"
+#define BIND4_PROG_PATH		"./bind4_prog.o"
+#define BIND6_PROG_PATH		"./bind6_prog.o"
 
 #define SERV4_IP		"192.168.1.254"
 #define SERV4_REWRITE_IP	"127.0.0.1"
@@ -660,190 +662,6 @@ static int load_insns(const struct sock_addr_test *test,
 	return ret;
 }
 
-/* [1] These testing programs try to read different context fields, including
- * narrow loads of different sizes from user_ip4 and user_ip6, and write to
- * those allowed to be overridden.
- *
- * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
- * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
- * in such cases since it accepts only _signed_ 32bit integer as IMM
- * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
- * to count jumps properly.
- */
-
-static int bind4_prog_load(const struct sock_addr_test *test)
-{
-	union {
-		uint8_t u4_addr8[4];
-		uint16_t u4_addr16[2];
-		uint32_t u4_addr32;
-	} ip4, port;
-	struct sockaddr_in addr4_rw;
-
-	if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
-		log_err("Invalid IPv4: %s", SERV4_IP);
-		return -1;
-	}
-
-	port.u4_addr32 = htons(SERV4_PORT);
-
-	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
-			(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
-		return -1;
-
-	/* See [1]. */
-	struct bpf_insn insns[] = {
-		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
-		/* if (sk.family == AF_INET && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
-
-		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, type)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
-		BPF_JMP_A(1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
-
-		/*     1st_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
-
-		/*     2nd_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
-
-		/*     3rd_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
-
-		/*     4th_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 3),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
-
-		/*     1st_half_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
-
-		/*     2nd_half_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
-
-		/*     whole_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
-
-		/*     1st_byte_of_user_port == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
-
-		/*     1st_half_of_user_port == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
-
-		/*     user_port == expected) { */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
-
-		/*      user_ip4 = addr4_rw.sin_addr */
-		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-
-		/*      user_port = addr4_rw.sin_port */
-		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		/* } */
-
-		/* return 1 */
-		BPF_MOV64_IMM(BPF_REG_0, 1),
-		BPF_EXIT_INSN(),
-	};
-
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
-static int bind6_prog_load(const struct sock_addr_test *test)
-{
-	struct sockaddr_in6 addr6_rw;
-	struct in6_addr ip6;
-
-	if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
-		log_err("Invalid IPv6: %s", SERV6_IP);
-		return -1;
-	}
-
-	if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
-			(struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
-		return -1;
-
-	/* See [1]. */
-	struct bpf_insn insns[] = {
-		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
-		/* if (sk.family == AF_INET6 && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
-
-		/*            5th_byte_of_user_ip6 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[1])),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
-
-		/*            3rd_half_of_user_ip6 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[1])),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
-
-		/*            last_word_of_user_ip6 == expected) { */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[3])),
-		BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]),  /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
-
-
-#define STORE_IPV6_WORD(N)						       \
-		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]),     \
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,		       \
-			    offsetof(struct bpf_sock_addr, user_ip6[N]))
-
-		/*      user_ip6 = addr6_rw.sin6_addr */
-		STORE_IPV6_WORD(0),
-		STORE_IPV6_WORD(1),
-		STORE_IPV6_WORD(2),
-		STORE_IPV6_WORD(3),
-
-		/*      user_port = addr6_rw.sin6_port */
-		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_port)),
-
-		/* } */
-
-		/* return 1 */
-		BPF_MOV64_IMM(BPF_REG_0, 1),
-		BPF_EXIT_INSN(),
-	};
-
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
 static int load_path(const struct sock_addr_test *test, const char *path)
 {
 	struct bpf_prog_load_attr attr;
@@ -865,6 +683,16 @@ static int load_path(const struct sock_addr_test *test, const char *path)
 	return prog_fd;
 }
 
+static int bind4_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, BIND4_PROG_PATH);
+}
+
+static int bind6_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, BIND6_PROG_PATH);
+}
+
 static int connect4_prog_load(const struct sock_addr_test *test)
 {
 	return load_path(test, CONNECT4_PROG_PATH);
-- 
cgit v1.2.3-70-g09d2


From a540c81a2bcb95227c3e24a4478956824858a6b0 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 09:25:16 -0800
Subject: selftests/bpf: Extend bind{4,6} programs with a call to
 bpf_setsockopt

To make sure it doesn't trigger sock_owned_by_me splat.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201202172516.3483656-4-sdf@google.com
---
 tools/testing/selftests/bpf/progs/bind4_prog.c | 31 ++++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bind6_prog.c | 31 ++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index 0951302a984a..c6520f21f5f5 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -19,6 +19,33 @@
 #define SERV4_REWRITE_IP	0x7f000001U /* 127.0.0.1 */
 #define SERV4_REWRITE_PORT	4444
 
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/bind4")
 int bind_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -62,6 +89,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
 	if (ctx->user_ip4 != user_ip4)
 		return 0;
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
 	ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
 
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index 16da1cf85418..4358e44dcf47 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -25,6 +25,33 @@
 #define SERV6_REWRITE_IP_3	0x00000001
 #define SERV6_REWRITE_PORT	6666
 
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/bind6")
 int bind_v6_prog(struct bpf_sock_addr *ctx)
 {
@@ -76,6 +103,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
 			return 0;
 	}
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
 	ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
 	ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
-- 
cgit v1.2.3-70-g09d2


From 80ee81e0403c48f4eb342f7c8d40477c89b8836a Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 1 Dec 2020 13:58:58 -0800
Subject: bpf: Eliminate rlimit-based memory accounting infra for bpf maps

Remove rlimit-based accounting infrastructure code, which is not used
anymore.

To provide a backward compatibility, use an approximation of the
bpf map memory footprint as a "memlock" value, available to a user
via map info. The approximation is based on the maximal number of
elements and key and value sizes.

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201201215900.3569844-33-guro@fb.com
---
 include/linux/bpf.h                                | 12 ---
 kernel/bpf/syscall.c                               | 96 ++++------------------
 .../testing/selftests/bpf/progs/bpf_iter_bpf_map.c |  2 +-
 tools/testing/selftests/bpf/progs/map_ptr_kern.c   |  7 --
 4 files changed, 17 insertions(+), 100 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e1f2c95c15ec..61331a148cde 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -138,11 +138,6 @@ struct bpf_map_ops {
 	const struct bpf_iter_seq_info *iter_seq_info;
 };
 
-struct bpf_map_memory {
-	u32 pages;
-	struct user_struct *user;
-};
-
 struct bpf_map {
 	/* The first two cachelines with read-mostly members of which some
 	 * are also accessed in fast-path (e.g. ops, max_entries).
@@ -163,7 +158,6 @@ struct bpf_map {
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
 	struct btf *btf;
-	struct bpf_map_memory memory;
 #ifdef CONFIG_MEMCG_KMEM
 	struct mem_cgroup *memcg;
 #endif
@@ -1224,12 +1218,6 @@ void bpf_map_inc_with_uref(struct bpf_map *map);
 struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
-int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
-void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
-int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size);
-void bpf_map_charge_finish(struct bpf_map_memory *mem);
-void bpf_map_charge_move(struct bpf_map_memory *dst,
-			 struct bpf_map_memory *src);
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index dff3a5f62d7a..29096d96d989 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -128,7 +128,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 	return map;
 }
 
-static u32 bpf_map_value_size(struct bpf_map *map)
+static u32 bpf_map_value_size(const struct bpf_map *map)
 {
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -346,77 +346,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
 	map->numa_node = bpf_map_attr_numa_node(attr);
 }
 
-static int bpf_charge_memlock(struct user_struct *user, u32 pages)
-{
-	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
-		atomic_long_sub(pages, &user->locked_vm);
-		return -EPERM;
-	}
-	return 0;
-}
-
-static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
-{
-	if (user)
-		atomic_long_sub(pages, &user->locked_vm);
-}
-
-int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
-{
-	u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
-	struct user_struct *user;
-	int ret;
-
-	if (size >= U32_MAX - PAGE_SIZE)
-		return -E2BIG;
-
-	user = get_current_user();
-	ret = bpf_charge_memlock(user, pages);
-	if (ret) {
-		free_uid(user);
-		return ret;
-	}
-
-	mem->pages = pages;
-	mem->user = user;
-
-	return 0;
-}
-
-void bpf_map_charge_finish(struct bpf_map_memory *mem)
-{
-	bpf_uncharge_memlock(mem->user, mem->pages);
-	free_uid(mem->user);
-}
-
-void bpf_map_charge_move(struct bpf_map_memory *dst,
-			 struct bpf_map_memory *src)
-{
-	*dst = *src;
-
-	/* Make sure src will not be used for the redundant uncharging. */
-	memset(src, 0, sizeof(struct bpf_map_memory));
-}
-
-int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
-{
-	int ret;
-
-	ret = bpf_charge_memlock(map->memory.user, pages);
-	if (ret)
-		return ret;
-	map->memory.pages += pages;
-	return ret;
-}
-
-void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
-{
-	bpf_uncharge_memlock(map->memory.user, pages);
-	map->memory.pages -= pages;
-}
-
 static int bpf_map_alloc_id(struct bpf_map *map)
 {
 	int id;
@@ -524,14 +453,11 @@ static void bpf_map_release_memcg(struct bpf_map *map)
 static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
-	struct bpf_map_memory mem;
 
-	bpf_map_charge_move(&mem, &map->memory);
 	security_bpf_map_free(map);
 	bpf_map_release_memcg(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
-	bpf_map_charge_finish(&mem);
 }
 
 static void bpf_map_put_uref(struct bpf_map *map)
@@ -592,6 +518,19 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
 }
 
 #ifdef CONFIG_PROC_FS
+/* Provides an approximation of the map's memory footprint.
+ * Used only to provide a backward compatibility and display
+ * a reasonable "memlock" info.
+ */
+static unsigned long bpf_map_memory_footprint(const struct bpf_map *map)
+{
+	unsigned long size;
+
+	size = round_up(map->key_size + bpf_map_value_size(map), 8);
+
+	return round_up(map->max_entries * size, PAGE_SIZE);
+}
+
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_map *map = filp->private_data;
@@ -610,7 +549,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "value_size:\t%u\n"
 		   "max_entries:\t%u\n"
 		   "map_flags:\t%#x\n"
-		   "memlock:\t%llu\n"
+		   "memlock:\t%lu\n"
 		   "map_id:\t%u\n"
 		   "frozen:\t%u\n",
 		   map->map_type,
@@ -618,7 +557,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   map->value_size,
 		   map->max_entries,
 		   map->map_flags,
-		   map->memory.pages * 1ULL << PAGE_SHIFT,
+		   bpf_map_memory_footprint(map),
 		   map->id,
 		   READ_ONCE(map->frozen));
 	if (type) {
@@ -861,7 +800,6 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 static int map_create(union bpf_attr *attr)
 {
 	int numa_node = bpf_map_attr_numa_node(attr);
-	struct bpf_map_memory mem;
 	struct bpf_map *map;
 	int f_flags;
 	int err;
@@ -960,9 +898,7 @@ free_map_sec:
 	security_bpf_map_free(map);
 free_map:
 	btf_put(map->btf);
-	bpf_map_charge_move(&mem, &map->memory);
 	map->ops->map_free(map);
-	bpf_map_charge_finish(&mem);
 	return err;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index 08651b23edba..b83b5d2e17dc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -23,6 +23,6 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
 
 	BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
 		       map->usercnt.counter,
-		       map->memory.user->locked_vm.counter);
+		       0LLU);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index c325405751e2..d8850bc6a9f1 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -26,17 +26,12 @@ __u32 g_line = 0;
 		return 0;	\
 })
 
-struct bpf_map_memory {
-	__u32 pages;
-} __attribute__((preserve_access_index));
-
 struct bpf_map {
 	enum bpf_map_type map_type;
 	__u32 key_size;
 	__u32 value_size;
 	__u32 max_entries;
 	__u32 id;
-	struct bpf_map_memory memory;
 } __attribute__((preserve_access_index));
 
 static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
@@ -47,7 +42,6 @@ static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
 	VERIFY(map->value_size == value_size);
 	VERIFY(map->max_entries == max_entries);
 	VERIFY(map->id > 0);
-	VERIFY(map->memory.pages > 0);
 
 	return 1;
 }
@@ -60,7 +54,6 @@ static inline int check_bpf_map_ptr(struct bpf_map *indirect,
 	VERIFY(indirect->value_size == direct->value_size);
 	VERIFY(indirect->max_entries == direct->max_entries);
 	VERIFY(indirect->id == direct->id);
-	VERIFY(indirect->memory.pages == direct->memory.pages);
 
 	return 1;
 }
-- 
cgit v1.2.3-70-g09d2


From 4e62d55d77bbdb33d821f5e16306caab38d42267 Mon Sep 17 00:00:00 2001
From: Aleksa Sarai <cyphar@cyphar.com>
Date: Wed, 28 Oct 2020 10:50:44 +1100
Subject: selftests: openat2: add RESOLVE_ conflict test

Now that we reject conflicting RESOLVE_ flags, add a selftest to avoid
regressions.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Link: https://lore.kernel.org/r/20201027235044.5240-3-cyphar@cyphar.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/openat2/openat2_test.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index b386367c606b..381d874cce99 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -155,7 +155,7 @@ struct flag_test {
 	int err;
 };
 
-#define NUM_OPENAT2_FLAG_TESTS 23
+#define NUM_OPENAT2_FLAG_TESTS 24
 
 void test_openat2_flags(void)
 {
@@ -210,6 +210,12 @@ void test_openat2_flags(void)
 		  .how.flags = O_TMPFILE | O_RDWR,
 		  .how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
 
+		/* ->resolve flags must not conflict. */
+		{ .name = "incompatible resolve flags (BENEATH | IN_ROOT)",
+		  .how.flags = O_RDONLY,
+		  .how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT,
+		  .err = -EINVAL },
+
 		/* ->resolve must only contain RESOLVE_* flags. */
 		{ .name = "invalid how.resolve and O_RDONLY",
 		  .how.flags = O_RDONLY,
-- 
cgit v1.2.3-70-g09d2


From f0812f6ca8299e864fe0f41bd7ffdaae3ce7630e Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Wed, 2 Dec 2020 01:44:27 +1100
Subject: selftests/powerpc: update .gitignore

I did an in-place build of the self-tests and found that it left
the tree dirty.

Add missed test binaries to .gitignore

Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201201144427.1228745-1-dja@axtens.net
---
 tools/testing/selftests/powerpc/nx-gzip/.gitignore  | 3 +++
 tools/testing/selftests/powerpc/security/.gitignore | 1 +
 tools/testing/selftests/powerpc/signal/.gitignore   | 1 +
 tools/testing/selftests/powerpc/syscalls/.gitignore | 1 +
 4 files changed, 6 insertions(+)
 create mode 100644 tools/testing/selftests/powerpc/nx-gzip/.gitignore

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/nx-gzip/.gitignore b/tools/testing/selftests/powerpc/nx-gzip/.gitignore
new file mode 100644
index 000000000000..886d522d52df
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+gunz_test
+gzfht_test
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index 4257a1f156bb..93614b125ded 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 rfi_flush
 entry_flush
+spectre_v2
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index 405b5364044c..ce3375cd8e73 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -3,3 +3,4 @@ signal
 signal_tm
 sigfuz
 sigreturn_vdso
+sig_sc_double_restart
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
index b00cab225476..a1e19ccdef84 100644
--- a/tools/testing/selftests/powerpc/syscalls/.gitignore
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 ipc_unmuxed
+rtas_filter
-- 
cgit v1.2.3-70-g09d2


From c9344769e2b46ba28b947bec7a8a8f0a091ecd57 Mon Sep 17 00:00:00 2001
From: Harish <harish@linux.ibm.com>
Date: Tue, 1 Dec 2020 14:54:03 +0530
Subject: selftests/powerpc: Fix uninitialized variable warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch fixes uninitialized variable warning in bad_accesses test
which causes the selftests build to fail in older distibutions

bad_accesses.c: In function ‘bad_access’:
bad_accesses.c:52:9: error: ‘x’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
   printf("Bad - no SEGV! (%c)\n", x);
         ^
cc1: all warnings being treated as errors

Signed-off-by: Harish <harish@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201201092403.238182-1-harish@linux.ibm.com
---
 tools/testing/selftests/powerpc/mm/bad_accesses.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index fd747b2ffcfc..65d2148b05dc 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -38,7 +38,7 @@ static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
 
 int bad_access(char *p, bool write)
 {
-	char x;
+	char x = 0;
 
 	fault_code = 0;
 	fault_addr = 0;
-- 
cgit v1.2.3-70-g09d2


From 3ba150fb21207e4a7f4b600eb2dbbe83f94571fe Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@linux.ibm.com>
Date: Mon, 30 Nov 2020 14:00:57 +0530
Subject: lkdtm/powerpc: Add SLB multihit test

To check machine check handling, add support to inject slb
multihit errors.

Co-developed-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Signed-off-by: Ganesh Goudar <ganeshgr@linux.ibm.com>
[mpe: Use CONFIG_PPC_BOOK3S_64 to fix compile errors reported by lkp@intel.com]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201130083057.135610-1-ganeshgr@linux.ibm.com
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  28 +++++-
 arch/powerpc/mm/book3s64/hash_utils.c         |   1 +
 arch/powerpc/mm/book3s64/slb.c                |  27 ------
 drivers/misc/lkdtm/Makefile                   |   1 +
 drivers/misc/lkdtm/core.c                     |   3 +
 drivers/misc/lkdtm/lkdtm.h                    |   3 +
 drivers/misc/lkdtm/powerpc.c                  | 120 ++++++++++++++++++++++++++
 tools/testing/selftests/lkdtm/tests.txt       |   1 +
 8 files changed, 156 insertions(+), 28 deletions(-)
 create mode 100644 drivers/misc/lkdtm/powerpc.c

(limited to 'tools')

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 9192cb05a6ab..066b1d34c7bc 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -843,6 +843,32 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 
 unsigned htab_shift_for_mem_size(unsigned long mem_size);
 
-#endif /* __ASSEMBLY__ */
+enum slb_index {
+	LINEAR_INDEX	= 0, /* Kernel linear map  (0xc000000000000000) */
+	KSTACK_INDEX	= 1, /* Kernel stack map */
+};
 
+#define slb_esid_mask(ssize)	\
+	(((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
+
+static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
+					 enum slb_index index)
+{
+	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
+}
+
+static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
+					   unsigned long flags)
+{
+	return (vsid << slb_vsid_shift(ssize)) | flags |
+		((unsigned long)ssize << SLB_VSID_SSIZE_SHIFT);
+}
+
+static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+					 unsigned long flags)
+{
+	return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
+}
+
+#endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index e0fe1a43e7b8..73b06adb6eeb 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -112,6 +112,7 @@ int mmu_linear_psize = MMU_PAGE_4K;
 EXPORT_SYMBOL_GPL(mmu_linear_psize);
 int mmu_virtual_psize = MMU_PAGE_4K;
 int mmu_vmalloc_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 int mmu_vmemmap_psize = MMU_PAGE_4K;
 #endif
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 6d720c1c08a4..584567970c11 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -28,35 +28,8 @@
 #include "internal.h"
 
 
-enum slb_index {
-	LINEAR_INDEX	= 0, /* Kernel linear map  (0xc000000000000000) */
-	KSTACK_INDEX	= 1, /* Kernel stack map */
-};
-
 static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
 
-#define slb_esid_mask(ssize)	\
-	(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
-
-static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
-					 enum slb_index index)
-{
-	return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
-}
-
-static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
-					 unsigned long flags)
-{
-	return (vsid << slb_vsid_shift(ssize)) | flags |
-		((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
-}
-
-static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
-					 unsigned long flags)
-{
-	return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
-}
-
 bool stress_slb_enabled __initdata;
 
 static int __init parse_stress_slb(char *p)
diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index c70b3822013f..5a92c74eca92 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -10,6 +10,7 @@ lkdtm-$(CONFIG_LKDTM)		+= rodata_objcopy.o
 lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
 lkdtm-$(CONFIG_LKDTM)		+= stackleak.o
 lkdtm-$(CONFIG_LKDTM)		+= cfi.o
+lkdtm-$(CONFIG_PPC_BOOK3S_64)	+= powerpc.o
 
 KASAN_SANITIZE_stackleak.o	:= n
 KCOV_INSTRUMENT_rodata.o	:= n
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 97803f213d9d..1f612c76a61b 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -176,6 +176,9 @@ static const struct crashtype crashtypes[] = {
 #ifdef CONFIG_X86_32
 	CRASHTYPE(DOUBLE_FAULT),
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	CRASHTYPE(PPC_SLB_MULTIHIT),
+#endif
 };
 
 
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 6dec4c9b442f..79ec05c18dd1 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -102,4 +102,7 @@ void lkdtm_STACKLEAK_ERASING(void);
 /* cfi.c */
 void lkdtm_CFI_FORWARD_PROTO(void);
 
+/* powerpc.c */
+void lkdtm_PPC_SLB_MULTIHIT(void);
+
 #endif
diff --git a/drivers/misc/lkdtm/powerpc.c b/drivers/misc/lkdtm/powerpc.c
new file mode 100644
index 000000000000..077c9f9ed8d0
--- /dev/null
+++ b/drivers/misc/lkdtm/powerpc.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "lkdtm.h"
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/mmu.h>
+
+/* Inserts new slb entries */
+static void insert_slb_entry(unsigned long p, int ssize, int page_size)
+{
+	unsigned long flags;
+
+	flags = SLB_VSID_KERNEL | mmu_psize_defs[page_size].sllp;
+	preempt_disable();
+
+	asm volatile("slbmte %0,%1" :
+		     : "r" (mk_vsid_data(p, ssize, flags)),
+		       "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED))
+		     : "memory");
+
+	asm volatile("slbmte %0,%1" :
+			: "r" (mk_vsid_data(p, ssize, flags)),
+			  "r" (mk_esid_data(p, ssize, SLB_NUM_BOLTED + 1))
+			: "memory");
+	preempt_enable();
+}
+
+/* Inject slb multihit on vmalloc-ed address i.e 0xD00... */
+static int inject_vmalloc_slb_multihit(void)
+{
+	char *p;
+
+	p = vmalloc(PAGE_SIZE);
+	if (!p)
+		return -ENOMEM;
+
+	insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_vmalloc_psize);
+	/*
+	 * This triggers exception, If handled correctly we must recover
+	 * from this error.
+	 */
+	p[0] = '!';
+	vfree(p);
+	return 0;
+}
+
+/* Inject slb multihit on kmalloc-ed address i.e 0xC00... */
+static int inject_kmalloc_slb_multihit(void)
+{
+	char *p;
+
+	p = kmalloc(2048, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	insert_slb_entry((unsigned long)p, MMU_SEGSIZE_1T, mmu_linear_psize);
+	/*
+	 * This triggers exception, If handled correctly we must recover
+	 * from this error.
+	 */
+	p[0] = '!';
+	kfree(p);
+	return 0;
+}
+
+/*
+ * Few initial SLB entries are bolted. Add a test to inject
+ * multihit in bolted entry 0.
+ */
+static void insert_dup_slb_entry_0(void)
+{
+	unsigned long test_address = PAGE_OFFSET, *test_ptr;
+	unsigned long esid, vsid;
+	unsigned long i = 0;
+
+	test_ptr = (unsigned long *)test_address;
+	preempt_disable();
+
+	asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
+	asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
+
+	/* for i !=0 we would need to mask out the old entry number */
+	asm volatile("slbmte %0,%1" :
+			: "r" (vsid),
+			  "r" (esid | SLB_NUM_BOLTED)
+			: "memory");
+
+	asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (i));
+	asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (i));
+
+	/* for i !=0 we would need to mask out the old entry number */
+	asm volatile("slbmte %0,%1" :
+			: "r" (vsid),
+			  "r" (esid | (SLB_NUM_BOLTED + 1))
+			: "memory");
+
+	pr_info("%s accessing test address 0x%lx: 0x%lx\n",
+		__func__, test_address, *test_ptr);
+
+	preempt_enable();
+}
+
+void lkdtm_PPC_SLB_MULTIHIT(void)
+{
+	if (!radix_enabled()) {
+		pr_info("Injecting SLB multihit errors\n");
+		/*
+		 * These need not be separate tests, And they do pretty
+		 * much same thing. In any case we must recover from the
+		 * errors introduced by these functions, machine would not
+		 * survive these tests in case of failure to handle.
+		 */
+		inject_vmalloc_slb_multihit();
+		inject_kmalloc_slb_multihit();
+		insert_dup_slb_entry_0();
+		pr_info("Recovered from SLB multihit errors\n");
+	} else {
+		pr_err("XFAIL: This test is for ppc64 and with hash mode MMU only\n");
+	}
+}
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 74a8d329a72c..18e4599863c0 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -68,3 +68,4 @@ USERCOPY_STACK_BEYOND
 USERCOPY_KERNEL
 STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
+PPC_SLB_MULTIHIT Recovered
-- 
cgit v1.2.3-70-g09d2


From d4bff72c8401e6f56194ecf455db70ebc22929e2 Mon Sep 17 00:00:00 2001
From: Thomas Karlsson <thomas.karlsson@paneda.se>
Date: Wed, 2 Dec 2020 19:49:58 +0100
Subject: macvlan: Support for high multicast packet rate

Background:
Broadcast and multicast packages are enqueued for later processing.
This queue was previously hardcoded to 1000.

This proved insufficient for handling very high packet rates.
This resulted in packet drops for multicast.
While at the same time unicast worked fine.

The change:
This patch make the queue length adjustable to accommodate
for environments with very high multicast packet rate.
But still keeps the default value of 1000 unless specified.

The queue length is specified as a request per macvlan
using the IFLA_MACVLAN_BC_QUEUE_LEN parameter.

The actual used queue length will then be the maximum of
any macvlan connected to the same port. The actual used
queue length for the port can be retrieved (read only)
by the IFLA_MACVLAN_BC_QUEUE_LEN_USED parameter for verification.

This will be followed up by a patch to iproute2
in order to adjust the parameter from userspace.

Signed-off-by: Thomas Karlsson <thomas.karlsson@paneda.se>
Link: https://lore.kernel.org/r/dd4673b2-7eab-edda-6815-85c67ce87f63@paneda.se
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/macvlan.c              | 40 ++++++++++++++++++++++++++++++++++++--
 include/linux/if_macvlan.h         |  1 +
 include/uapi/linux/if_link.h       |  2 ++
 tools/include/uapi/linux/if_link.h |  2 ++
 4 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d9b6c44a5911..fb51329f8964 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -35,7 +35,7 @@
 
 #define MACVLAN_HASH_BITS	8
 #define MACVLAN_HASH_SIZE	(1<<MACVLAN_HASH_BITS)
-#define MACVLAN_BC_QUEUE_LEN	1000
+#define MACVLAN_DEFAULT_BC_QUEUE_LEN	1000
 
 #define MACVLAN_F_PASSTHRU	1
 #define MACVLAN_F_ADDRCHANGE	2
@@ -46,6 +46,7 @@ struct macvlan_port {
 	struct list_head	vlans;
 	struct sk_buff_head	bc_queue;
 	struct work_struct	bc_work;
+	u32			bc_queue_len_used;
 	u32			flags;
 	int			count;
 	struct hlist_head	vlan_source_hash[MACVLAN_HASH_SIZE];
@@ -67,6 +68,7 @@ struct macvlan_skb_cb {
 #define MACVLAN_SKB_CB(__skb) ((struct macvlan_skb_cb *)&((__skb)->cb[0]))
 
 static void macvlan_port_destroy(struct net_device *dev);
+static void update_port_bc_queue_len(struct macvlan_port *port);
 
 static inline bool macvlan_passthru(const struct macvlan_port *port)
 {
@@ -354,7 +356,7 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port,
 	MACVLAN_SKB_CB(nskb)->src = src;
 
 	spin_lock(&port->bc_queue.lock);
-	if (skb_queue_len(&port->bc_queue) < MACVLAN_BC_QUEUE_LEN) {
+	if (skb_queue_len(&port->bc_queue) < port->bc_queue_len_used) {
 		if (src)
 			dev_hold(src->dev);
 		__skb_queue_tail(&port->bc_queue, nskb);
@@ -1218,6 +1220,7 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_source_hash[i]);
 
+	port->bc_queue_len_used = 0;
 	skb_queue_head_init(&port->bc_queue);
 	INIT_WORK(&port->bc_work, macvlan_process_broadcast);
 
@@ -1486,6 +1489,10 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			goto destroy_macvlan_port;
 	}
 
+	vlan->bc_queue_len_req = MACVLAN_DEFAULT_BC_QUEUE_LEN;
+	if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN])
+		vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]);
+
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto destroy_macvlan_port;
@@ -1496,6 +1503,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		goto unregister_netdev;
 
 	list_add_tail_rcu(&vlan->list, &port->vlans);
+	update_port_bc_queue_len(vlan->port);
 	netif_stacked_transfer_operstate(lowerdev, dev);
 	linkwatch_fire_event(dev);
 
@@ -1529,6 +1537,7 @@ void macvlan_dellink(struct net_device *dev, struct list_head *head)
 	if (vlan->mode == MACVLAN_MODE_SOURCE)
 		macvlan_flush_sources(vlan->port, vlan);
 	list_del_rcu(&vlan->list);
+	update_port_bc_queue_len(vlan->port);
 	unregister_netdevice_queue(dev, head);
 	netdev_upper_dev_unlink(vlan->lowerdev, dev);
 }
@@ -1572,6 +1581,12 @@ static int macvlan_changelink(struct net_device *dev,
 		}
 		vlan->flags = flags;
 	}
+
+	if (data && data[IFLA_MACVLAN_BC_QUEUE_LEN]) {
+		vlan->bc_queue_len_req = nla_get_u32(data[IFLA_MACVLAN_BC_QUEUE_LEN]);
+		update_port_bc_queue_len(vlan->port);
+	}
+
 	if (set_mode)
 		vlan->mode = mode;
 	if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
@@ -1602,6 +1617,8 @@ static size_t macvlan_get_size(const struct net_device *dev)
 		+ nla_total_size(2) /* IFLA_MACVLAN_FLAGS */
 		+ nla_total_size(4) /* IFLA_MACVLAN_MACADDR_COUNT */
 		+ macvlan_get_size_mac(vlan) /* IFLA_MACVLAN_MACADDR */
+		+ nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN */
+		+ nla_total_size(4) /* IFLA_MACVLAN_BC_QUEUE_LEN_USED */
 		);
 }
 
@@ -1625,6 +1642,7 @@ static int macvlan_fill_info(struct sk_buff *skb,
 				const struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_port *port = vlan->port;
 	int i;
 	struct nlattr *nest;
 
@@ -1645,6 +1663,10 @@ static int macvlan_fill_info(struct sk_buff *skb,
 		}
 		nla_nest_end(skb, nest);
 	}
+	if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN, vlan->bc_queue_len_req))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, IFLA_MACVLAN_BC_QUEUE_LEN_USED, port->bc_queue_len_used))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -1658,6 +1680,8 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
 	[IFLA_MACVLAN_MACADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
 	[IFLA_MACVLAN_MACADDR_DATA] = { .type = NLA_NESTED },
 	[IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 },
+	[IFLA_MACVLAN_BC_QUEUE_LEN] = { .type = NLA_U32 },
+	[IFLA_MACVLAN_BC_QUEUE_LEN_USED] = { .type = NLA_REJECT },
 };
 
 int macvlan_link_register(struct rtnl_link_ops *ops)
@@ -1688,6 +1712,18 @@ static struct rtnl_link_ops macvlan_link_ops = {
 	.priv_size      = sizeof(struct macvlan_dev),
 };
 
+static void update_port_bc_queue_len(struct macvlan_port *port)
+{
+	u32 max_bc_queue_len_req = 0;
+	struct macvlan_dev *vlan;
+
+	list_for_each_entry(vlan, &port->vlans, list) {
+		if (vlan->bc_queue_len_req > max_bc_queue_len_req)
+			max_bc_queue_len_req = vlan->bc_queue_len_req;
+	}
+	port->bc_queue_len_used = max_bc_queue_len_req;
+}
+
 static int macvlan_device_event(struct notifier_block *unused,
 				unsigned long event, void *ptr)
 {
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index a367ead4bf4b..96556c64c95d 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -30,6 +30,7 @@ struct macvlan_dev {
 	enum macvlan_mode	mode;
 	u16			flags;
 	unsigned int		macaddr_count;
+	u32			bc_queue_len_req;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll		*netpoll;
 #endif
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c4b23f06f69e..874cc12a34d9 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -588,6 +588,8 @@ enum {
 	IFLA_MACVLAN_MACADDR,
 	IFLA_MACVLAN_MACADDR_DATA,
 	IFLA_MACVLAN_MACADDR_COUNT,
+	IFLA_MACVLAN_BC_QUEUE_LEN,
+	IFLA_MACVLAN_BC_QUEUE_LEN_USED,
 	__IFLA_MACVLAN_MAX,
 };
 
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 781e482dc499..d208b2af697f 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -409,6 +409,8 @@ enum {
 	IFLA_MACVLAN_MACADDR,
 	IFLA_MACVLAN_MACADDR_DATA,
 	IFLA_MACVLAN_MACADDR_COUNT,
+	IFLA_MACVLAN_BC_QUEUE_LEN,
+	IFLA_MACVLAN_BC_QUEUE_LEN_USED,
 	__IFLA_MACVLAN_MAX,
 };
 
-- 
cgit v1.2.3-70-g09d2


From 0c55f867f0c96dff93d4e0b5973975d65afb26d8 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Wed, 2 Dec 2020 21:35:36 +0100
Subject: selftests: kvm/set_memory_region_test: Fix race in move region test

The current memory region move test correctly handles the situation that
the second (realigning) memslot move operation would temporarily trigger
MMIO until it completes, however it does not handle the case in which the
first (misaligning) move operation does this, too.
This results in false test assertions in case it does so.

Fix this by handling temporary MMIO from the first memslot move operation
in the test guest code, too.

Fixes: 8a0639fe9201 ("KVM: sefltests: Add explicit synchronization to move mem region test")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <0fdddb94bb0e31b7da129a809a308d91c10c0b5e.1606941224.git.maciej.szmigiero@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/set_memory_region_test.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index b3ece55a2da6..6f441dd9f33c 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -156,14 +156,23 @@ static void guest_code_move_memory_region(void)
 	GUEST_SYNC(0);
 
 	/*
-	 * Spin until the memory region is moved to a misaligned address.  This
-	 * may or may not trigger MMIO, as the window where the memslot is
-	 * invalid is quite small.
+	 * Spin until the memory region starts getting moved to a
+	 * misaligned address.
+	 * Every region move may or may not trigger MMIO, as the
+	 * window where the memslot is invalid is usually quite small.
 	 */
 	val = guest_spin_on_val(0);
 	GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
 
-	/* Spin until the memory region is realigned. */
+	/* Spin until the misaligning memory region move completes. */
+	val = guest_spin_on_val(MMIO_VAL);
+	GUEST_ASSERT_1(val == 1 || val == 0, val);
+
+	/* Spin until the memory region starts to get re-aligned. */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+	/* Spin until the re-aligning memory region move completes. */
 	val = guest_spin_on_val(MMIO_VAL);
 	GUEST_ASSERT_1(val == 1, val);
 
-- 
cgit v1.2.3-70-g09d2


From 71ccb50074f31a50a1da4c1d8306d54da0907b00 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 1 Dec 2020 22:52:41 -0800
Subject: tools/bpftool: Emit name <anon> for anonymous BTFs

For consistency of output, emit "name <anon>" for BTFs without the name. This
keeps output more consistent and obvious.

Suggested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202065244.530571-2-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index ed5e97157241..bd46af6a61cc 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -750,6 +750,8 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
 		printf("name [%s]  ", name);
 	else if (name && name[0])
 		printf("name %s  ", name);
+	else
+		printf("name <anon>  ");
 	printf("size %uB", info->btf_size);
 
 	n = 0;
-- 
cgit v1.2.3-70-g09d2


From 0cfdcd6378071f383c900e3d8862347e2af1d1ca Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 1 Dec 2020 22:52:42 -0800
Subject: libbpf: Add base BTF accessor

Add ability to get base BTF. It can be also used to check if BTF is split BTF.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202065244.530571-3-andrii@kernel.org
---
 tools/lib/bpf/btf.c      | 5 +++++
 tools/lib/bpf/btf.h      | 1 +
 tools/lib/bpf/libbpf.map | 1 +
 3 files changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 8ff46cd30ca1..1935e83d309c 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -432,6 +432,11 @@ __u32 btf__get_nr_types(const struct btf *btf)
 	return btf->start_id + btf->nr_types - 1;
 }
 
+const struct btf *btf__base_btf(const struct btf *btf)
+{
+	return btf->base_btf;
+}
+
 /* internal helper returning non-const pointer to a type */
 static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1093f6fe6800..1237bcd1dd17 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -51,6 +51,7 @@ LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
 					const char *type_name, __u32 kind);
 LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
+LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
 						  __u32 id);
 LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 29ff4807b909..ed55498c4122 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -340,6 +340,7 @@ LIBBPF_0.2.0 {
 
 LIBBPF_0.3.0 {
 	global:
+		btf__base_btf;
 		btf__parse_elf_split;
 		btf__parse_raw_split;
 		btf__parse_split;
-- 
cgit v1.2.3-70-g09d2


From fa4528379a51ff8d5271e1bcfa0d5ef71657869f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 1 Dec 2020 22:52:43 -0800
Subject: tools/bpftool: Auto-detect split BTFs in common cases

In case of working with module's split BTF from /sys/kernel/btf/*,
auto-substitute /sys/kernel/btf/vmlinux as the base BTF. This makes using
bpftool with module BTFs faster and simpler.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202065244.530571-4-andrii@kernel.org
---
 tools/bpf/bpftool/btf.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index bd46af6a61cc..94cd3bad5430 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -357,11 +357,13 @@ static int dump_btf_raw(const struct btf *btf,
 			dump_btf_type(btf, root_type_ids[i], t);
 		}
 	} else {
+		const struct btf *base;
 		int cnt = btf__get_nr_types(btf);
 		int start_id = 1;
 
-		if (base_btf)
-			start_id = btf__get_nr_types(base_btf) + 1;
+		base = btf__base_btf(btf);
+		if (base)
+			start_id = btf__get_nr_types(base) + 1;
 
 		for (i = start_id; i <= cnt; i++) {
 			t = btf__type_by_id(btf, i);
@@ -428,7 +430,7 @@ done:
 
 static int do_dump(int argc, char **argv)
 {
-	struct btf *btf = NULL;
+	struct btf *btf = NULL, *base = NULL;
 	__u32 root_type_ids[2];
 	int root_type_cnt = 0;
 	bool dump_c = false;
@@ -502,7 +504,21 @@ static int do_dump(int argc, char **argv)
 		}
 		NEXT_ARG();
 	} else if (is_prefix(src, "file")) {
-		btf = btf__parse_split(*argv, base_btf);
+		const char sysfs_prefix[] = "/sys/kernel/btf/";
+		const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux";
+
+		if (!base_btf &&
+		    strncmp(*argv, sysfs_prefix, sizeof(sysfs_prefix) - 1) == 0 &&
+		    strcmp(*argv, sysfs_vmlinux) != 0) {
+			base = btf__parse(sysfs_vmlinux, NULL);
+			if (libbpf_get_error(base)) {
+				p_err("failed to parse vmlinux BTF at '%s': %ld\n",
+				      sysfs_vmlinux, libbpf_get_error(base));
+				base = NULL;
+			}
+		}
+
+		btf = btf__parse_split(*argv, base ?: base_btf);
 		if (IS_ERR(btf)) {
 			err = -PTR_ERR(btf);
 			btf = NULL;
@@ -567,6 +583,7 @@ static int do_dump(int argc, char **argv)
 done:
 	close(fd);
 	btf__free(btf);
+	btf__free(base);
 	return err;
 }
 
-- 
cgit v1.2.3-70-g09d2


From a874c8c389a12b9f5ab67ba01995f06bf82e94fe Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 09:49:47 -0800
Subject: selftests/bpf: Copy file using read/write in local storage test

Splice (copy_file_range) doesn't work on all filesystems. I'm running
test kernels on top of my read-only disk image and it uses plan9 under the
hood. This prevents test_local_storage from successfully passing.

There is really no technical reason to use splice, so lets do
old-school read/write to copy file; this should work in all
environments.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202174947.3621989-1-sdf@google.com
---
 .../selftests/bpf/prog_tests/test_local_storage.c  | 28 ++++++++++++++--------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index fcca7ba1f368..c0fe73a17ed1 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -21,14 +21,6 @@ static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
 	return syscall(__NR_pidfd_open, pid, flags);
 }
 
-static inline ssize_t copy_file_range(int fd_in, loff_t *off_in, int fd_out,
-				      loff_t *off_out, size_t len,
-				      unsigned int flags)
-{
-	return syscall(__NR_copy_file_range, fd_in, off_in, fd_out, off_out,
-		       len, flags);
-}
-
 static unsigned int duration;
 
 #define TEST_STORAGE_VALUE 0xbeefdead
@@ -47,6 +39,7 @@ static int copy_rm(char *dest)
 {
 	int fd_in, fd_out = -1, ret = 0;
 	struct stat stat;
+	char *buf = NULL;
 
 	fd_in = open("/bin/rm", O_RDONLY);
 	if (fd_in < 0)
@@ -64,18 +57,33 @@ static int copy_rm(char *dest)
 		goto out;
 	}
 
-	ret = copy_file_range(fd_in, NULL, fd_out, NULL, stat.st_size, 0);
-	if (ret == -1) {
+	buf = malloc(stat.st_blksize);
+	if (!buf) {
 		ret = -errno;
 		goto out;
 	}
 
+	while (ret = read(fd_in, buf, stat.st_blksize), ret > 0) {
+		ret = write(fd_out, buf, ret);
+		if (ret < 0) {
+			ret = -errno;
+			goto out;
+
+		}
+	}
+	if (ret < 0) {
+		ret = -errno;
+		goto out;
+
+	}
+
 	/* Set executable permission on the copied file */
 	ret = chmod(dest, 0100);
 	if (ret == -1)
 		ret = -errno;
 
 out:
+	free(buf);
 	close(fd_in);
 	close(fd_out);
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From 22e8ebe35a2e30ee19e02c41cacc99c2f896bc4b Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Thu, 3 Dec 2020 10:22:34 +0000
Subject: tools/resolve_btfids: Fix some error messages

Add missing newlines and fix polarity of strerror argument.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20201203102234.648540-1-jackmanb@google.com
---
 tools/bpf/resolve_btfids/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index dfa540d8a02d..e3ea569ee125 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -454,7 +454,7 @@ static int symbols_collect(struct object *obj)
 			return -ENOMEM;
 
 		if (id->addr_cnt >= ADDR_CNT) {
-			pr_err("FAILED symbol %s crossed the number of allowed lists",
+			pr_err("FAILED symbol %s crossed the number of allowed lists\n",
 				id->name);
 			return -1;
 		}
@@ -477,8 +477,8 @@ static int symbols_resolve(struct object *obj)
 	btf = btf__parse(obj->btf ?: obj->path, NULL);
 	err = libbpf_get_error(btf);
 	if (err) {
-		pr_err("FAILED: load BTF from %s: %s",
-			obj->path, strerror(err));
+		pr_err("FAILED: load BTF from %s: %s\n",
+			obj->path, strerror(-err));
 		return -1;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From e459f49b4394e2630ea55d5ac7a49402686848fe Mon Sep 17 00:00:00 2001
From: Mariusz Dudek <mariuszx.dudek@intel.com>
Date: Thu, 3 Dec 2020 10:05:45 +0100
Subject: libbpf: Separate XDP program load with xsk socket creation

Add support for separation of eBPF program load and xsk socket
creation.

This is needed for use-case when you want to privide as little
privileges as possible to the data plane application that will
handle xsk socket creation and incoming traffic.

With this patch the data entity container can be run with only
CAP_NET_RAW capability to fulfill its purpose of creating xsk
socket and handling packages. In case your umem is larger or
equal process limit for MEMLOCK you need either increase the
limit or CAP_IPC_LOCK capability.

To resolve privileges issue two APIs are introduced:

- xsk_setup_xdp_prog - loads the built in XDP program. It can
also return xsks_map_fd which is needed by unprivileged process
to update xsks_map with AF_XDP socket "fd"

- xsk_socket__update_xskmap - inserts an AF_XDP socket into an xskmap
for a particular xsk_socket

Signed-off-by: Mariusz Dudek <mariuszx.dudek@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20201203090546.11976-2-mariuszx.dudek@intel.com
---
 tools/lib/bpf/libbpf.map |  2 ++
 tools/lib/bpf/xsk.c      | 92 +++++++++++++++++++++++++++++++++++++++++++-----
 tools/lib/bpf/xsk.h      |  5 +++
 3 files changed, 90 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index ed55498c4122..7c4126542e2b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -346,4 +346,6 @@ LIBBPF_0.3.0 {
 		btf__parse_split;
 		btf__new_empty_split;
 		btf__new_split;
+		xsk_setup_xdp_prog;
+		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 9bc537d0b92d..4b051ec7cfbb 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -566,8 +566,35 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 				   &xsk->fd, 0);
 }
 
-static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 {
+	char ifname[IFNAMSIZ];
+	struct xsk_ctx *ctx;
+	char *interface;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return -ENOMEM;
+
+	interface = if_indextoname(ifindex, &ifname[0]);
+	if (!interface) {
+		free(ctx);
+		return -errno;
+	}
+
+	ctx->ifindex = ifindex;
+	strncpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	ctx->ifname[IFNAMSIZ - 1] = 0;
+
+	xsk->ctx = ctx;
+
+	return 0;
+}
+
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
+				int *xsks_map_fd)
+{
+	struct xsk_socket *xsk = _xdp;
 	struct xsk_ctx *ctx = xsk->ctx;
 	__u32 prog_id = 0;
 	int err;
@@ -584,8 +611,7 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 
 		err = xsk_load_xdp_prog(xsk);
 		if (err) {
-			xsk_delete_bpf_maps(xsk);
-			return err;
+			goto err_load_xdp_prog;
 		}
 	} else {
 		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
@@ -598,15 +624,29 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 		}
 	}
 
-	if (xsk->rx)
+	if (xsk->rx) {
 		err = xsk_set_bpf_maps(xsk);
-	if (err) {
-		xsk_delete_bpf_maps(xsk);
-		close(ctx->prog_fd);
-		return err;
+		if (err) {
+			if (!prog_id) {
+				goto err_set_bpf_maps;
+			} else {
+				close(ctx->prog_fd);
+				return err;
+			}
+		}
 	}
+	if (xsks_map_fd)
+		*xsks_map_fd = ctx->xsks_map_fd;
 
 	return 0;
+
+err_set_bpf_maps:
+	close(ctx->prog_fd);
+	bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+err_load_xdp_prog:
+	xsk_delete_bpf_maps(xsk);
+
+	return err;
 }
 
 static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
@@ -689,6 +729,40 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	return ctx;
 }
 
+static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
+{
+	free(xsk->ctx);
+	free(xsk);
+}
+
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
+{
+	xsk->ctx->xsks_map_fd = fd;
+	return xsk_set_bpf_maps(xsk);
+}
+
+int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
+{
+	struct xsk_socket *xsk;
+	int res;
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return -ENOMEM;
+
+	res = xsk_create_xsk_struct(ifindex, xsk);
+	if (res) {
+		free(xsk);
+		return -EINVAL;
+	}
+
+	res = __xsk_setup_xdp_prog(xsk, xsks_map_fd);
+
+	xsk_destroy_xsk_struct(xsk);
+
+	return res;
+}
+
 int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			      const char *ifname,
 			      __u32 queue_id, struct xsk_umem *umem,
@@ -838,7 +912,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 	ctx->prog_fd = -1;
 
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
-		err = xsk_setup_xdp_prog(xsk);
+		err = __xsk_setup_xdp_prog(xsk, NULL);
 		if (err)
 			goto out_mmap_tx;
 	}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 5865e082ba0b..e9f121f5d129 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -204,6 +204,11 @@ struct xsk_umem_config {
 	__u32 flags;
 };
 
+LIBBPF_API int xsk_setup_xdp_prog(int ifindex,
+				  int *xsks_map_fd);
+LIBBPF_API int xsk_socket__update_xskmap(struct xsk_socket *xsk,
+					 int xsks_map_fd);
+
 /* Flags for the libbpf_flags field. */
 #define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
 
-- 
cgit v1.2.3-70-g09d2


From 3db980449bc3b9765c78210787bcbf4305636982 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:34 +0000
Subject: selftests/bpf: Update ima_setup.sh for busybox

losetup on busybox does not output the name of loop device on using
-f with --show. It also doesn't support -j to find the loop devices
for a given backing file. losetup is updated to use "-a" which is
available on busybox.

blkid does not support options (-s and -o) to only display the uuid, so
parse the output instead.

Not all environments have mkfs.ext4, the test requires a loop device
with a backing image file which could formatted with any filesystem.
Update to using mkfs.ext2 which is available on busybox.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-2-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 15490ccc5e55..137f2d32598f 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -3,6 +3,7 @@
 
 set -e
 set -u
+set -o pipefail
 
 IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
 TEST_BINARY="/bin/true"
@@ -23,13 +24,15 @@ setup()
 
         dd if=/dev/zero of="${mount_img}" bs=1M count=10
 
-        local loop_device="$(losetup --find --show ${mount_img})"
+        losetup -f "${mount_img}"
+        local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
 
-        mkfs.ext4 "${loop_device}"
+        mkfs.ext2 "${loop_device:?}"
         mount "${loop_device}" "${mount_dir}"
 
         cp "${TEST_BINARY}" "${mount_dir}"
-        local mount_uuid="$(blkid -s UUID -o value ${loop_device})"
+        local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
+
         echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
 }
 
@@ -38,7 +41,8 @@ cleanup() {
         local mount_img="${tmp_dir}/test.img"
         local mount_dir="${tmp_dir}/mnt"
 
-        local loop_devices=$(losetup -j ${mount_img} -O NAME --noheadings)
+        local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+
         for loop_dev in "${loop_devices}"; do
                 losetup -d $loop_dev
         done
-- 
cgit v1.2.3-70-g09d2


From 1ee076719d4e14c005f375c50731ed44eb48fee4 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:35 +0000
Subject: selftests/bpf: Ensure securityfs mount before writing ima policy

SecurityFS may not be mounted even if it is enabled in the kernel
config. So, check if the mount exists in /proc/mounts by parsing the
file and, if not, mount it on /sys/kernel/security.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-3-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 137f2d32598f..b1ee4bf06996 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -14,6 +14,20 @@ usage()
         exit 1
 }
 
+ensure_mount_securityfs()
+{
+        local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}')
+
+        if [ -z "${securityfs_dir}" ]; then
+                securityfs_dir=/sys/kernel/security
+                mount -t securityfs security "${securityfs_dir}"
+        fi
+
+        if [ ! -d "${securityfs_dir}" ]; then
+                echo "${securityfs_dir}: securityfs is not mounted" && exit 1
+        fi
+}
+
 setup()
 {
         local tmp_dir="$1"
@@ -33,6 +47,7 @@ setup()
         cp "${TEST_BINARY}" "${mount_dir}"
         local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
 
+        ensure_mount_securityfs
         echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
 }
 
-- 
cgit v1.2.3-70-g09d2


From d932e043b9d6d60113e90267ae2fbe4e946d7b08 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:36 +0000
Subject: selftests/bpf: Add config dependency on BLK_DEV_LOOP

The ima selftest restricts its scope to a test filesystem image
mounted on a loop device and prevents permanent ima policy changes for
the whole system.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-4-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 365bf9771b07..37e1f303fc11 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -43,3 +43,4 @@ CONFIG_IMA=y
 CONFIG_SECURITYFS=y
 CONFIG_IMA_WRITE_POLICY=y
 CONFIG_IMA_READ_POLICY=y
+CONFIG_BLK_DEV_LOOP=y
-- 
cgit v1.2.3-70-g09d2


From ffebecd9d49542046c5ecbb410af01e016636e19 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@google.com>
Date: Thu, 3 Dec 2020 19:14:37 +0000
Subject: selftests/bpf: Indent ima_setup.sh with tabs.

The file was formatted with spaces instead of tabs and went unnoticed
as checkpatch.pl did not complain (probably because this is a shell
script). Re-indent it with tabs to be consistent with other scripts.

Signed-off-by: KP Singh <kpsingh@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203191437.666737-5-kpsingh@chromium.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 108 +++++++++++++++----------------
 1 file changed, 54 insertions(+), 54 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index b1ee4bf06996..2bfc646bc230 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -10,90 +10,90 @@ TEST_BINARY="/bin/true"
 
 usage()
 {
-        echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
-        exit 1
+	echo "Usage: $0 <setup|cleanup|run> <existing_tmp_dir>"
+	exit 1
 }
 
 ensure_mount_securityfs()
 {
-        local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}')
+	local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}')
 
-        if [ -z "${securityfs_dir}" ]; then
-                securityfs_dir=/sys/kernel/security
-                mount -t securityfs security "${securityfs_dir}"
-        fi
+	if [ -z "${securityfs_dir}" ]; then
+		securityfs_dir=/sys/kernel/security
+		mount -t securityfs security "${securityfs_dir}"
+	fi
 
-        if [ ! -d "${securityfs_dir}" ]; then
-                echo "${securityfs_dir}: securityfs is not mounted" && exit 1
-        fi
+	if [ ! -d "${securityfs_dir}" ]; then
+		echo "${securityfs_dir}: securityfs is not mounted" && exit 1
+	fi
 }
 
 setup()
 {
-        local tmp_dir="$1"
-        local mount_img="${tmp_dir}/test.img"
-        local mount_dir="${tmp_dir}/mnt"
-        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
-        mkdir -p ${mount_dir}
+	local tmp_dir="$1"
+	local mount_img="${tmp_dir}/test.img"
+	local mount_dir="${tmp_dir}/mnt"
+	local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+	mkdir -p ${mount_dir}
 
-        dd if=/dev/zero of="${mount_img}" bs=1M count=10
+	dd if=/dev/zero of="${mount_img}" bs=1M count=10
 
-        losetup -f "${mount_img}"
-        local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+	losetup -f "${mount_img}"
+	local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
 
-        mkfs.ext2 "${loop_device:?}"
-        mount "${loop_device}" "${mount_dir}"
+	mkfs.ext2 "${loop_device:?}"
+	mount "${loop_device}" "${mount_dir}"
 
-        cp "${TEST_BINARY}" "${mount_dir}"
-        local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
+	cp "${TEST_BINARY}" "${mount_dir}"
+	local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
 
-        ensure_mount_securityfs
-        echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
+	ensure_mount_securityfs
+	echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
 }
 
 cleanup() {
-        local tmp_dir="$1"
-        local mount_img="${tmp_dir}/test.img"
-        local mount_dir="${tmp_dir}/mnt"
+	local tmp_dir="$1"
+	local mount_img="${tmp_dir}/test.img"
+	local mount_dir="${tmp_dir}/mnt"
 
-        local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+	local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
 
-        for loop_dev in "${loop_devices}"; do
-                losetup -d $loop_dev
-        done
+	for loop_dev in "${loop_devices}"; do
+		losetup -d $loop_dev
+	done
 
-        umount ${mount_dir}
-        rm -rf ${tmp_dir}
+	umount ${mount_dir}
+	rm -rf ${tmp_dir}
 }
 
 run()
 {
-        local tmp_dir="$1"
-        local mount_dir="${tmp_dir}/mnt"
-        local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+	local tmp_dir="$1"
+	local mount_dir="${tmp_dir}/mnt"
+	local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
 
-        exec "${copied_bin_path}"
+	exec "${copied_bin_path}"
 }
 
 main()
 {
-        [[ $# -ne 2 ]] && usage
-
-        local action="$1"
-        local tmp_dir="$2"
-
-        [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
-
-        if [[ "${action}" == "setup" ]]; then
-                setup "${tmp_dir}"
-        elif [[ "${action}" == "cleanup" ]]; then
-                cleanup "${tmp_dir}"
-        elif [[ "${action}" == "run" ]]; then
-                run "${tmp_dir}"
-        else
-                echo "Unknown action: ${action}"
-                exit 1
-        fi
+	[[ $# -ne 2 ]] && usage
+
+	local action="$1"
+	local tmp_dir="$2"
+
+	[[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+	if [[ "${action}" == "setup" ]]; then
+		setup "${tmp_dir}"
+	elif [[ "${action}" == "cleanup" ]]; then
+		cleanup "${tmp_dir}"
+	elif [[ "${action}" == "run" ]]; then
+		run "${tmp_dir}"
+	else
+		echo "Unknown action: ${action}"
+		exit 1
+	fi
 }
 
 main "$@"
-- 
cgit v1.2.3-70-g09d2


From 80b2b5c3a701d56de98d00d99bc9cc384fb316d9 Mon Sep 17 00:00:00 2001
From: Andrei Matei <andreimatei1@gmail.com>
Date: Wed, 2 Dec 2020 23:34:10 -0500
Subject: libbpf: Fail early when loading programs with unspecified type

Before this patch, a program with unspecified type
(BPF_PROG_TYPE_UNSPEC) would be passed to the BPF syscall, only to have
the kernel reject it with an opaque invalid argument error. This patch
makes libbpf reject such programs with a nicer error message - in
particular libbpf now tries to diagnose bad ELF section names at both
open time and load time.

Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203043410.59699-1-andreimatei1@gmail.com
---
 tools/lib/bpf/libbpf.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 313034117070..d6f45538444d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6629,6 +6629,16 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	char *log_buf = NULL;
 	int btf_fd, ret;
 
+	if (prog->type == BPF_PROG_TYPE_UNSPEC) {
+		/*
+		 * The program type must be set.  Most likely we couldn't find a proper
+		 * section definition at load time, and thus we didn't infer the type.
+		 */
+		pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
+			prog->name, prog->sec_name);
+		return -EINVAL;
+	}
+
 	if (!insns || !insns_cnt)
 		return -EINVAL;
 
@@ -6920,9 +6930,12 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 
 	bpf_object__for_each_program(prog, obj) {
 		prog->sec_def = find_sec_def(prog->sec_name);
-		if (!prog->sec_def)
+		if (!prog->sec_def) {
 			/* couldn't guess, but user might manually specify */
+			pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
+				prog->name, prog->sec_name);
 			continue;
+		}
 
 		if (prog->sec_def->is_sleepable)
 			prog->prog_flags |= BPF_F_SLEEPABLE;
-- 
cgit v1.2.3-70-g09d2


From 9cf309c56f7910a81fbe053b6f11c3b1f0987b12 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Thu, 3 Dec 2020 10:33:06 +0100
Subject: libbpf: Sanitise map names before pinning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we added sanitising of map names before loading programs to libbpf, we
still allowed periods in the name. While the kernel will accept these for
the map names themselves, they are not allowed in file names when pinning
maps. This means that bpf_object__pin_maps() will fail if called on an
object that contains internal maps (such as sections .rodata).

Fix this by replacing periods with underscores when constructing map pin
paths. This only affects the paths generated by libbpf when
bpf_object__pin_maps() is called with a path argument. Any pin paths set
by bpf_map__set_pin_path() are unaffected, and it will still be up to the
caller to avoid invalid characters in those.

Fixes: 113e6b7e15e2 ("libbpf: Sanitise internal map names so they are not rejected by the kernel")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201203093306.107676-1-toke@redhat.com
---
 tools/lib/bpf/libbpf.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d6f45538444d..b2e16efabde0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7659,6 +7659,16 @@ bool bpf_map__is_pinned(const struct bpf_map *map)
 	return map->pinned;
 }
 
+static void sanitize_pin_path(char *s)
+{
+	/* bpffs disallows periods in path names */
+	while (*s) {
+		if (*s == '.')
+			*s = '_';
+		s++;
+	}
+}
+
 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 {
 	struct bpf_map *map;
@@ -7688,6 +7698,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 				err = -ENAMETOOLONG;
 				goto err_unpin_maps;
 			}
+			sanitize_pin_path(buf);
 			pin_path = buf;
 		} else if (!map->pin_path) {
 			continue;
@@ -7732,6 +7743,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 				return -EINVAL;
 			else if (len >= PATH_MAX)
 				return -ENAMETOOLONG;
+			sanitize_pin_path(buf);
 			pin_path = buf;
 		} else if (!map->pin_path) {
 			continue;
-- 
cgit v1.2.3-70-g09d2


From d6d418bd8f92aaa4c7c26d606188147c2ee0dae9 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 2 Dec 2020 15:13:32 -0800
Subject: libbpf: Cap retries in sys_bpf_prog_load

I've seen a situation, where a process that's under pprof constantly
generates SIGPROF which prevents program loading indefinitely.
The right thing to do probably is to disable signals in the upper
layers while loading, but it still would be nice to get some error from
libbpf instead of an endless loop.

Let's add some small retry limit to the program loading:
try loading the program 5 (arbitrary) times and give up.

v2:
* 10 -> 5 retires (Andrii Nakryiko)

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201202231332.3923644-1-sdf@google.com
---
 tools/lib/bpf/bpf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d27e34133973..4025266d0fb0 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -67,11 +67,12 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 
 static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
 {
+	int retries = 5;
 	int fd;
 
 	do {
 		fd = sys_bpf(BPF_PROG_LOAD, attr, size);
-	} while (fd < 0 && errno == EAGAIN);
+	} while (fd < 0 && errno == EAGAIN && retries-- > 0);
 
 	return fd;
 }
-- 
cgit v1.2.3-70-g09d2


From 58c185b85d0c1753b0b6a9390294bd883faf4d77 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Thu, 3 Dec 2020 12:08:50 +0000
Subject: bpf: Fix cold build of test_progs-no_alu32

This object lives inside the trunner output dir,
i.e. tools/testing/selftests/bpf/no_alu32/btf_data.o

At some point it gets copied into the parent directory during another
part of the build, but that doesn't happen when building
test_progs-no_alu32 from clean.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20201203120850.859170-1-jackmanb@google.com
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 894192c319fb..371b022d932c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -378,7 +378,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     | $(TRUNNER_BINARY)-extras
 	$$(call msg,BINARY,,$$@)
 	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
-	$(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
+	$(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
 
 endef
 
-- 
cgit v1.2.3-70-g09d2


From 55144f31f0d2fdd3e74ead67f1649bf577961eaa Mon Sep 17 00:00:00 2001
From: Prankur gupta <prankgup@fb.com>
Date: Wed, 2 Dec 2020 13:31:52 -0800
Subject: selftests/bpf: Add Userspace tests for TCP_WINDOW_CLAMP

Adding selftests for new added functionality to set TCP_WINDOW_CLAMP
from bpf setsockopt.

Signed-off-by: Prankur gupta <prankgup@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201202213152.435886-3-prankgup@fb.com
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h      |  1 +
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c |  4 +++
 .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 33 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tcpbpf.h          |  2 ++
 4 files changed, 40 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 2915664c335d..6a9053162cf2 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -56,6 +56,7 @@ struct tcp_sock {
 	__u32	rcv_nxt;
 	__u32	snd_nxt;
 	__u32	snd_una;
+	__u32	window_clamp;
 	__u8	ecn_flags;
 	__u32	delivered;
 	__u32	delivered_ce;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index ab5281475f44..87923d2865b7 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -42,6 +42,10 @@ static void verify_result(struct tcpbpf_globals *result)
 
 	/* check getsockopt for SAVED_SYN */
 	ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn");
+
+	/* check getsockopt for window_clamp */
+	ASSERT_EQ(result->window_clamp_client, 9216, "window_clamp_client");
+	ASSERT_EQ(result->window_clamp_server, 9216, "window_clamp_server");
 }
 
 static void run_test(struct tcpbpf_globals *result)
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index e85e49deba70..94f50f7e94d6 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -12,17 +12,41 @@
 #include <linux/tcp.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
 #include "test_tcpbpf.h"
 
 struct tcpbpf_globals global = {};
 int _version SEC("version") = 1;
 
+/**
+ * SOL_TCP is defined in <netinet/tcp.h> while
+ * TCP_SAVED_SYN is defined in already included <linux/tcp.h>
+ */
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+static __always_inline int get_tp_window_clamp(struct bpf_sock_ops *skops)
+{
+	struct bpf_sock *sk;
+	struct tcp_sock *tp;
+
+	sk = skops->sk;
+	if (!sk)
+		return -1;
+	tp = bpf_skc_to_tcp_sock(sk);
+	if (!tp)
+		return -1;
+	return tp->window_clamp;
+}
+
 SEC("sockops")
 int bpf_testcb(struct bpf_sock_ops *skops)
 {
 	char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
 	struct bpf_sock_ops *reuse = skops;
 	struct tcphdr *thdr;
+	int window_clamp = 9216;
 	int good_call_rv = 0;
 	int bad_call_rv = 0;
 	int save_syn = 1;
@@ -75,6 +99,11 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 	global.event_map |= (1 << op);
 
 	switch (op) {
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP,
+				    &window_clamp, sizeof(window_clamp));
+		global.window_clamp_client = get_tp_window_clamp(skops);
+		break;
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
 		/* Test failure to set largest cb flag (assumes not defined) */
 		global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
@@ -100,6 +129,10 @@ int bpf_testcb(struct bpf_sock_ops *skops)
 				global.tcp_saved_syn = v;
 			}
 		}
+		rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP,
+				    &window_clamp, sizeof(window_clamp));
+
+		global.window_clamp_server = get_tp_window_clamp(skops);
 		break;
 	case BPF_SOCK_OPS_RTO_CB:
 		break;
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 0ed33521cbbb..9dd9b5590f9d 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -16,5 +16,7 @@ struct tcpbpf_globals {
 	__u32 num_close_events;
 	__u32 tcp_save_syn;
 	__u32 tcp_saved_syn;
+	__u32 window_clamp_client;
+	__u32 window_clamp_server;
 };
 #endif
-- 
cgit v1.2.3-70-g09d2


From a19f93cfafdf85851c69bc9f677aa4f40c53610f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:23 -0800
Subject: libbpf: Add internal helper to load BTF data by FD

Add a btf_get_from_fd() helper, which constructs struct btf from in-kernel BTF
data by FD. This is used for loading module BTFs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-4-andrii@kernel.org
---
 tools/lib/bpf/btf.c             | 61 +++++++++++++++++++++++------------------
 tools/lib/bpf/libbpf_internal.h |  1 +
 2 files changed, 36 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 1935e83d309c..3c3f2bc6c652 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1323,35 +1323,27 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 	return btf__str_by_offset(btf, offset);
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
 {
-	struct bpf_btf_info btf_info = { 0 };
+	struct bpf_btf_info btf_info;
 	__u32 len = sizeof(btf_info);
 	__u32 last_size;
-	int btf_fd;
+	struct btf *btf;
 	void *ptr;
 	int err;
 
-	err = 0;
-	*btf = NULL;
-	btf_fd = bpf_btf_get_fd_by_id(id);
-	if (btf_fd < 0)
-		return 0;
-
 	/* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
 	 * let's start with a sane default - 4KiB here - and resize it only if
 	 * bpf_obj_get_info_by_fd() needs a bigger buffer.
 	 */
-	btf_info.btf_size = 4096;
-	last_size = btf_info.btf_size;
+	last_size = 4096;
 	ptr = malloc(last_size);
-	if (!ptr) {
-		err = -ENOMEM;
-		goto exit_free;
-	}
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
 
-	memset(ptr, 0, last_size);
+	memset(&btf_info, 0, sizeof(btf_info));
 	btf_info.btf = ptr_to_u64(ptr);
+	btf_info.btf_size = last_size;
 	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
 
 	if (!err && btf_info.btf_size > last_size) {
@@ -1360,31 +1352,48 @@ int btf__get_from_id(__u32 id, struct btf **btf)
 		last_size = btf_info.btf_size;
 		temp_ptr = realloc(ptr, last_size);
 		if (!temp_ptr) {
-			err = -ENOMEM;
+			btf = ERR_PTR(-ENOMEM);
 			goto exit_free;
 		}
 		ptr = temp_ptr;
-		memset(ptr, 0, last_size);
+
+		len = sizeof(btf_info);
+		memset(&btf_info, 0, sizeof(btf_info));
 		btf_info.btf = ptr_to_u64(ptr);
+		btf_info.btf_size = last_size;
+
 		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
 	}
 
 	if (err || btf_info.btf_size > last_size) {
-		err = errno;
+		btf = err ? ERR_PTR(-errno) : ERR_PTR(-E2BIG);
 		goto exit_free;
 	}
 
-	*btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
-	if (IS_ERR(*btf)) {
-		err = PTR_ERR(*btf);
-		*btf = NULL;
-	}
+	btf = btf_new(ptr, btf_info.btf_size, base_btf);
 
 exit_free:
-	close(btf_fd);
 	free(ptr);
+	return btf;
+}
 
-	return err;
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+	struct btf *res;
+	int btf_fd;
+
+	*btf = NULL;
+	btf_fd = bpf_btf_get_fd_by_id(id);
+	if (btf_fd < 0)
+		return -errno;
+
+	res = btf_get_from_fd(btf_fd, NULL);
+	close(btf_fd);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	*btf = res;
+	return 0;
 }
 
 int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index d99bc847bf84..e569ae63808e 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -155,6 +155,7 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
 			     __u32 *size);
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
 				__u32 *off);
+struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
 
 struct btf_ext_info {
 	/*
-- 
cgit v1.2.3-70-g09d2


From 0f7515ca7cddadabe04e28e20a257b1bbb6cb98a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:24 -0800
Subject: libbpf: Refactor CO-RE relocs to not assume a single BTF object

Refactor CO-RE relocation candidate search to not expect a single BTF, rather
return all candidate types with their corresponding BTF objects. This will
allow to extend CO-RE relocations to accommodate kernel module BTFs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-5-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 187 +++++++++++++++++++++++++++++--------------------
 1 file changed, 111 insertions(+), 76 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b2e16efabde0..2e4fa3ce6b94 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -462,11 +462,14 @@ struct bpf_object {
 	struct list_head list;
 
 	struct btf *btf;
+	struct btf_ext *btf_ext;
+
 	/* Parse and load BTF vmlinux if any of the programs in the object need
 	 * it at load time.
 	 */
 	struct btf *btf_vmlinux;
-	struct btf_ext *btf_ext;
+	/* vmlinux BTF override for CO-RE relocations */
+	struct btf *btf_vmlinux_override;
 
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
@@ -4600,46 +4603,43 @@ static size_t bpf_core_essential_name_len(const char *name)
 	return n;
 }
 
-/* dynamically sized list of type IDs */
-struct ids_vec {
-	__u32 *data;
+struct core_cand
+{
+	const struct btf *btf;
+	const struct btf_type *t;
+	const char *name;
+	__u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct core_cand_list {
+	struct core_cand *cands;
 	int len;
 };
 
-static void bpf_core_free_cands(struct ids_vec *cand_ids)
+static void bpf_core_free_cands(struct core_cand_list *cands)
 {
-	free(cand_ids->data);
-	free(cand_ids);
+	free(cands->cands);
+	free(cands);
 }
 
-static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
-					   __u32 local_type_id,
-					   const struct btf *targ_btf)
+static int bpf_core_add_cands(struct core_cand *local_cand,
+			      size_t local_essent_len,
+			      const struct btf *targ_btf,
+			      const char *targ_btf_name,
+			      int targ_start_id,
+			      struct core_cand_list *cands)
 {
-	size_t local_essent_len, targ_essent_len;
-	const char *local_name, *targ_name;
-	const struct btf_type *t, *local_t;
-	struct ids_vec *cand_ids;
-	__u32 *new_ids;
-	int i, err, n;
-
-	local_t = btf__type_by_id(local_btf, local_type_id);
-	if (!local_t)
-		return ERR_PTR(-EINVAL);
-
-	local_name = btf__name_by_offset(local_btf, local_t->name_off);
-	if (str_is_empty(local_name))
-		return ERR_PTR(-EINVAL);
-	local_essent_len = bpf_core_essential_name_len(local_name);
-
-	cand_ids = calloc(1, sizeof(*cand_ids));
-	if (!cand_ids)
-		return ERR_PTR(-ENOMEM);
+	struct core_cand *new_cands, *cand;
+	const struct btf_type *t;
+	const char *targ_name;
+	size_t targ_essent_len;
+	int n, i;
 
 	n = btf__get_nr_types(targ_btf);
-	for (i = 1; i <= n; i++) {
+	for (i = targ_start_id; i <= n; i++) {
 		t = btf__type_by_id(targ_btf, i);
-		if (btf_kind(t) != btf_kind(local_t))
+		if (btf_kind(t) != btf_kind(local_cand->t))
 			continue;
 
 		targ_name = btf__name_by_offset(targ_btf, t->name_off);
@@ -4650,25 +4650,62 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
 		if (targ_essent_len != local_essent_len)
 			continue;
 
-		if (strncmp(local_name, targ_name, local_essent_len) == 0) {
-			pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
-				 local_type_id, btf_kind_str(local_t),
-				 local_name, i, btf_kind_str(t), targ_name);
-			new_ids = libbpf_reallocarray(cand_ids->data,
-						      cand_ids->len + 1,
-						      sizeof(*cand_ids->data));
-			if (!new_ids) {
-				err = -ENOMEM;
-				goto err_out;
-			}
-			cand_ids->data = new_ids;
-			cand_ids->data[cand_ids->len++] = i;
-		}
+		if (strncmp(local_cand->name, targ_name, local_essent_len) != 0)
+			continue;
+
+		pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
+			 local_cand->id, btf_kind_str(local_cand->t),
+			 local_cand->name, i, btf_kind_str(t), targ_name,
+			 targ_btf_name);
+		new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
+					      sizeof(*cands->cands));
+		if (!new_cands)
+			return -ENOMEM;
+
+		cand = &new_cands[cands->len];
+		cand->btf = targ_btf;
+		cand->t = t;
+		cand->name = targ_name;
+		cand->id = i;
+
+		cands->cands = new_cands;
+		cands->len++;
 	}
-	return cand_ids;
-err_out:
-	bpf_core_free_cands(cand_ids);
-	return ERR_PTR(err);
+	return 0;
+}
+
+static struct core_cand_list *
+bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
+{
+	struct core_cand local_cand = {};
+	struct core_cand_list *cands;
+	size_t local_essent_len;
+	int err;
+
+	local_cand.btf = local_btf;
+	local_cand.t = btf__type_by_id(local_btf, local_type_id);
+	if (!local_cand.t)
+		return ERR_PTR(-EINVAL);
+
+	local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off);
+	if (str_is_empty(local_cand.name))
+		return ERR_PTR(-EINVAL);
+	local_essent_len = bpf_core_essential_name_len(local_cand.name);
+
+	cands = calloc(1, sizeof(*cands));
+	if (!cands)
+		return ERR_PTR(-ENOMEM);
+
+	/* Attempt to find target candidates in vmlinux BTF first */
+	err = bpf_core_add_cands(&local_cand, local_essent_len,
+				 obj->btf_vmlinux_override ?: obj->btf_vmlinux,
+				 "vmlinux", 1, cands);
+	if (err) {
+		bpf_core_free_cands(cands);
+		return ERR_PTR(err);
+	}
+
+	return cands;
 }
 
 /* Check two types for compatibility for the purpose of field access
@@ -5661,7 +5698,6 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 			       const struct bpf_core_relo *relo,
 			       int relo_idx,
 			       const struct btf *local_btf,
-			       const struct btf *targ_btf,
 			       struct hashmap *cand_cache)
 {
 	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
@@ -5669,8 +5705,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 	struct bpf_core_relo_res cand_res, targ_res;
 	const struct btf_type *local_type;
 	const char *local_name;
-	struct ids_vec *cand_ids;
-	__u32 local_id, cand_id;
+	struct core_cand_list *cands = NULL;
+	__u32 local_id;
 	const char *spec_str;
 	int i, j, err;
 
@@ -5717,24 +5753,24 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 		return -EOPNOTSUPP;
 	}
 
-	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
-		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
-		if (IS_ERR(cand_ids)) {
+	if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
+		if (IS_ERR(cands)) {
 			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
 				prog->name, relo_idx, local_id, btf_kind_str(local_type),
-				local_name, PTR_ERR(cand_ids));
-			return PTR_ERR(cand_ids);
+				local_name, PTR_ERR(cands));
+			return PTR_ERR(cands);
 		}
-		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+		err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
 		if (err) {
-			bpf_core_free_cands(cand_ids);
+			bpf_core_free_cands(cands);
 			return err;
 		}
 	}
 
-	for (i = 0, j = 0; i < cand_ids->len; i++) {
-		cand_id = cand_ids->data[i];
-		err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
+	for (i = 0, j = 0; i < cands->len; i++) {
+		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+					  cands->cands[i].id, &cand_spec);
 		if (err < 0) {
 			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
 				prog->name, relo_idx, i);
@@ -5778,7 +5814,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 			return -EINVAL;
 		}
 
-		cand_ids->data[j++] = cand_spec.root_type_id;
+		cands->cands[j++] = cands->cands[i];
 	}
 
 	/*
@@ -5790,7 +5826,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 	 * depending on relo's kind.
 	 */
 	if (j > 0)
-		cand_ids->len = j;
+		cands->len = j;
 
 	/*
 	 * If no candidates were found, it might be both a programmer error,
@@ -5834,20 +5870,19 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 	struct hashmap_entry *entry;
 	struct hashmap *cand_cache = NULL;
 	struct bpf_program *prog;
-	struct btf *targ_btf;
 	const char *sec_name;
 	int i, err = 0, insn_idx, sec_idx;
 
 	if (obj->btf_ext->core_relo_info.len == 0)
 		return 0;
 
-	if (targ_btf_path)
-		targ_btf = btf__parse(targ_btf_path, NULL);
-	else
-		targ_btf = obj->btf_vmlinux;
-	if (IS_ERR_OR_NULL(targ_btf)) {
-		pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
-		return PTR_ERR(targ_btf);
+	if (targ_btf_path) {
+		obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
+		if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
+			err = PTR_ERR(obj->btf_vmlinux_override);
+			pr_warn("failed to parse target BTF: %d\n", err);
+			return err;
+		}
 	}
 
 	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
@@ -5899,8 +5934,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 			if (!prog->load)
 				continue;
 
-			err = bpf_core_apply_relo(prog, rec, i, obj->btf,
-						  targ_btf, cand_cache);
+			err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache);
 			if (err) {
 				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
 					prog->name, i, err);
@@ -5911,8 +5945,9 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 
 out:
 	/* obj->btf_vmlinux is freed at the end of object load phase */
-	if (targ_btf != obj->btf_vmlinux)
-		btf__free(targ_btf);
+	btf__free(obj->btf_vmlinux_override);
+	obj->btf_vmlinux_override = NULL;
+
 	if (!IS_ERR_OR_NULL(cand_cache)) {
 		hashmap__for_each_entry(cand_cache, entry, i) {
 			bpf_core_free_cands(entry->value);
-- 
cgit v1.2.3-70-g09d2


From 4f33a53d56000cfa67e2e4e8a5dac08f084a979b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:25 -0800
Subject: libbpf: Add kernel module BTF support for CO-RE relocations

Teach libbpf to search for candidate types for CO-RE relocations across kernel
modules BTFs, in addition to vmlinux BTF. If at least one candidate type is
found in vmlinux BTF, kernel module BTFs are not iterated. If vmlinux BTF has
no matching candidates, then find all kernel module BTFs and search for all
matching candidates across all of them.

Kernel's support for module BTFs are inferred from the support for BTF name
pointer in BPF UAPI.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-6-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 169 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2e4fa3ce6b94..ca20e493726d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -176,6 +176,8 @@ enum kern_feature_id {
 	FEAT_PROBE_READ_KERN,
 	/* BPF_PROG_BIND_MAP is supported */
 	FEAT_PROG_BIND_MAP,
+	/* Kernel support for module BTFs */
+	FEAT_MODULE_BTF,
 	__FEAT_CNT,
 };
 
@@ -402,6 +404,12 @@ struct extern_desc {
 
 static LIST_HEAD(bpf_objects_list);
 
+struct module_btf {
+	struct btf *btf;
+	char *name;
+	__u32 id;
+};
+
 struct bpf_object {
 	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
@@ -470,6 +478,11 @@ struct bpf_object {
 	struct btf *btf_vmlinux;
 	/* vmlinux BTF override for CO-RE relocations */
 	struct btf *btf_vmlinux_override;
+	/* Lazily initialized kernel module BTFs */
+	struct module_btf *btf_modules;
+	bool btf_modules_loaded;
+	size_t btf_module_cnt;
+	size_t btf_module_cap;
 
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
@@ -3960,6 +3973,35 @@ static int probe_prog_bind_map(void)
 	return ret >= 0;
 }
 
+static int probe_module_btf(void)
+{
+	static const char strs[] = "\0int";
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+	};
+	struct bpf_btf_info info;
+	__u32 len = sizeof(info);
+	char name[16];
+	int fd, err;
+
+	fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
+	if (fd < 0)
+		return 0; /* BTF not supported at all */
+
+	memset(&info, 0, sizeof(info));
+	info.name = ptr_to_u64(name);
+	info.name_len = sizeof(name);
+
+	/* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
+	 * kernel's module BTF support coincides with support for
+	 * name/name_len fields in struct bpf_btf_info.
+	 */
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	close(fd);
+	return !err;
+}
+
 enum kern_feature_result {
 	FEAT_UNKNOWN = 0,
 	FEAT_SUPPORTED = 1,
@@ -4003,7 +4045,10 @@ static struct kern_feature_desc {
 	},
 	[FEAT_PROG_BIND_MAP] = {
 		"BPF_PROG_BIND_MAP support", probe_prog_bind_map,
-	}
+	},
+	[FEAT_MODULE_BTF] = {
+		"module BTF support", probe_module_btf,
+	},
 };
 
 static bool kernel_supports(enum kern_feature_id feat_id)
@@ -4674,13 +4719,96 @@ static int bpf_core_add_cands(struct core_cand *local_cand,
 	return 0;
 }
 
+static int load_module_btfs(struct bpf_object *obj)
+{
+	struct bpf_btf_info info;
+	struct module_btf *mod_btf;
+	struct btf *btf;
+	char name[64];
+	__u32 id = 0, len;
+	int err, fd;
+
+	if (obj->btf_modules_loaded)
+		return 0;
+
+	/* don't do this again, even if we find no module BTFs */
+	obj->btf_modules_loaded = true;
+
+	/* kernel too old to support module BTFs */
+	if (!kernel_supports(FEAT_MODULE_BTF))
+		return 0;
+
+	while (true) {
+		err = bpf_btf_get_next_id(id, &id);
+		if (err && errno == ENOENT)
+			return 0;
+		if (err) {
+			err = -errno;
+			pr_warn("failed to iterate BTF objects: %d\n", err);
+			return err;
+		}
+
+		fd = bpf_btf_get_fd_by_id(id);
+		if (fd < 0) {
+			if (errno == ENOENT)
+				continue; /* expected race: BTF was unloaded */
+			err = -errno;
+			pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
+			return err;
+		}
+
+		len = sizeof(info);
+		memset(&info, 0, sizeof(info));
+		info.name = ptr_to_u64(name);
+		info.name_len = sizeof(name);
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			err = -errno;
+			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
+			close(fd);
+			return err;
+		}
+
+		/* ignore non-module BTFs */
+		if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
+			close(fd);
+			continue;
+		}
+
+		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
+		close(fd);
+		if (IS_ERR(btf)) {
+			pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
+				name, id, PTR_ERR(btf));
+			return PTR_ERR(btf);
+		}
+
+		err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
+				     sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
+		if (err)
+			return err;
+
+		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
+
+		mod_btf->btf = btf;
+		mod_btf->id = id;
+		mod_btf->name = strdup(name);
+		if (!mod_btf->name)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static struct core_cand_list *
 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 {
 	struct core_cand local_cand = {};
 	struct core_cand_list *cands;
+	const struct btf *main_btf;
 	size_t local_essent_len;
-	int err;
+	int err, i;
 
 	local_cand.btf = local_btf;
 	local_cand.t = btf__type_by_id(local_btf, local_type_id);
@@ -4697,15 +4825,38 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l
 		return ERR_PTR(-ENOMEM);
 
 	/* Attempt to find target candidates in vmlinux BTF first */
-	err = bpf_core_add_cands(&local_cand, local_essent_len,
-				 obj->btf_vmlinux_override ?: obj->btf_vmlinux,
-				 "vmlinux", 1, cands);
-	if (err) {
-		bpf_core_free_cands(cands);
-		return ERR_PTR(err);
+	main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
+	err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
+	if (err)
+		goto err_out;
+
+	/* if vmlinux BTF has any candidate, don't got for module BTFs */
+	if (cands->len)
+		return cands;
+
+	/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
+	if (obj->btf_vmlinux_override)
+		return cands;
+
+	/* now look through module BTFs, trying to still find candidates */
+	err = load_module_btfs(obj);
+	if (err)
+		goto err_out;
+
+	for (i = 0; i < obj->btf_module_cnt; i++) {
+		err = bpf_core_add_cands(&local_cand, local_essent_len,
+					 obj->btf_modules[i].btf,
+					 obj->btf_modules[i].name,
+					 btf__get_nr_types(obj->btf_vmlinux) + 1,
+					 cands);
+		if (err)
+			goto err_out;
 	}
 
 	return cands;
+err_out:
+	bpf_core_free_cands(cands);
+	return ERR_PTR(err);
 }
 
 /* Check two types for compatibility for the purpose of field access
@@ -5756,7 +5907,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
 	if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
 		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
 		if (IS_ERR(cands)) {
-			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld",
+			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
 				prog->name, relo_idx, local_id, btf_kind_str(local_type),
 				local_name, PTR_ERR(cands));
 			return PTR_ERR(cands);
@@ -5944,7 +6095,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 	}
 
 out:
-	/* obj->btf_vmlinux is freed at the end of object load phase */
+	/* obj->btf_vmlinux and module BTFs are freed after object load */
 	btf__free(obj->btf_vmlinux_override);
 	obj->btf_vmlinux_override = NULL;
 
@@ -7316,6 +7467,14 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
 	err = err ? : bpf_object__load_progs(obj, attr->log_level);
 
+	/* clean up module BTFs */
+	for (i = 0; i < obj->btf_module_cnt; i++) {
+		btf__free(obj->btf_modules[i].btf);
+		free(obj->btf_modules[i].name);
+	}
+	free(obj->btf_modules);
+
+	/* clean up vmlinux BTF */
 	btf__free(obj->btf_vmlinux);
 	obj->btf_vmlinux = NULL;
 
-- 
cgit v1.2.3-70-g09d2


From 9f7fa225894c7fcb014f3699a402fcc4d896cb1c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:26 -0800
Subject: selftests/bpf: Add bpf_testmod kernel module for testing

Add bpf_testmod module, which is conceptually out-of-tree module and provides
ways for selftests/bpf to test various kernel module-related functionality:
raw tracepoint, fentry/fexit/fmod_ret, etc. This module will be auto-loaded by
test_progs test runner and expected by some of selftests to be present and
loaded.

Pahole currently isn't able to generate BTF for static functions in kernel
modules, so make sure traced function is global.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-7-andrii@kernel.org
---
 tools/testing/selftests/bpf/.gitignore             |  1 +
 tools/testing/selftests/bpf/Makefile               | 12 +++--
 tools/testing/selftests/bpf/bpf_testmod/.gitignore |  6 +++
 tools/testing/selftests/bpf/bpf_testmod/Makefile   | 20 ++++++++
 .../selftests/bpf/bpf_testmod/bpf_testmod-events.h | 36 +++++++++++++
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        | 52 +++++++++++++++++++
 .../selftests/bpf/bpf_testmod/bpf_testmod.h        | 14 +++++
 tools/testing/selftests/bpf/test_progs.c           | 59 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h           |  1 +
 9 files changed, 198 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/.gitignore
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/Makefile
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
 create mode 100644 tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 395ae040ce1f..752d8edddc66 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -35,3 +35,4 @@ test_cpp
 /tools
 /runqslower
 /bench
+*.ko
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 371b022d932c..ac25ba5d0d6c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -80,7 +80,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
-	test_lirc_mode2_user xdping test_cpp runqslower bench
+	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko
 
 TEST_CUSTOM_PROGS = urandom_read
 
@@ -104,6 +104,7 @@ OVERRIDE_TARGETS := 1
 override define CLEAN
 	$(call msg,CLEAN)
 	$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+	$(Q)$(MAKE) -C bpf_testmod clean
 endef
 
 include ../lib.mk
@@ -136,6 +137,11 @@ $(OUTPUT)/urandom_read: urandom_read.c
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1
 
+$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
+	$(call msg,MOD,,$@)
+	$(Q)$(MAKE) $(submake_extras) -C bpf_testmod
+	$(Q)cp bpf_testmod/bpf_testmod.ko $@
+
 $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(Q)$(CC) -c $(CFLAGS) -o $@ $<
@@ -388,7 +394,7 @@ TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 network_helpers.c testing_helpers.c		\
 			 btf_helpers.c	flow_dissector_load.h
-TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       ima_setup.sh					\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -460,4 +466,4 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR)			\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature								\
-	$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc)
+	$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/bpf_testmod/.gitignore
new file mode 100644
index 000000000000..ded513777281
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/.gitignore
@@ -0,0 +1,6 @@
+*.mod
+*.mod.c
+*.o
+.ko
+/Module.symvers
+/modules.order
diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile
new file mode 100644
index 000000000000..15cb36c4483a
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/Makefile
@@ -0,0 +1,20 @@
+BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_testmod.ko
+
+obj-m += bpf_testmod.o
+CFLAGS_bpf_testmod.o = -I$(src)
+
+all:
+	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
+
+clean:
+	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
new file mode 100644
index 000000000000..b83ea448bc79
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_testmod
+
+#if !defined(_BPF_TESTMOD_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _BPF_TESTMOD_EVENTS_H
+
+#include <linux/tracepoint.h>
+#include "bpf_testmod.h"
+
+TRACE_EVENT(bpf_testmod_test_read,
+	TP_PROTO(struct task_struct *task, struct bpf_testmod_test_read_ctx *ctx),
+	TP_ARGS(task, ctx),
+	TP_STRUCT__entry(
+		__field(pid_t, pid)
+		__array(char, comm, TASK_COMM_LEN)
+		__field(loff_t, off)
+		__field(size_t, len)
+	),
+	TP_fast_assign(
+		__entry->pid = task->pid;
+		memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+		__entry->off = ctx->off;
+		__entry->len = ctx->len;
+	),
+	TP_printk("pid=%d comm=%s off=%llu len=%zu",
+		  __entry->pid, __entry->comm, __entry->off, __entry->len)
+);
+
+#endif /* _BPF_TESTMOD_EVENTS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE bpf_testmod-events
+#include <trace/define_trace.h>
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
new file mode 100644
index 000000000000..2df19d73ca49
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/error-injection.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sysfs.h>
+#include <linux/tracepoint.h>
+#include "bpf_testmod.h"
+
+#define CREATE_TRACE_POINTS
+#include "bpf_testmod-events.h"
+
+noinline ssize_t
+bpf_testmod_test_read(struct file *file, struct kobject *kobj,
+		      struct bin_attribute *bin_attr,
+		      char *buf, loff_t off, size_t len)
+{
+	struct bpf_testmod_test_read_ctx ctx = {
+		.buf = buf,
+		.off = off,
+		.len = len,
+	};
+
+	trace_bpf_testmod_test_read(current, &ctx);
+
+	return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_read);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
+
+static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
+	.attr = { .name = "bpf_testmod", .mode = 0444, },
+	.read = bpf_testmod_test_read,
+};
+
+static int bpf_testmod_init(void)
+{
+	return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+}
+
+static void bpf_testmod_exit(void)
+{
+	return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+}
+
+module_init(bpf_testmod_init);
+module_exit(bpf_testmod_exit);
+
+MODULE_AUTHOR("Andrii Nakryiko");
+MODULE_DESCRIPTION("BPF selftests module");
+MODULE_LICENSE("Dual BSD/GPL");
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
new file mode 100644
index 000000000000..b81adfedb4f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#ifndef _BPF_TESTMOD_H
+#define _BPF_TESTMOD_H
+
+#include <linux/types.h>
+
+struct bpf_testmod_test_read_ctx {
+	char *buf;
+	loff_t off;
+	size_t len;
+};
+
+#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 22943b58d752..17587754b7a7 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -360,6 +360,58 @@ err:
 	return -1;
 }
 
+static int finit_module(int fd, const char *param_values, int flags)
+{
+	return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+static int delete_module(const char *name, int flags)
+{
+	return syscall(__NR_delete_module, name, flags);
+}
+
+static void unload_bpf_testmod(void)
+{
+	if (delete_module("bpf_testmod", 0)) {
+		if (errno == ENOENT) {
+			if (env.verbosity > VERBOSE_NONE)
+				fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
+			return;
+		}
+		fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
+		exit(1);
+	}
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
+}
+
+static int load_bpf_testmod(void)
+{
+	int fd;
+
+	/* ensure previous instance of the module is unloaded */
+	unload_bpf_testmod();
+
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout, "Loading bpf_testmod.ko...\n");
+
+	fd = open("bpf_testmod.ko", O_RDONLY);
+	if (fd < 0) {
+		fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
+		return -ENOENT;
+	}
+	if (finit_module(fd, "", 0)) {
+		fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
+		close(fd);
+		return -EINVAL;
+	}
+	close(fd);
+
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
+	return 0;
+}
+
 /* extern declarations for test funcs */
 #define DEFINE_TEST(name) extern void test_##name(void);
 #include <prog_tests/tests.h>
@@ -678,6 +730,11 @@ int main(int argc, char **argv)
 
 	save_netns();
 	stdio_hijack();
+	env.has_testmod = true;
+	if (load_bpf_testmod()) {
+		fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
+		env.has_testmod = false;
+	}
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
 
@@ -722,6 +779,8 @@ int main(int argc, char **argv)
 		if (test->need_cgroup_cleanup)
 			cleanup_cgroup_environment();
 	}
+	if (env.has_testmod)
+		unload_bpf_testmod();
 	stdio_restore();
 
 	if (env.get_test_cnt) {
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index d6b14853f3bc..115953243f62 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -66,6 +66,7 @@ struct test_env {
 	enum verbosity verbosity;
 
 	bool jit_enabled;
+	bool has_testmod;
 	bool get_test_cnt;
 	bool list_test_names;
 
-- 
cgit v1.2.3-70-g09d2


From 5ed31472b9ad6373a0a24bc21186b5eac999213d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:27 -0800
Subject: selftests/bpf: Add support for marking sub-tests as skipped

Previously skipped sub-tests would be counted as passing with ":OK" appened
in the log. Change that to be accounted as ":SKIP".

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-8-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_progs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 17587754b7a7..5ef081bdae4e 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -149,15 +149,15 @@ void test__end_subtest()
 
 	if (sub_error_cnt)
 		env.fail_cnt++;
-	else
+	else if (test->skip_cnt == 0)
 		env.sub_succ_cnt++;
 	skip_account();
 
 	dump_test_log(test, sub_error_cnt);
 
 	fprintf(env.stdout, "#%d/%d %s:%s\n",
-	       test->test_num, test->subtest_num,
-	       test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
+	       test->test_num, test->subtest_num, test->subtest_name,
+	       sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
 
 	free(test->subtest_name);
 	test->subtest_name = NULL;
-- 
cgit v1.2.3-70-g09d2


From 6bcd39d366b64318562785d5b47c2837e3a53ae5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:28 -0800
Subject: selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF

Add a self-tests validating libbpf is able to perform CO-RE relocations
against the type defined in kernel module BTF. if bpf_testmod.o is not
supported by the kernel (e.g., due to version mismatch), skip tests, instead
of failing.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-9-andrii@kernel.org
---
 .../testing/selftests/bpf/prog_tests/core_reloc.c  | 79 +++++++++++++++++++---
 .../testing/selftests/bpf/progs/core_reloc_types.h | 17 +++++
 .../selftests/bpf/progs/test_core_reloc_module.c   | 66 ++++++++++++++++++
 3 files changed, 151 insertions(+), 11 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_module.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 30e40ff4b0d8..bb980848cd77 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include "progs/core_reloc_types.h"
+#include "bpf_testmod/bpf_testmod.h"
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <bpf/btf.h>
@@ -9,6 +10,30 @@ static int duration = 0;
 
 #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
 
+#define MODULES_CASE(name, sec_name, tp_name) {				\
+	.case_name = name,						\
+	.bpf_obj_file = "test_core_reloc_module.o",			\
+	.btf_src_file = NULL, /* find in kernel module BTFs */		\
+	.input = "",							\
+	.input_len = 0,							\
+	.output = STRUCT_TO_CHAR_PTR(core_reloc_module_output) {	\
+		.read_ctx_sz = sizeof(struct bpf_testmod_test_read_ctx),\
+		.read_ctx_exists = true,				\
+		.buf_exists = true,					\
+		.len_exists = true,					\
+		.off_exists = true,					\
+		.len = 123,						\
+		.off = 0,						\
+		.comm = "test_progs",					\
+		.comm_len = sizeof("test_progs"),			\
+	},								\
+	.output_len = sizeof(struct core_reloc_module_output),		\
+	.prog_sec_name = sec_name,					\
+	.raw_tp_name = tp_name,						\
+	.trigger = trigger_module_test_read,				\
+	.needs_testmod = true,						\
+}
+
 #define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) {	\
 	.a = 42,							\
 	.b = 0xc001,							\
@@ -211,7 +236,7 @@ static int duration = 0;
 	.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output)	\
 		__VA_ARGS__,						\
 	.output_len = sizeof(struct core_reloc_bitfields_output),	\
-	.direct_raw_tp = true,						\
+	.prog_sec_name = "tp_btf/sys_enter",				\
 }
 
 
@@ -222,7 +247,7 @@ static int duration = 0;
 }, {									\
 	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o",	\
 			      "direct:", name),				\
-	.direct_raw_tp = true,						\
+	.prog_sec_name = "tp_btf/sys_enter",				\
 	.fails = true,							\
 }
 
@@ -309,6 +334,7 @@ static int duration = 0;
 struct core_reloc_test_case;
 
 typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
+typedef int (*trigger_test_fn)(const struct core_reloc_test_case *test);
 
 struct core_reloc_test_case {
 	const char *case_name;
@@ -319,9 +345,12 @@ struct core_reloc_test_case {
 	const char *output;
 	int output_len;
 	bool fails;
+	bool needs_testmod;
 	bool relaxed_core_relocs;
-	bool direct_raw_tp;
+	const char *prog_sec_name;
+	const char *raw_tp_name;
 	setup_test_fn setup;
+	trigger_test_fn trigger;
 };
 
 static int find_btf_type(const struct btf *btf, const char *name, __u32 kind)
@@ -451,6 +480,23 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test)
 	return 0;
 }
 
+static int trigger_module_test_read(const struct core_reloc_test_case *test)
+{
+	struct core_reloc_module_output *exp = (void *)test->output;
+	int fd, err;
+
+	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
+	err = -errno;
+	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+		return err;
+
+	read(fd, NULL, exp->len); /* request expected number of bytes */
+	close(fd);
+
+	return 0;
+}
+
+
 static struct core_reloc_test_case test_cases[] = {
 	/* validate we can find kernel image and use its BTF for relocs */
 	{
@@ -467,6 +513,9 @@ static struct core_reloc_test_case test_cases[] = {
 		.output_len = sizeof(struct core_reloc_kernel_output),
 	},
 
+	/* validate we can find kernel module BTF types for relocs/attach */
+	MODULES_CASE("module", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
+
 	/* validate BPF program can use multiple flavors to match against
 	 * single target BTF type
 	 */
@@ -779,6 +828,11 @@ void test_core_reloc(void)
 		if (!test__start_subtest(test_case->case_name))
 			continue;
 
+		if (test_case->needs_testmod && !env.has_testmod) {
+			test__skip();
+			continue;
+		}
+
 		if (test_case->setup) {
 			err = test_case->setup(test_case);
 			if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
@@ -790,13 +844,11 @@ void test_core_reloc(void)
 			  test_case->bpf_obj_file, PTR_ERR(obj)))
 			continue;
 
-		/* for typed raw tracepoints, NULL should be specified */
-		if (test_case->direct_raw_tp) {
-			probe_name = "tp_btf/sys_enter";
-			tp_name = NULL;
-		} else {
-			probe_name = "raw_tracepoint/sys_enter";
-			tp_name = "sys_enter";
+		probe_name = "raw_tracepoint/sys_enter";
+		tp_name = "sys_enter";
+		if (test_case->prog_sec_name) {
+			probe_name = test_case->prog_sec_name;
+			tp_name = test_case->raw_tp_name; /* NULL for tp_btf */
 		}
 
 		prog = bpf_object__find_program_by_title(obj, probe_name);
@@ -837,7 +889,12 @@ void test_core_reloc(void)
 			goto cleanup;
 
 		/* trigger test run */
-		usleep(1);
+		if (test_case->trigger) {
+			if (!ASSERT_OK(test_case->trigger(test_case), "test_trigger"))
+				goto cleanup;
+		} else {
+			usleep(1);
+		}
 
 		if (data->skip) {
 			test__skip();
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index e6e616cb7bc9..9a2850850121 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -15,6 +15,23 @@ struct core_reloc_kernel_output {
 	int comm_len;
 };
 
+/*
+ * MODULE
+ */
+
+struct core_reloc_module_output {
+	long long len;
+	long long off;
+	int read_ctx_sz;
+	bool read_ctx_exists;
+	bool buf_exists;
+	bool len_exists;
+	bool off_exists;
+	/* we have test_progs[-flavor], so cut flavor part */
+	char comm[sizeof("test_progs")];
+	int comm_len;
+};
+
 /*
  * FLAVORS
  */
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
new file mode 100644
index 000000000000..d1840c1a9d36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_testmod_test_read_ctx {
+	/* field order is mixed up */
+	size_t len;
+	char *buf;
+	loff_t off;
+} __attribute__((preserve_access_index));
+
+struct {
+	char in[256];
+	char out[256];
+	bool skip;
+	uint64_t my_pid_tgid;
+} data = {};
+
+struct core_reloc_module_output {
+	long long len;
+	long long off;
+	int read_ctx_sz;
+	bool read_ctx_exists;
+	bool buf_exists;
+	bool len_exists;
+	bool off_exists;
+	/* we have test_progs[-flavor], so cut flavor part */
+	char comm[sizeof("test_progs")];
+	int comm_len;
+};
+
+SEC("raw_tp/bpf_testmod_test_read")
+int BPF_PROG(test_core_module,
+	     struct task_struct *task,
+	     struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	struct core_reloc_module_output *out = (void *)&data.out;
+	__u64 pid_tgid = bpf_get_current_pid_tgid();
+	__u32 real_tgid = (__u32)(pid_tgid >> 32);
+	__u32 real_pid = (__u32)pid_tgid;
+
+	if (data.my_pid_tgid != pid_tgid)
+		return 0;
+
+	if (BPF_CORE_READ(task, pid) != real_pid || BPF_CORE_READ(task, tgid) != real_tgid)
+		return 0;
+
+	out->len = BPF_CORE_READ(read_ctx, len);
+	out->off = BPF_CORE_READ(read_ctx, off);
+
+	out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);
+	out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx);
+	out->buf_exists = bpf_core_field_exists(read_ctx->buf);
+	out->off_exists = bpf_core_field_exists(read_ctx->off);
+	out->len_exists = bpf_core_field_exists(read_ctx->len);
+
+	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 290248a5b7d829871b3ea3c62578613a580a1744 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:30 -0800
Subject: bpf: Allow to specify kernel module BTFs when attaching BPF programs

Add ability for user-space programs to specify non-vmlinux BTF when attaching
BTF-powered BPF programs: raw_tp, fentry/fexit/fmod_ret, LSM, etc. For this,
attach_prog_fd (now with the alias name attach_btf_obj_fd) should specify FD
of a module or vmlinux BTF object. For backwards compatibility reasons,
0 denotes vmlinux BTF. Only kernel BTF (vmlinux or module) can be specified.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-11-andrii@kernel.org
---
 include/linux/btf.h            |  1 +
 include/uapi/linux/bpf.h       |  7 +++-
 kernel/bpf/btf.c               |  5 +++
 kernel/bpf/syscall.c           | 82 ++++++++++++++++++++++++++----------------
 tools/include/uapi/linux/bpf.h |  7 +++-
 5 files changed, 69 insertions(+), 33 deletions(-)

(limited to 'tools')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index fb608e4de076..4c200f5d242b 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -90,6 +90,7 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
 
 int btf_get_fd_by_id(u32 id);
 u32 btf_obj_id(const struct btf *btf);
+bool btf_is_kernel(const struct btf *btf);
 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   const struct btf_member *m,
 			   u32 expected_offset, u32 expected_size);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c3458ec1f30a..1233f14f659f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -557,7 +557,12 @@ union bpf_attr {
 		__aligned_u64	line_info;	/* line info */
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
-		__u32		attach_prog_fd; /* 0 to attach to vmlinux */
+		union {
+			/* valid prog_fd to attach to bpf prog */
+			__u32		attach_prog_fd;
+			/* or valid module BTF object fd or 0 to attach to vmlinux */
+			__u32		attach_btf_obj_fd;
+		};
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 7a19bf5bfe97..8d6bdb4f4d61 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5738,6 +5738,11 @@ u32 btf_obj_id(const struct btf *btf)
 	return btf->id;
 }
 
+bool btf_is_kernel(const struct btf *btf)
+{
+	return btf->kernel_btf;
+}
+
 static int btf_id_cmp_func(const void *a, const void *b)
 {
 	const int *pa = a, *pb = b;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 184204169949..0cd3cc2af9c1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1926,12 +1926,16 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
 static int
 bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 			   enum bpf_attach_type expected_attach_type,
-			   u32 btf_id, u32 prog_fd)
+			   struct btf *attach_btf, u32 btf_id,
+			   struct bpf_prog *dst_prog)
 {
 	if (btf_id) {
 		if (btf_id > BTF_MAX_TYPE)
 			return -EINVAL;
 
+		if (!attach_btf && !dst_prog)
+			return -EINVAL;
+
 		switch (prog_type) {
 		case BPF_PROG_TYPE_TRACING:
 		case BPF_PROG_TYPE_LSM:
@@ -1943,7 +1947,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		}
 	}
 
-	if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING &&
+	if (attach_btf && (!btf_id || dst_prog))
+		return -EINVAL;
+
+	if (dst_prog && prog_type != BPF_PROG_TYPE_TRACING &&
 	    prog_type != BPF_PROG_TYPE_EXT)
 		return -EINVAL;
 
@@ -2060,7 +2067,8 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 {
 	enum bpf_prog_type type = attr->prog_type;
-	struct bpf_prog *prog;
+	struct bpf_prog *prog, *dst_prog = NULL;
+	struct btf *attach_btf = NULL;
 	int err;
 	char license[128];
 	bool is_gpl;
@@ -2102,44 +2110,56 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	if (is_perfmon_prog_type(type) && !perfmon_capable())
 		return -EPERM;
 
+	/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
+	 * or btf, we need to check which one it is
+	 */
+	if (attr->attach_prog_fd) {
+		dst_prog = bpf_prog_get(attr->attach_prog_fd);
+		if (IS_ERR(dst_prog)) {
+			dst_prog = NULL;
+			attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
+			if (IS_ERR(attach_btf))
+				return -EINVAL;
+			if (!btf_is_kernel(attach_btf)) {
+				btf_put(attach_btf);
+				return -EINVAL;
+			}
+		}
+	} else if (attr->attach_btf_id) {
+		/* fall back to vmlinux BTF, if BTF type ID is specified */
+		attach_btf = bpf_get_btf_vmlinux();
+		if (IS_ERR(attach_btf))
+			return PTR_ERR(attach_btf);
+		if (!attach_btf)
+			return -EINVAL;
+		btf_get(attach_btf);
+	}
+
 	bpf_prog_load_fixup_attach_type(attr);
 	if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
-				       attr->attach_btf_id,
-				       attr->attach_prog_fd))
+				       attach_btf, attr->attach_btf_id,
+				       dst_prog)) {
+		if (dst_prog)
+			bpf_prog_put(dst_prog);
+		if (attach_btf)
+			btf_put(attach_btf);
 		return -EINVAL;
+	}
 
 	/* plain bpf_prog allocation */
 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
-	if (!prog)
+	if (!prog) {
+		if (dst_prog)
+			bpf_prog_put(dst_prog);
+		if (attach_btf)
+			btf_put(attach_btf);
 		return -ENOMEM;
+	}
 
 	prog->expected_attach_type = attr->expected_attach_type;
+	prog->aux->attach_btf = attach_btf;
 	prog->aux->attach_btf_id = attr->attach_btf_id;
-
-	if (attr->attach_btf_id && !attr->attach_prog_fd) {
-		struct btf *btf;
-
-		btf = bpf_get_btf_vmlinux();
-		if (IS_ERR(btf))
-			return PTR_ERR(btf);
-		if (!btf)
-			return -EINVAL;
-
-		btf_get(btf);
-		prog->aux->attach_btf = btf;
-	}
-
-	if (attr->attach_prog_fd) {
-		struct bpf_prog *dst_prog;
-
-		dst_prog = bpf_prog_get(attr->attach_prog_fd);
-		if (IS_ERR(dst_prog)) {
-			err = PTR_ERR(dst_prog);
-			goto free_prog;
-		}
-		prog->aux->dst_prog = dst_prog;
-	}
-
+	prog->aux->dst_prog = dst_prog;
 	prog->aux->offload_requested = !!attr->prog_ifindex;
 	prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c3458ec1f30a..1233f14f659f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -557,7 +557,12 @@ union bpf_attr {
 		__aligned_u64	line_info;	/* line info */
 		__u32		line_info_cnt;	/* number of bpf_line_info records */
 		__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
-		__u32		attach_prog_fd; /* 0 to attach to vmlinux */
+		union {
+			/* valid prog_fd to attach to bpf prog */
+			__u32		attach_prog_fd;
+			/* or valid module BTF object fd or 0 to attach to vmlinux */
+			__u32		attach_btf_obj_fd;
+		};
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
-- 
cgit v1.2.3-70-g09d2


From 6aef10a481a3f42c8021fe410e07440c0d71a5fc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:31 -0800
Subject: libbpf: Factor out low-level BPF program loading helper

Refactor low-level API for BPF program loading to not rely on public API
types. This allows painless extension without constant efforts to cleverly not
break backwards compatibility.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-12-andrii@kernel.org
---
 tools/lib/bpf/bpf.c             | 100 +++++++++++++++++++++++++++-------------
 tools/lib/bpf/libbpf.c          |  34 +++++++-------
 tools/lib/bpf/libbpf_internal.h |  29 ++++++++++++
 3 files changed, 113 insertions(+), 50 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 4025266d0fb0..5d681ce32b37 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -215,59 +215,52 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
 	return info;
 }
 
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-			   char *log_buf, size_t log_buf_sz)
+int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 {
 	void *finfo = NULL, *linfo = NULL;
 	union bpf_attr attr;
-	__u32 log_level;
 	int fd;
 
-	if (!load_attr || !log_buf != !log_buf_sz)
+	if (!load_attr->log_buf != !load_attr->log_buf_sz)
 		return -EINVAL;
 
-	log_level = load_attr->log_level;
-	if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
+	if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
 		return -EINVAL;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
-	if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
-	    attr.prog_type == BPF_PROG_TYPE_LSM) {
-		attr.attach_btf_id = load_attr->attach_btf_id;
-	} else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
-		   attr.prog_type == BPF_PROG_TYPE_EXT) {
-		attr.attach_btf_id = load_attr->attach_btf_id;
-		attr.attach_prog_fd = load_attr->attach_prog_fd;
-	} else {
-		attr.prog_ifindex = load_attr->prog_ifindex;
-		attr.kern_version = load_attr->kern_version;
-	}
-	attr.insn_cnt = (__u32)load_attr->insns_cnt;
+
+	attr.attach_btf_id = load_attr->attach_btf_id;
+	attr.attach_prog_fd = load_attr->attach_prog_fd;
+
+	attr.prog_ifindex = load_attr->prog_ifindex;
+	attr.kern_version = load_attr->kern_version;
+
+	attr.insn_cnt = (__u32)load_attr->insn_cnt;
 	attr.insns = ptr_to_u64(load_attr->insns);
 	attr.license = ptr_to_u64(load_attr->license);
 
-	attr.log_level = log_level;
-	if (log_level) {
-		attr.log_buf = ptr_to_u64(log_buf);
-		attr.log_size = log_buf_sz;
-	} else {
-		attr.log_buf = ptr_to_u64(NULL);
-		attr.log_size = 0;
+	attr.log_level = load_attr->log_level;
+	if (attr.log_level) {
+		attr.log_buf = ptr_to_u64(load_attr->log_buf);
+		attr.log_size = load_attr->log_buf_sz;
 	}
 
 	attr.prog_btf_fd = load_attr->prog_btf_fd;
+	attr.prog_flags = load_attr->prog_flags;
+
 	attr.func_info_rec_size = load_attr->func_info_rec_size;
 	attr.func_info_cnt = load_attr->func_info_cnt;
 	attr.func_info = ptr_to_u64(load_attr->func_info);
+
 	attr.line_info_rec_size = load_attr->line_info_rec_size;
 	attr.line_info_cnt = load_attr->line_info_cnt;
 	attr.line_info = ptr_to_u64(load_attr->line_info);
+
 	if (load_attr->name)
 		memcpy(attr.prog_name, load_attr->name,
-		       min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
-	attr.prog_flags = load_attr->prog_flags;
+		       min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1));
 
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 	if (fd >= 0)
@@ -307,19 +300,19 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 		}
 
 		fd = sys_bpf_prog_load(&attr, sizeof(attr));
-
 		if (fd >= 0)
 			goto done;
 	}
 
-	if (log_level || !log_buf)
+	if (load_attr->log_level || !load_attr->log_buf)
 		goto done;
 
 	/* Try again with log */
-	attr.log_buf = ptr_to_u64(log_buf);
-	attr.log_size = log_buf_sz;
+	attr.log_buf = ptr_to_u64(load_attr->log_buf);
+	attr.log_size = load_attr->log_buf_sz;
 	attr.log_level = 1;
-	log_buf[0] = 0;
+	load_attr->log_buf[0] = 0;
+
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 done:
 	free(finfo);
@@ -327,6 +320,49 @@ done:
 	return fd;
 }
 
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+			   char *log_buf, size_t log_buf_sz)
+{
+	struct bpf_prog_load_params p = {};
+
+	if (!load_attr || !log_buf != !log_buf_sz)
+		return -EINVAL;
+
+	p.prog_type = load_attr->prog_type;
+	p.expected_attach_type = load_attr->expected_attach_type;
+	switch (p.prog_type) {
+	case BPF_PROG_TYPE_STRUCT_OPS:
+	case BPF_PROG_TYPE_LSM:
+		p.attach_btf_id = load_attr->attach_btf_id;
+		break;
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_EXT:
+		p.attach_btf_id = load_attr->attach_btf_id;
+		p.attach_prog_fd = load_attr->attach_prog_fd;
+		break;
+	default:
+		p.prog_ifindex = load_attr->prog_ifindex;
+		p.kern_version = load_attr->kern_version;
+	}
+	p.insn_cnt = load_attr->insns_cnt;
+	p.insns = load_attr->insns;
+	p.license = load_attr->license;
+	p.log_level = load_attr->log_level;
+	p.log_buf = log_buf;
+	p.log_buf_sz = log_buf_sz;
+	p.prog_btf_fd = load_attr->prog_btf_fd;
+	p.func_info_rec_size = load_attr->func_info_rec_size;
+	p.func_info_cnt = load_attr->func_info_cnt;
+	p.func_info = load_attr->func_info;
+	p.line_info_rec_size = load_attr->line_info_rec_size;
+	p.line_info_cnt = load_attr->line_info_cnt;
+	p.line_info = load_attr->line_info;
+	p.name = load_attr->name;
+	p.prog_flags = load_attr->prog_flags;
+
+	return libbpf__bpf_prog_load(&p);
+}
+
 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ca20e493726d..103d66e27406 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6809,7 +6809,7 @@ static int
 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	     char *license, __u32 kern_version, int *pfd)
 {
-	struct bpf_load_program_attr load_attr;
+	struct bpf_prog_load_params load_attr = {};
 	char *cp, errmsg[STRERR_BUFSIZE];
 	size_t log_buf_size = 0;
 	char *log_buf = NULL;
@@ -6828,7 +6828,6 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	if (!insns || !insns_cnt)
 		return -EINVAL;
 
-	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
 	load_attr.prog_type = prog->type;
 	/* old kernels might not support specifying expected_attach_type */
 	if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&
@@ -6839,19 +6838,14 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	if (kernel_supports(FEAT_PROG_NAME))
 		load_attr.name = prog->name;
 	load_attr.insns = insns;
-	load_attr.insns_cnt = insns_cnt;
+	load_attr.insn_cnt = insns_cnt;
 	load_attr.license = license;
-	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
-	    prog->type == BPF_PROG_TYPE_LSM) {
-		load_attr.attach_btf_id = prog->attach_btf_id;
-	} else if (prog->type == BPF_PROG_TYPE_TRACING ||
-		   prog->type == BPF_PROG_TYPE_EXT) {
-		load_attr.attach_prog_fd = prog->attach_prog_fd;
-		load_attr.attach_btf_id = prog->attach_btf_id;
-	} else {
-		load_attr.kern_version = kern_version;
-		load_attr.prog_ifindex = prog->prog_ifindex;
-	}
+	load_attr.attach_btf_id = prog->attach_btf_id;
+	load_attr.attach_prog_fd = prog->attach_prog_fd;
+	load_attr.attach_btf_id = prog->attach_btf_id;
+	load_attr.kern_version = kern_version;
+	load_attr.prog_ifindex = prog->prog_ifindex;
+
 	/* specify func_info/line_info only if kernel supports them */
 	btf_fd = bpf_object__btf_fd(prog->obj);
 	if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {
@@ -6875,7 +6869,9 @@ retry_load:
 		*log_buf = 0;
 	}
 
-	ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
+	load_attr.log_buf = log_buf;
+	load_attr.log_buf_sz = log_buf_size;
+	ret = libbpf__bpf_prog_load(&load_attr);
 
 	if (ret >= 0) {
 		if (log_buf && load_attr.log_level)
@@ -6916,9 +6912,9 @@ retry_load:
 		pr_warn("-- BEGIN DUMP LOG ---\n");
 		pr_warn("\n%s\n", log_buf);
 		pr_warn("-- END LOG --\n");
-	} else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+	} else if (load_attr.insn_cnt >= BPF_MAXINSNS) {
 		pr_warn("Program too large (%zu insns), at most %d insns\n",
-			load_attr.insns_cnt, BPF_MAXINSNS);
+			load_attr.insn_cnt, BPF_MAXINSNS);
 		ret = -LIBBPF_ERRNO__PROG2BIG;
 	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
 		/* Wrong program type? */
@@ -6926,7 +6922,9 @@ retry_load:
 
 		load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
 		load_attr.expected_attach_type = 0;
-		fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+		load_attr.log_buf = NULL;
+		load_attr.log_buf_sz = 0;
+		fd = libbpf__bpf_prog_load(&load_attr);
 		if (fd >= 0) {
 			close(fd);
 			ret = -LIBBPF_ERRNO__PROGTYPE;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index e569ae63808e..681073a67ae3 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -151,6 +151,35 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 			 const char *str_sec, size_t str_len);
 
+struct bpf_prog_load_params {
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	const char *name;
+	const struct bpf_insn *insns;
+	size_t insn_cnt;
+	const char *license;
+	__u32 kern_version;
+	__u32 attach_prog_fd;
+	__u32 attach_btf_id;
+	__u32 prog_ifindex;
+	__u32 prog_btf_fd;
+	__u32 prog_flags;
+
+	__u32 func_info_rec_size;
+	const void *func_info;
+	__u32 func_info_cnt;
+
+	__u32 line_info_rec_size;
+	const void *line_info;
+	__u32 line_info_cnt;
+
+	__u32 log_level;
+	char *log_buf;
+	size_t log_buf_sz;
+};
+
+int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr);
+
 int bpf_object__section_size(const struct bpf_object *obj, const char *name,
 			     __u32 *size);
 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
-- 
cgit v1.2.3-70-g09d2


From 91abb4a6d79df6c4dcd86d85632df53c8cca2dcf Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:32 -0800
Subject: libbpf: Support attachment of BPF tracing programs to kernel modules

Teach libbpf to search for BTF types in kernel modules for tracing BPF
programs. This allows attachment of raw_tp/fentry/fexit/fmod_ret/etc BPF
program types to tracepoints and functions in kernel modules.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-13-andrii@kernel.org
---
 tools/lib/bpf/bpf.c             |   5 +-
 tools/lib/bpf/libbpf.c          | 138 +++++++++++++++++++++++++++++++---------
 tools/lib/bpf/libbpf_internal.h |   1 +
 3 files changed, 112 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5d681ce32b37..bba48ff4c5c0 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -231,8 +231,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
 
+	if (load_attr->attach_prog_fd)
+		attr.attach_prog_fd = load_attr->attach_prog_fd;
+	else
+		attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd;
 	attr.attach_btf_id = load_attr->attach_btf_id;
-	attr.attach_prog_fd = load_attr->attach_prog_fd;
 
 	attr.prog_ifindex = load_attr->prog_ifindex;
 	attr.kern_version = load_attr->kern_version;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 103d66e27406..912e01b946fe 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -278,6 +278,7 @@ struct bpf_program {
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
 	int prog_ifindex;
+	__u32 attach_btf_obj_fd;
 	__u32 attach_btf_id;
 	__u32 attach_prog_fd;
 	void *func_info;
@@ -408,6 +409,7 @@ struct module_btf {
 	struct btf *btf;
 	char *name;
 	__u32 id;
+	int fd;
 };
 
 struct bpf_object {
@@ -4766,8 +4768,7 @@ static int load_module_btfs(struct bpf_object *obj)
 		if (err) {
 			err = -errno;
 			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
-			close(fd);
-			return err;
+			goto err_out;
 		}
 
 		/* ignore non-module BTFs */
@@ -4777,25 +4778,33 @@ static int load_module_btfs(struct bpf_object *obj)
 		}
 
 		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
-		close(fd);
 		if (IS_ERR(btf)) {
 			pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
 				name, id, PTR_ERR(btf));
-			return PTR_ERR(btf);
+			err = PTR_ERR(btf);
+			goto err_out;
 		}
 
 		err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
 				     sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
 		if (err)
-			return err;
+			goto err_out;
 
 		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
 
 		mod_btf->btf = btf;
 		mod_btf->id = id;
+		mod_btf->fd = fd;
 		mod_btf->name = strdup(name);
-		if (!mod_btf->name)
-			return -ENOMEM;
+		if (!mod_btf->name) {
+			err = -ENOMEM;
+			goto err_out;
+		}
+		continue;
+
+err_out:
+		close(fd);
+		return err;
 	}
 
 	return 0;
@@ -6841,7 +6850,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.insn_cnt = insns_cnt;
 	load_attr.license = license;
 	load_attr.attach_btf_id = prog->attach_btf_id;
-	load_attr.attach_prog_fd = prog->attach_prog_fd;
+	if (prog->attach_prog_fd)
+		load_attr.attach_prog_fd = prog->attach_prog_fd;
+	else
+		load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
 	load_attr.attach_btf_id = prog->attach_btf_id;
 	load_attr.kern_version = kern_version;
 	load_attr.prog_ifindex = prog->prog_ifindex;
@@ -6937,11 +6949,11 @@ out:
 	return ret;
 }
 
-static int libbpf_find_attach_btf_id(struct bpf_program *prog);
+static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id);
 
 int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 {
-	int err = 0, fd, i, btf_id;
+	int err = 0, fd, i;
 
 	if (prog->obj->loaded) {
 		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
@@ -6951,10 +6963,14 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 	if ((prog->type == BPF_PROG_TYPE_TRACING ||
 	     prog->type == BPF_PROG_TYPE_LSM ||
 	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
-		btf_id = libbpf_find_attach_btf_id(prog);
-		if (btf_id <= 0)
-			return btf_id;
-		prog->attach_btf_id = btf_id;
+		int btf_obj_fd = 0, btf_type_id = 0;
+
+		err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
+		if (err)
+			return err;
+
+		prog->attach_btf_obj_fd = btf_obj_fd;
+		prog->attach_btf_id = btf_type_id;
 	}
 
 	if (prog->instances.nr < 0 || !prog->instances.fds) {
@@ -7467,6 +7483,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 
 	/* clean up module BTFs */
 	for (i = 0; i < obj->btf_module_cnt; i++) {
+		close(obj->btf_modules[i].fd);
 		btf__free(obj->btf_modules[i].btf);
 		free(obj->btf_modules[i].name);
 	}
@@ -8821,8 +8838,8 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 	return btf__find_by_name_kind(btf, btf_type_name, kind);
 }
 
-static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
-					enum bpf_attach_type attach_type)
+static inline int find_attach_btf_id(struct btf *btf, const char *name,
+				     enum bpf_attach_type attach_type)
 {
 	int err;
 
@@ -8838,9 +8855,6 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
 	else
 		err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 
-	if (err <= 0)
-		pr_warn("%s is not found in vmlinux BTF\n", name);
-
 	return err;
 }
 
@@ -8856,7 +8870,10 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 		return -EINVAL;
 	}
 
-	err = __find_vmlinux_btf_id(btf, name, attach_type);
+	err = find_attach_btf_id(btf, name, attach_type);
+	if (err <= 0)
+		pr_warn("%s is not found in vmlinux BTF\n", name);
+
 	btf__free(btf);
 	return err;
 }
@@ -8894,11 +8911,49 @@ out:
 	return err;
 }
 
-static int libbpf_find_attach_btf_id(struct bpf_program *prog)
+static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
+			      enum bpf_attach_type attach_type,
+			      int *btf_obj_fd, int *btf_type_id)
+{
+	int ret, i;
+
+	ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
+	if (ret > 0) {
+		*btf_obj_fd = 0; /* vmlinux BTF */
+		*btf_type_id = ret;
+		return 0;
+	}
+	if (ret != -ENOENT)
+		return ret;
+
+	ret = load_module_btfs(obj);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < obj->btf_module_cnt; i++) {
+		const struct module_btf *mod = &obj->btf_modules[i];
+
+		ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
+		if (ret > 0) {
+			*btf_obj_fd = mod->fd;
+			*btf_type_id = ret;
+			return 0;
+		}
+		if (ret == -ENOENT)
+			continue;
+
+		return ret;
+	}
+
+	return -ESRCH;
+}
+
+static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id)
 {
 	enum bpf_attach_type attach_type = prog->expected_attach_type;
 	__u32 attach_prog_fd = prog->attach_prog_fd;
-	const char *name = prog->sec_name;
+	const char *name = prog->sec_name, *attach_name;
+	const struct bpf_sec_def *sec = NULL;
 	int i, err;
 
 	if (!name)
@@ -8909,17 +8964,37 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
 			continue;
 		if (strncmp(name, section_defs[i].sec, section_defs[i].len))
 			continue;
-		if (attach_prog_fd)
-			err = libbpf_find_prog_btf_id(name + section_defs[i].len,
-						      attach_prog_fd);
-		else
-			err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
-						    name + section_defs[i].len,
-						    attach_type);
+
+		sec = &section_defs[i];
+		break;
+	}
+
+	if (!sec) {
+		pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name);
+		return -ESRCH;
+	}
+	attach_name = name + sec->len;
+
+	/* BPF program's BTF ID */
+	if (attach_prog_fd) {
+		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
+		if (err < 0) {
+			pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
+				 attach_prog_fd, attach_name, err);
+			return err;
+		}
+		*btf_obj_fd = 0;
+		*btf_type_id = err;
+		return 0;
+	}
+
+	/* kernel/module BTF ID */
+	err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
+	if (err) {
+		pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
 		return err;
 	}
-	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
-	return -ESRCH;
+	return 0;
 }
 
 int libbpf_attach_type_by_name(const char *name,
@@ -10808,6 +10883,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 		return btf_id;
 
 	prog->attach_btf_id = btf_id;
+	prog->attach_btf_obj_fd = 0;
 	prog->attach_prog_fd = attach_prog_fd;
 	return 0;
 }
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 681073a67ae3..969d0ac592ba 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -160,6 +160,7 @@ struct bpf_prog_load_params {
 	const char *license;
 	__u32 kern_version;
 	__u32 attach_prog_fd;
+	__u32 attach_btf_obj_fd;
 	__u32 attach_btf_id;
 	__u32 prog_ifindex;
 	__u32 prog_btf_fd;
-- 
cgit v1.2.3-70-g09d2


From bc9ed69c79ae7577314a24e09c5b0d1c1c314ced Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:33 -0800
Subject: selftests/bpf: Add tp_btf CO-RE reloc test for modules

Add another CO-RE relocation test for kernel module relocations. This time for
tp_btf with direct memory reads.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-14-andrii@kernel.org
---
 .../testing/selftests/bpf/prog_tests/core_reloc.c  |  3 +-
 .../selftests/bpf/progs/test_core_reloc_module.c   | 32 +++++++++++++++++++++-
 2 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index bb980848cd77..06eb956ff7bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -514,7 +514,8 @@ static struct core_reloc_test_case test_cases[] = {
 	},
 
 	/* validate we can find kernel module BTF types for relocs/attach */
-	MODULES_CASE("module", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
+	MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"),
+	MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL),
 
 	/* validate BPF program can use multiple flavors to match against
 	 * single target BTF type
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
index d1840c1a9d36..56363959f7b0 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -36,7 +36,7 @@ struct core_reloc_module_output {
 };
 
 SEC("raw_tp/bpf_testmod_test_read")
-int BPF_PROG(test_core_module,
+int BPF_PROG(test_core_module_probed,
 	     struct task_struct *task,
 	     struct bpf_testmod_test_read_ctx *read_ctx)
 {
@@ -64,3 +64,33 @@ int BPF_PROG(test_core_module,
 
 	return 0;
 }
+
+SEC("tp_btf/bpf_testmod_test_read")
+int BPF_PROG(test_core_module_direct,
+	     struct task_struct *task,
+	     struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	struct core_reloc_module_output *out = (void *)&data.out;
+	__u64 pid_tgid = bpf_get_current_pid_tgid();
+	__u32 real_tgid = (__u32)(pid_tgid >> 32);
+	__u32 real_pid = (__u32)pid_tgid;
+
+	if (data.my_pid_tgid != pid_tgid)
+		return 0;
+
+	if (task->pid != real_pid || task->tgid != real_tgid)
+		return 0;
+
+	out->len = read_ctx->len;
+	out->off = read_ctx->off;
+
+	out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);
+	out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx);
+	out->buf_exists = bpf_core_field_exists(read_ctx->buf);
+	out->off_exists = bpf_core_field_exists(read_ctx->off);
+	out->len_exists = bpf_core_field_exists(read_ctx->len);
+
+	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 1e38abefcfd65f3ef7b12895dfd48db80aca28da Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 12:46:34 -0800
Subject: selftests/bpf: Add fentry/fexit/fmod_ret selftest for kernel module

Add new selftest checking attachment of fentry/fexit/fmod_ret (and raw
tracepoint ones for completeness) BPF programs to kernel module function.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-15-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/module_attach.c       | 53 +++++++++++++++++
 .../selftests/bpf/progs/test_module_attach.c       | 66 ++++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/module_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_module_attach.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
new file mode 100644
index 000000000000..4b65e9918764
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_module_attach.skel.h"
+
+static int duration;
+
+static int trigger_module_test_read(int read_sz)
+{
+	int fd, err;
+
+	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
+	err = -errno;
+	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+		return err;
+
+	read(fd, NULL, read_sz);
+	close(fd);
+
+	return 0;
+}
+
+void test_module_attach(void)
+{
+	const int READ_SZ = 456;
+	struct test_module_attach* skel;
+	struct test_module_attach__bss *bss;
+	int err;
+
+	skel = test_module_attach__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	bss = skel->bss;
+
+	err = test_module_attach__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read");
+
+	ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");
+	ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
+	ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
+	ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
+	ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
+	ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
+
+cleanup:
+	test_module_attach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
new file mode 100644
index 000000000000..b563563df172
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+__u32 raw_tp_read_sz = 0;
+
+SEC("raw_tp/bpf_testmod_test_read")
+int BPF_PROG(handle_raw_tp,
+	     struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	raw_tp_read_sz = BPF_CORE_READ(read_ctx, len);
+	return 0;
+}
+
+__u32 tp_btf_read_sz = 0;
+
+SEC("tp_btf/bpf_testmod_test_read")
+int BPF_PROG(handle_tp_btf,
+	     struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
+{
+	tp_btf_read_sz = read_ctx->len;
+	return 0;
+}
+
+__u32 fentry_read_sz = 0;
+
+SEC("fentry/bpf_testmod_test_read")
+int BPF_PROG(handle_fentry,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+	fentry_read_sz = len;
+	return 0;
+}
+
+__u32 fexit_read_sz = 0;
+int fexit_ret = 0;
+
+SEC("fexit/bpf_testmod_test_read")
+int BPF_PROG(handle_fexit,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len,
+	     int ret)
+{
+	fexit_read_sz = len;
+	fexit_ret = ret;
+	return 0;
+}
+
+__u32 fmod_ret_read_sz = 0;
+
+SEC("fmod_ret/bpf_testmod_test_read")
+int BPF_PROG(handle_fmod_ret,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+	fmod_ret_read_sz = len;
+	return 0; /* don't override the exit code */
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-70-g09d2


From 3015b500ae42356936b9b4a8b660eacaee7a6147 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 15:54:39 -0800
Subject: libbpf: Use memcpy instead of strncpy to please GCC

Some versions of GCC are really nit-picky about strncpy() use. Use memcpy(),
as they are pretty much equivalent for the case of fixed length strings.

Fixes: e459f49b4394 ("libbpf: Separate XDP program load with xsk socket creation")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203235440.2302137-1-andrii@kernel.org
---
 tools/lib/bpf/xsk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 4b051ec7cfbb..e3e41ceeb1bc 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -583,7 +583,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
 	}
 
 	ctx->ifindex = ifindex;
-	strncpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	memcpy(ctx->ifname, ifname, IFNAMSIZ -1);
 	ctx->ifname[IFNAMSIZ - 1] = 0;
 
 	xsk->ctx = ctx;
-- 
cgit v1.2.3-70-g09d2


From eceae70bdeaeb6b8ceb662983cf663ff352fbc96 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 3 Dec 2020 15:54:40 -0800
Subject: selftests/bpf: Fix invalid use of strncat in test_sockmap

strncat()'s third argument is how many bytes will be added *in addition* to
already existing bytes in destination. Plus extra zero byte will be added
after that. So existing use in test_sockmap has many opportunities to overflow
the string and cause memory corruptions. And in this case, GCC complains for
a good reason.

Fixes: 16962b2404ac ("bpf: sockmap, add selftests")
Fixes: 73563aa3d977 ("selftests/bpf: test_sockmap, print additional test options")
Fixes: 1ade9abadfca ("bpf: test_sockmap, add options for msg_pop_data() helper")
Fixes: 463bac5f1ca7 ("bpf, selftests: Add test for ktls with skb bpf ingress policy")
Fixes: e9dd904708c4 ("bpf: add tls support for testing in test_sockmap")
Fixes: 753fb2ee0934 ("bpf: sockmap, add msg_peek tests to test_sockmap")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203235440.2302137-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/test_sockmap.c | 36 +++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 0fa1e421c3d7..427ca00a3217 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1273,6 +1273,16 @@ static char *test_to_str(int test)
 	return "unknown";
 }
 
+static void append_str(char *dst, const char *src, size_t dst_cap)
+{
+	size_t avail = dst_cap - strlen(dst);
+
+	if (avail <= 1) /* just zero byte could be written */
+		return;
+
+	strncat(dst, src, avail - 1); /* strncat() adds + 1 for zero byte */
+}
+
 #define OPTSTRING 60
 static void test_options(char *options)
 {
@@ -1281,42 +1291,42 @@ static void test_options(char *options)
 	memset(options, 0, OPTSTRING);
 
 	if (txmsg_pass)
-		strncat(options, "pass,", OPTSTRING);
+		append_str(options, "pass,", OPTSTRING);
 	if (txmsg_redir)
-		strncat(options, "redir,", OPTSTRING);
+		append_str(options, "redir,", OPTSTRING);
 	if (txmsg_drop)
-		strncat(options, "drop,", OPTSTRING);
+		append_str(options, "drop,", OPTSTRING);
 	if (txmsg_apply) {
 		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_cork) {
 		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_start) {
 		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_end) {
 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_start_pop) {
 		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
 			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
-		strncat(options, tstr, OPTSTRING);
+		append_str(options, tstr, OPTSTRING);
 	}
 	if (txmsg_ingress)
-		strncat(options, "ingress,", OPTSTRING);
+		append_str(options, "ingress,", OPTSTRING);
 	if (txmsg_redir_skb)
-		strncat(options, "redir_skb,", OPTSTRING);
+		append_str(options, "redir_skb,", OPTSTRING);
 	if (txmsg_ktls_skb)
-		strncat(options, "ktls_skb,", OPTSTRING);
+		append_str(options, "ktls_skb,", OPTSTRING);
 	if (ktls)
-		strncat(options, "ktls,", OPTSTRING);
+		append_str(options, "ktls,", OPTSTRING);
 	if (peek_flag)
-		strncat(options, "peek,", OPTSTRING);
+		append_str(options, "peek,", OPTSTRING);
 }
 
 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
-- 
cgit v1.2.3-70-g09d2


From 23afeaeff3d985b07abf2c76fd12b8c548da8367 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 18 Nov 2020 11:47:46 +0100
Subject: selftests: core: add tests for CLOSE_RANGE_CLOEXEC

check that close_range(initial_fd, last_fd, CLOSE_RANGE_CLOEXEC)
correctly sets the close-on-exec bit for the specified file
descriptors.

Open 100 file descriptors and set the close-on-exec flag for a subset
of them first, then set it for every file descriptor above 2.  Make
sure RLIMIT_NOFILE doesn't affect the result.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Link: https://lore.kernel.org/r/20201118104746.873084-3-gscrivan@redhat.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/core/close_range_test.c | 74 +++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 575b391ddc78..87e16d65d9d7 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <syscall.h>
 #include <unistd.h>
+#include <sys/resource.h>
 
 #include "../kselftest_harness.h"
 #include "../clone3/clone3_selftests.h"
@@ -23,6 +24,10 @@
 #define CLOSE_RANGE_UNSHARE	(1U << 1)
 #endif
 
+#ifndef CLOSE_RANGE_CLOEXEC
+#define CLOSE_RANGE_CLOEXEC	(1U << 2)
+#endif
+
 static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
 				  unsigned int flags)
 {
@@ -224,4 +229,73 @@ TEST(close_range_unshare_capped)
 	EXPECT_EQ(0, WEXITSTATUS(status));
 }
 
+TEST(close_range_cloexec)
+{
+	int i, ret;
+	int open_fds[101];
+	struct rlimit rlimit;
+
+	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+		int fd;
+
+		fd = open("/dev/null", O_RDONLY);
+		ASSERT_GE(fd, 0) {
+			if (errno == ENOENT)
+				XFAIL(return, "Skipping test since /dev/null does not exist");
+		}
+
+		open_fds[i] = fd;
+	}
+
+	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			XFAIL(return, "close_range() syscall not supported");
+		if (errno == EINVAL)
+			XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+	}
+
+	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
+	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+	rlimit.rlim_cur = 25;
+	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
+
+	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
+	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
+	ASSERT_EQ(0, ret);
+	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
+	ASSERT_EQ(0, ret);
+
+	for (i = 0; i <= 50; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+
+	for (i = 51; i <= 74; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, 0);
+	}
+
+	for (i = 75; i <= 100; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+
+	/* Test a common pattern.  */
+	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
+	for (i = 0; i <= 100; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+}
+
+
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3-70-g09d2


From 7d17167244f5415bc6bc90f5bb0074b6d79676b4 Mon Sep 17 00:00:00 2001
From: Florian Lehner <dev@der-flo.net>
Date: Fri, 4 Dec 2020 19:18:27 +0100
Subject: selftests/bpf: Print reason when a tester could not run a program

Commit 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported
program types") added a check to skip unsupported program types. As
bpf_probe_prog_type can change errno, do_single_test should save it before
printing a reason why a supported BPF program type failed to load.

Fixes: 8184d44c9a57 ("selftests/bpf: skip verifier tests for unsupported program types")
Signed-off-by: Florian Lehner <dev@der-flo.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201204181828.11974-2-dev@der-flo.net
---
 tools/testing/selftests/bpf/test_verifier.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 4bfe3aa2cfc4..ceea9409639e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -936,6 +936,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int run_errs, run_successes;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
+	int saved_errno;
 	int fixup_skips;
 	__u32 pflags;
 	int i, err;
@@ -997,6 +998,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	}
 
 	fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
+	saved_errno = errno;
 
 	/* BPF_PROG_TYPE_TRACING requires more setup and
 	 * bpf_probe_prog_type won't give correct answer
@@ -1013,7 +1015,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
 		if (fd_prog < 0) {
 			printf("FAIL\nFailed to load prog '%s'!\n",
-			       strerror(errno));
+			       strerror(saved_errno));
 			goto fail_log;
 		}
 #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-- 
cgit v1.2.3-70-g09d2


From 5f61b7c6975b03e6ace2cfb13d415d5f475c8830 Mon Sep 17 00:00:00 2001
From: Florian Lehner <dev@der-flo.net>
Date: Fri, 4 Dec 2020 19:18:28 +0100
Subject: selftests/bpf: Avoid errno clobbering

Print a message when the returned error is about a program type being
not supported or because of permission problems.
These messages are expected if the program to test was actually
executed.

Signed-off-by: Florian Lehner <dev@der-flo.net>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201204181828.11974-3-dev@der-flo.net
---
 tools/testing/selftests/bpf/test_verifier.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ceea9409639e..777a81404fdb 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -875,19 +875,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
 	__u8 tmp[TEST_DATA_LEN << 2];
 	__u32 size_tmp = sizeof(tmp);
 	uint32_t retval;
-	int err;
+	int err, saved_errno;
 
 	if (unpriv)
 		set_admin(true);
 	err = bpf_prog_test_run(fd_prog, 1, data, size_data,
 				tmp, &size_tmp, &retval, NULL);
+	saved_errno = errno;
+
 	if (unpriv)
 		set_admin(false);
-	if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
-		printf("Unexpected bpf_prog_test_run error ");
-		return err;
+
+	if (err) {
+		switch (saved_errno) {
+		case 524/*ENOTSUPP*/:
+			printf("Did not run the program (not supported) ");
+			return 0;
+		case EPERM:
+			if (unpriv) {
+				printf("Did not run the program (no permission) ");
+				return 0;
+			}
+			/* fallthrough; */
+		default:
+			printf("FAIL: Unexpected bpf_prog_test_run error (%s) ",
+				strerror(saved_errno));
+			return err;
+		}
 	}
-	if (!err && retval != expected_val &&
+
+	if (retval != expected_val &&
 	    expected_val != POINTER_VALUE) {
 		printf("FAIL retval %d != %d ", retval, expected_val);
 		return 1;
-- 
cgit v1.2.3-70-g09d2


From 2195444e09b4fd3488a69e2f269a401dd4e4f512 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Wed, 2 Dec 2020 14:05:16 +0100
Subject: selftests: add selftest for the SRv6 End.DT4 behavior

this selftest is designed for evaluating the new SRv6 End.DT4 behavior
used, in this example, for implementing IPv4 L3 VPN use cases.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/srv6_end_dt4_l3vpn_test.sh       | 494 +++++++++++++++++++++
 1 file changed, 494 insertions(+)
 create mode 100755 tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
new file mode 100755
index 000000000000..ad7a9fc59934
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -0,0 +1,494 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT4 behavior used for
+# implementing IPv4 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT4 behavior and c) VRF.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  | 10.0.0.1/24 |  |                                   |  | 10.0.0.2/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | | 10.0.0.254/24 |    +----------+ |   | +----------+    | 10.0.0.254/24 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | | 10.0.0.254/24 |                 |   |                 | 10.0.0.254/24 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  | 10.0.0.3/24 |  |                                   |  | 10.0.0.4/24 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.2   |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.4   |apply seg6 encap segs fc00:12:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.1   |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |10.0.0.3   |apply seg6 encap segs fc00:21:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	# destroy routers rt-* and hosts hs-*
+	for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+		ip netns del ${ns} || true
+	done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt=$1
+	local nsname=rt-${rt}
+
+	ip netns add ${nsname}
+	ip link set veth-rt-${rt} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hs=$1
+	local rt=$2
+	local tid=$3
+	local hsname=hs-t${tid}-${hs}
+	local rtname=rt-${rt}
+	local rtveth=veth-t${tid}
+
+	# set the networking for the host
+	ip netns add ${hsname}
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	local hssrc_name=hs-t${tid}-${hssrc}
+	local hsdst_name=hs-t${tid}-${hsdst}
+	local rtsrc_name=rt-${rtsrc}
+	local rtdst_name=rt-${rtdst}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT4 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv4 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv4 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv4 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
-- 
cgit v1.2.3-70-g09d2


From 2bc035538e167e28d900f2f51403458a05d7cc4a Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Wed, 2 Dec 2020 14:05:17 +0100
Subject: selftests: add selftest for the SRv6 End.DT6 (VRF) behavior

this selftest is designed for evaluating the new SRv6 End.DT6 (VRF) behavior
used, in this example, for implementing IPv6 L3 VPN use cases.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Paolo Lungaroni <paolo.lungaroni@cnit.it>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/srv6_end_dt6_l3vpn_test.sh       | 502 +++++++++++++++++++++
 1 file changed, 502 insertions(+)
 create mode 100755 tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
new file mode 100755
index 000000000000..68708f5e26a0
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -0,0 +1,502 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@cnit.it>
+
+# This test is designed for evaluating the new SRv6 End.DT6 behavior used for
+# implementing IPv6 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT6 behavior and c) VRF.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+                                   +-------------------+
+# |                   |                                   |                   |
+# |  hs-t100-1 netns  |                                   |  hs-t100-2 netns  |
+# |                   |                                   |                   |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::1/64 |  |                                   |  |  cafe::2/64 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |        .          |                                   |         .         |
+# +-------------------+                                   +-------------------+
+#          .                                                        .
+#          .                                                        .
+#          .                                                        .
+# +-----------------------------------+   +-----------------------------------+
+# |        .                          |   |                         .         |
+# | +---------------+                 |   |                 +---------------- |
+# | |   veth-t100   |                 |   |                 |   veth-t100   | |
+# | |  cafe::254/64 |    +----------+ |   | +----------+    |  cafe::254/64 | |
+# | +-------+-------+    | localsid | |   | | localsid |    +-------+-------- |
+# |         |            |   table  | |   | |   table  |            |         |
+# |    +----+----+       +----------+ |   | +----------+       +----+----+    |
+# |    | vrf-100 |                    |   |                    | vrf-100 |    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |                    |   veth0    | |   | |   veth0    |                    |
+# |                    | fd00::1/64 |.|...|.| fd00::2/64 |                    |
+# |    +---------+     +------------+ |   | +------------+     +---------+    |
+# |    | vrf-200 |                    |   |                    | vrf-200 |    |
+# |    +----+----+                    |   |                    +----+----+    |
+# |         |                         |   |                         |         |
+# | +-------+-------+                 |   |                 +-------+-------- |
+# | |   veth-t200   |                 |   |                 |   veth-t200   | |
+# | |  cafe::254/64 |                 |   |                 |  cafe::254/64 | |
+# | +---------------+      rt-1 netns |   | rt-2 netns      +---------------- |
+# |        .                          |   |                          .        |
+# +-----------------------------------+   +-----------------------------------+
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+#          .                                                         .
+# +-------------------+                                   +-------------------+
+# |        .          |                                   |          .        |
+# |  +-------------+  |                                   |  +-------------+  |
+# |  |    veth0    |  |                                   |  |    veth0    |  |
+# |  |  cafe::3/64 |  |                                   |  |  cafe::4/64 |  |
+# |  +-------------+  |                                   |  +-------------+  |
+# |                   |                                   |                   |
+# |  hs-t200-3 netns  |                                   |  hs-t200-4 netns  |
+# |                   |                                   |                   |
+# +-------------------+                                   +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::2    |apply seg6 encap segs fc00:12:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::4    |apply seg6 encap segs fc00:12:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID              |Action                         |
+# +-------------------------------------------------+
+# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::1    |apply seg6 encap segs fc00:21:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t100               |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host       |Action                                 |
+# +---------------------------------------------------+
+# |cafe::3    |apply seg6 encap segs fc00:21:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64  |forward to dev veth_t200               |
+# +---------------------------------------------------+
+#
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	if [ "$TESTS" != "none" ]; then
+		printf "\nTests passed: %3d\n" ${nsuccess}
+		printf "Tests failed: %3d\n"   ${nfail}
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+cleanup()
+{
+	ip link del veth-rt-1 2>/dev/null || true
+	ip link del veth-rt-2 2>/dev/null || true
+
+	# destroy routers rt-* and hosts hs-*
+	for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+		ip netns del ${ns} || true
+	done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt=$1
+	local nsname=rt-${rt}
+
+	ip netns add ${nsname}
+	ip link set veth-rt-${rt} netns ${nsname}
+	ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+	ip -netns ${nsname} link set veth0 up
+	ip -netns ${nsname} link set lo up
+
+	ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+	local hs=$1
+	local rt=$2
+	local tid=$3
+	local hsname=hs-t${tid}-${hs}
+	local rtname=rt-${rt}
+	local rtveth=veth-t${tid}
+
+	# set the networking for the host
+	ip netns add ${hsname}
+
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+	ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+	ip -netns ${hsname} link set veth0 up
+	ip -netns ${hsname} link set lo up
+
+	# configure the VRF for the tenant X on the router which is directly
+	# connected to the source host.
+	ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+	ip -netns ${rtname} link set vrf-${tid} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	# enslave the veth-tX interface to the vrf-X in the access router
+	ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+	ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+	ip -netns ${rtname} link set ${rtveth} up
+
+	ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+
+	ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+	local hssrc=$1
+	local rtsrc=$2
+	local hsdst=$3
+	local rtdst=$4
+	local tid=$5
+
+	local hssrc_name=hs-t${tid}-${hssrc}
+	local hsdst_name=hs-t${tid}-${hsdst}
+	local rtsrc_name=rt-${rtsrc}
+	local rtdst_name=rt-${rtdst}
+	local rtveth=veth-t${tid}
+	local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006
+
+	ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+	# set the encap route for encapsulating packets which arrive from the
+	# host hssrc and destined to the access router rtsrc.
+	ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+		encap seg6 mode encap segs ${vpn_sid} dev veth0
+	ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+		via fd00::${rtdst} dev veth0
+
+	# set the decap route for decapsulating packets which arrive from
+	# the rtdst router and destined to the hsdst host.
+	ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+		encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid}
+
+	# all sids for VPNs start with a common locator which is fc00::/16.
+	# Routes for handling the SRv6 End.DT6 behavior instances are grouped
+	# together in the 'localsid' table.
+	#
+	# NOTE: added only once
+	if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+	    grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+		ip -netns ${rtdst_name} -6 rule add \
+			to ${VPN_LOCATOR_SERVICE}::/16 \
+			lookup ${LOCALSID_TABLE_ID} prio 999
+	fi
+}
+
+setup()
+{
+	ip link add veth-rt-1 type veth peer name veth-rt-2
+	# setup the networking for router rt-1 and router rt-2
+	setup_rt_networking 1
+	setup_rt_networking 2
+
+	# setup two hosts for the tenant 100.
+	#  - host hs-1 is directly connected to the router rt-1;
+	#  - host hs-2 is directly connected to the router rt-2.
+	setup_hs 1 1 100  #args: host router tenant
+	setup_hs 2 2 100
+
+	# setup two hosts for the tenant 200
+	#  - host hs-3 is directly connected to the router rt-1;
+	#  - host hs-4 is directly connected to the router rt-2.
+	setup_hs 3 1 200
+	setup_hs 4 2 200
+
+	# setup the IPv6 L3 VPN which connects the host hs-t100-1 and host
+	# hs-t100-2 within the same tenant 100.
+	setup_vpn_config 1 1 2 2 100  #args: src_host src_router dst_host dst_router tenant
+	setup_vpn_config 2 2 1 1 100
+
+	# setup the IPv6 L3 VPN which connects the host hs-t200-3 and host
+	# hs-t200-4 within the same tenant 200.
+	setup_vpn_config 3 1 4 2 200
+	setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+		>/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc=$1
+	local rtdst=$2
+
+	check_rt_connectivity ${rtsrc} ${rtdst}
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+		${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+	local hssrc=$1
+	local hsdst=$2
+	local tid=$3
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+	local hssrc=$1
+	local tidsrc=$2
+	local hsdst=$3
+	local tiddst=$4
+
+	check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+	# NOTE: ping should fail
+	log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc=$1
+	local tid=$2
+
+	check_hs_connectivity ${hssrc} 254 ${tid}
+	log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+	log_section "IPv6 routers connectivity test"
+
+	check_and_log_rt_connectivity 1 2
+	check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+	log_section "IPv6 connectivity test among hosts and gateway"
+
+	check_and_log_hs2gw_connectivity 1 100
+	check_and_log_hs2gw_connectivity 2 100
+
+	check_and_log_hs2gw_connectivity 3 200
+	check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+	check_and_log_hs_connectivity 1 2 100
+	check_and_log_hs_connectivity 2 1 100
+
+	check_and_log_hs_connectivity 3 4 200
+	check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+	local i
+	local j
+	local k
+	local tmp
+	local l1="1 2"
+	local l2="3 4"
+	local t1=100
+	local t2=200
+
+	log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+	for k in 0 1; do
+		for i in ${l1}; do
+			for j in ${l2}; do
+				check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+			done
+		done
+
+		# let us test the reverse path
+		tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+		tmp=${t1}; t1=${t2}; t2=${tmp}
+	done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+        echo "SKIP: vrf sysctl does not exist"
+        exit 0
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
-- 
cgit v1.2.3-70-g09d2


From 4f19cab76136e800a3f04d8c9aa4d8e770e3d3d8 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:05 +0100
Subject: bpf: Add a bpf_sock_from_file helper

While eBPF programs can check whether a file is a socket by file->f_op
== &socket_file_ops, they cannot convert the void private_data pointer
to a struct socket BTF pointer. In order to do this a new helper
wrapping sock_from_file is added.

This is useful to tracing programs but also other program types
inheriting this set of helpers such as iterators or LSM programs.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: KP Singh <kpsingh@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-2-revest@google.com
---
 include/uapi/linux/bpf.h       |  9 +++++++++
 kernel/trace/bpf_trace.c       | 20 ++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  4 ++++
 tools/include/uapi/linux/bpf.h |  9 +++++++++
 4 files changed, 42 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1233f14f659f..30b477a26482 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3822,6 +3822,14 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ *	Description
+ *		If the given file represents a socket, returns the associated
+ *		socket.
+ *	Return
+ *		A pointer to a struct socket on success or NULL if the file is
+ *		not a socket.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3986,6 +3994,7 @@ union bpf_attr {
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
+	FN(sock_from_file),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cb9d7478ef0c..0cf0a6331482 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1270,6 +1270,24 @@ const struct bpf_func_proto bpf_snprintf_btf_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_sock_from_file, struct file *, file)
+{
+	return (unsigned long) sock_from_file(file);
+}
+
+BTF_ID_LIST(bpf_sock_from_file_btf_ids)
+BTF_ID(struct, socket)
+BTF_ID(struct, file)
+
+static const struct bpf_func_proto bpf_sock_from_file_proto = {
+	.func		= bpf_sock_from_file,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_BTF_ID_OR_NULL,
+	.ret_btf_id	= &bpf_sock_from_file_btf_ids[0],
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg1_btf_id	= &bpf_sock_from_file_btf_ids[1],
+};
+
 const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1366,6 +1384,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_per_cpu_ptr_proto;
 	case BPF_FUNC_bpf_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
+	case BPF_FUNC_sock_from_file:
+		return &bpf_sock_from_file_proto;
 	default:
 		return NULL;
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 8b829748d488..867ada23281c 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -437,6 +437,8 @@ class PrinterHelpers(Printer):
             'struct path',
             'struct btf_ptr',
             'struct inode',
+            'struct socket',
+            'struct file',
     ]
     known_types = {
             '...',
@@ -482,6 +484,8 @@ class PrinterHelpers(Printer):
             'struct path',
             'struct btf_ptr',
             'struct inode',
+            'struct socket',
+            'struct file',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1233f14f659f..30b477a26482 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3822,6 +3822,14 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if IMA is disabled or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * struct socket *bpf_sock_from_file(struct file *file)
+ *	Description
+ *		If the given file represents a socket, returns the associated
+ *		socket.
+ *	Return
+ *		A pointer to a struct socket on success or NULL if the file is
+ *		not a socket.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3986,6 +3994,7 @@ union bpf_attr {
 	FN(bprm_opts_set),		\
 	FN(ktime_get_coarse_ns),	\
 	FN(ima_inode_hash),		\
+	FN(sock_from_file),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3-70-g09d2


From 593f6d41abbbc63e1ad297f7a36ab6060a812f0c Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:07 +0100
Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_delete

The eBPF program iterates over all entries (well, only one) of a socket
local storage map and deletes them all. The test makes sure that the
entry is indeed deleted.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-4-revest@google.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 64 ++++++++++++++++++++++
 .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c    | 23 ++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 448885b95eed..bb4a638f2e6f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -20,6 +20,7 @@
 #include "bpf_iter_bpf_percpu_hash_map.skel.h"
 #include "bpf_iter_bpf_array_map.skel.h"
 #include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_helpers.skel.h"
 #include "bpf_iter_bpf_sk_storage_map.skel.h"
 #include "bpf_iter_test_kern5.skel.h"
 #include "bpf_iter_test_kern6.skel.h"
@@ -913,6 +914,67 @@ out:
 	bpf_iter_bpf_percpu_array_map__destroy(skel);
 }
 
+/* An iterator program deletes all local storage in a map. */
+static void test_bpf_sk_storage_delete(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct bpf_iter_bpf_sk_storage_helpers *skel;
+	union bpf_iter_link_info linfo;
+	int err, len, map_fd, iter_fd;
+	struct bpf_link *link;
+	int sock_fd = -1;
+	__u32 val = 42;
+	char buf[64];
+
+	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+		goto out;
+	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+	if (CHECK(err, "map_update", "map_update failed\n"))
+		goto out;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.map.map_fd = map_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
+					&opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
+		  "map value wasn't deleted (err=%d, errno=%d)\n", err, errno))
+		goto close_iter;
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	if (sock_fd >= 0)
+		close(sock_fd);
+	bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
 static void test_bpf_sk_storage_map(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1067,6 +1129,8 @@ void test_bpf_iter(void)
 		test_bpf_percpu_array_map();
 	if (test__start_subtest("bpf_sk_storage_map"))
 		test_bpf_sk_storage_map();
+	if (test__start_subtest("bpf_sk_storage_delete"))
+		test_bpf_sk_storage_delete();
 	if (test__start_subtest("rdonly-buf-out-of-bound"))
 		test_rdonly_buf_out_of_bound();
 	if (test__start_subtest("buf-neg-offset"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
new file mode 100644
index 000000000000..01ff3235e413
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google LLC. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("iter/bpf_sk_storage_map")
+int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+	if (ctx->sk)
+		bpf_sk_storage_delete(&sk_stg_map, ctx->sk);
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From bd9b327e58f98aa7126291bf12b50720c660e787 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:08 +0100
Subject: selftests/bpf: Add an iterator selftest for bpf_sk_storage_get

The eBPF program iterates over all files and tasks. For all socket
files, it stores the tgid of the last task it encountered with a handle
to that socket. This is a heuristic for finding the "owner" of a socket
similar to what's done by lsof, ss, netstat or fuser. Potentially, this
information could be used from a cgroup_skb/*gress hook to try to
associate network traffic with processes.

The test makes sure that a socket it created is tagged with prog_tests's
pid.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-5-revest@google.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 40 ++++++++++++++++++++++
 .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c    | 24 +++++++++++++
 2 files changed, 64 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index bb4a638f2e6f..9336d0f18331 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -975,6 +975,44 @@ out:
 	bpf_iter_bpf_sk_storage_helpers__destroy(skel);
 }
 
+/* This creates a socket and its local storage. It then runs a task_iter BPF
+ * program that replaces the existing socket local storage with the tgid of the
+ * only task owning a file descriptor to this socket, this process, prog_tests.
+ */
+static void test_bpf_sk_storage_get(void)
+{
+	struct bpf_iter_bpf_sk_storage_helpers *skel;
+	int err, map_fd, val = -1;
+	int sock_fd = -1;
+
+	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+		goto out;
+
+	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+	if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n"))
+		goto close_socket;
+
+	do_dummy_read(skel->progs.fill_socket_owner);
+
+	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+	CHECK(err || val != getpid(), "bpf_map_lookup_elem",
+	      "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
+	      getpid(), val, err);
+
+close_socket:
+	close(sock_fd);
+out:
+	bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
 static void test_bpf_sk_storage_map(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
@@ -1131,6 +1169,8 @@ void test_bpf_iter(void)
 		test_bpf_sk_storage_map();
 	if (test__start_subtest("bpf_sk_storage_delete"))
 		test_bpf_sk_storage_delete();
+	if (test__start_subtest("bpf_sk_storage_get"))
+		test_bpf_sk_storage_get();
 	if (test__start_subtest("rdonly-buf-out-of-bound"))
 		test_rdonly_buf_out_of_bound();
 	if (test__start_subtest("buf-neg-offset"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
index 01ff3235e413..dde53df37de8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -21,3 +21,27 @@ int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
 
 	return 0;
 }
+
+SEC("iter/task_file")
+int fill_socket_owner(struct bpf_iter__task_file *ctx)
+{
+	struct task_struct *task = ctx->task;
+	struct file *file = ctx->file;
+	struct socket *sock;
+	int *sock_tgid;
+
+	if (!task || !file)
+		return 0;
+
+	sock = bpf_sock_from_file(file);
+	if (!sock)
+		return 0;
+
+	sock_tgid = bpf_sk_storage_get(&sk_stg_map, sock->sk, 0, 0);
+	if (!sock_tgid)
+		return 0;
+
+	*sock_tgid = task->tgid;
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 34da87213d3ddd26643aa83deff7ffc6463da0fc Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Fri, 4 Dec 2020 12:36:09 +0100
Subject: selftests/bpf: Test bpf_sk_storage_get in tcp iterators

This extends the existing bpf_sk_storage_get test where a socket is
created and tagged with its creator's pid by a task_file iterator.

A TCP iterator is now also used at the end of the test to negate the
values already stored in the local storage. The test therefore expects
-getpid() to be stored in the local storage.

Signed-off-by: Florent Revest <revest@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20201204113609.1850150-6-revest@google.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c      | 18 ++++++++++++++++--
 .../bpf/progs/bpf_iter_bpf_sk_storage_helpers.c        | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 9336d0f18331..0e586368948d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -978,6 +978,8 @@ out:
 /* This creates a socket and its local storage. It then runs a task_iter BPF
  * program that replaces the existing socket local storage with the tgid of the
  * only task owning a file descriptor to this socket, this process, prog_tests.
+ * It then runs a tcp socket iterator that negates the value in the existing
+ * socket local storage, the test verifies that the resulting value is -pid.
  */
 static void test_bpf_sk_storage_get(void)
 {
@@ -994,6 +996,10 @@ static void test_bpf_sk_storage_get(void)
 	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
 		goto out;
 
+	err = listen(sock_fd, 1);
+	if (CHECK(err != 0, "listen", "errno: %d\n", errno))
+		goto close_socket;
+
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 
 	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
@@ -1003,9 +1009,17 @@ static void test_bpf_sk_storage_get(void)
 	do_dummy_read(skel->progs.fill_socket_owner);
 
 	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
-	CHECK(err || val != getpid(), "bpf_map_lookup_elem",
+	if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
+	    "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
+	    getpid(), val, err))
+		goto close_socket;
+
+	do_dummy_read(skel->progs.negate_socket_local_storage);
+
+	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+	CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
 	      "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
-	      getpid(), val, err);
+	      -getpid(), val, err);
 
 close_socket:
 	close(sock_fd);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
index dde53df37de8..6cecab2b32ba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -45,3 +45,21 @@ int fill_socket_owner(struct bpf_iter__task_file *ctx)
 
 	return 0;
 }
+
+SEC("iter/tcp")
+int negate_socket_local_storage(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	int *sock_tgid;
+
+	if (!sk_common)
+		return 0;
+
+	sock_tgid = bpf_sk_storage_get(&sk_stg_map, sk_common, 0, 0);
+	if (!sock_tgid)
+		return 0;
+
+	*sock_tgid = -*sock_tgid;
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From 41fdfffd5783db62bb9e00605eee14c69b9c0974 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Wed, 2 Dec 2020 15:35:43 +0100
Subject: selftests: forwarding: Add MPLS L2VPN test

Connect hosts H1 and H2 using two intermediate encapsulation routers
(LER1 and LER2). These routers encapsulate traffic from the hosts,
including the original Ethernet header, into MPLS.

Use ping to test reachability between H1 and H2.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Link: https://lore.kernel.org/r/625f5c1aafa3a8085f8d3e082d680a82e16ffbaa.1606918980.git.gnault@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/Makefile    |   1 +
 tools/testing/selftests/net/forwarding/config      |   3 +
 .../selftests/net/forwarding/tc_mpls_l2vpn.sh      | 192 +++++++++++++++++++++
 3 files changed, 196 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 250fbb2d1625..d97bd6889446 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -48,6 +48,7 @@ TEST_PROGS = bridge_igmp.sh \
 	tc_chains.sh \
 	tc_flower_router.sh \
 	tc_flower.sh \
+	tc_mpls_l2vpn.sh \
 	tc_shblocks.sh \
 	tc_vlan_modify.sh \
 	vxlan_asymmetric.sh \
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index da96eff72a8e..10e9a3321ae1 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,6 +6,9 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_NET_VRF=m
 CONFIG_BPF_SYSCALL=y
 CONFIG_CGROUP_BPF=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_ACT_GACT=m
diff --git a/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
new file mode 100755
index 000000000000..03743f04e178
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+
+# | H1 (v$h1)             |
+# | 192.0.2.1/24          |
+# | 2001:db8::1/124       |
+# |                 + $h1 |
+# +-----------------|-----+
+#                   |
+#                   | (Plain Ethernet traffic)
+#                   |
+# +-----------------|-----------------------------------------+
+# | LER1            + $edge1                                  |
+# |                     -ingress:                             |
+# |                       -encapsulate Ethernet into MPLS     |
+# |                       -add outer Ethernet header          |
+# |                       -redirect to $mpls1 (egress)        |
+# |                                                           |
+# |                 + $mpls1                                  |
+# |                 |   -ingress:                             |
+# |                 |     -remove outer Ethernet header       |
+# |                 |     -remove MPLS header                 |
+# |                 |     -redirect to $edge1 (egress)        |
+# +-----------------|-----------------------------------------+
+#                   |
+#                   | (Ethernet over MPLS traffic)
+#                   |
+# +-----------------|-----------------------------------------+
+# | LER2            + $mpls2                                  |
+# |                     -ingress:                             |
+# |                       -remove outer Ethernet header       |
+# |                       -remove MPLS header                 |
+# |                       -redirect to $edge2 (egress)        |
+# |                                                           |
+# |                 + $edge2                                  |
+# |                 |   -ingress:                             |
+# |                 |     -encapsulate Ethernet into MPLS     |
+# |                 |     -add outer Ethernet header          |
+# |                 |     -redirect to $mpls2 (egress)        |
+# +-----------------|-----------------------------------------|
+#                   |
+#                   | (Plain Ethernet traffic)
+#                   |
+# +-----------------|-----+
+# | H2 (v$h2)       |     |
+# |                 + $h2 |
+# | 192.0.2.2/24          |
+# | 2001:db8::2/124       |
+# +-----------------------+
+#
+# LER1 and LER2 logically represent two different routers. However, no VRF is
+# created for them, as they don't do any IP routing.
+
+ALL_TESTS="mpls_forward_eth"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+ler1_create()
+{
+	tc qdisc add dev $edge1 ingress
+	tc filter add dev $edge1 ingress                            \
+	   matchall                                                 \
+	   action mpls mac_push label 102                           \
+	   action vlan push_eth dst_mac $mpls2mac src_mac $mpls1mac \
+	   action mirred egress redirect dev $mpls1
+	ip link set dev $edge1 up
+
+	tc qdisc add dev $mpls1 ingress
+	tc filter add dev $mpls1 ingress            \
+	   protocol mpls_uc                         \
+	   flower mpls_label 101                    \
+	   action vlan pop_eth                      \
+	   action mpls pop protocol teb             \
+	   action mirred egress redirect dev $edge1
+	ip link set dev $mpls1 up
+}
+
+ler1_destroy()
+{
+	ip link set dev $mpls1 down
+	tc qdisc del dev $mpls1 ingress
+
+	ip link set dev $edge1 down
+	tc qdisc del dev $edge1 ingress
+}
+
+ler2_create()
+{
+	tc qdisc add dev $edge2 ingress
+	tc filter add dev $edge2 ingress                            \
+	   matchall                                                 \
+	   action mpls mac_push label 101                           \
+	   action vlan push_eth dst_mac $mpls1mac src_mac $mpls2mac \
+	   action mirred egress redirect dev $mpls2
+	ip link set dev $edge2 up
+
+	tc qdisc add dev $mpls2 ingress
+	tc filter add dev $mpls2 ingress            \
+	   protocol mpls_uc                         \
+	   flower mpls_label 102                    \
+	   action vlan pop_eth                      \
+	   action mpls pop protocol teb             \
+	   action mirred egress redirect dev $edge2
+	ip link set dev $mpls2 up
+}
+
+ler2_destroy()
+{
+	ip link set dev $mpls2 down
+	tc qdisc del dev $mpls2 ingress
+
+	ip link set dev $edge2 down
+	tc qdisc del dev $edge2 ingress
+}
+
+mpls_forward_eth()
+{
+	ping_test $h1 192.0.2.2
+	ping6_test $h1 2001:db8::2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	edge1=${NETIFS[p2]}
+
+	mpls1=${NETIFS[p3]}
+	mpls2=${NETIFS[p4]}
+
+	edge2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	mpls1mac=$(mac_get $mpls1)
+	mpls2mac=$(mac_get $mpls2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	ler1_create
+	ler2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ler2_destroy
+	ler1_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From 205704c618af0ab2366015d2281a3b0814d918a0 Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Thu, 3 Dec 2020 22:06:04 -0500
Subject: vrf: packets with lladdr src needs dst at input with orig_iif when
 needs strict

Depending on the order of the routes to fe80::/64 are installed on the
VRF table, the NS for the source link-local address of the originator
might be sent to the wrong interface.

This patch ensures that packets with link-local addr source is doing a
lookup with the orig_iif when the destination addr indicates that it
is strict.

Add the reproducer as a use case in self test script fcnal-test.sh.

Fixes: b4869aa2f881 ("net: vrf: ipv6 support for local traffic to local addresses")
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20201204030604.18828-1-ssuryaextr@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/vrf.c                         | 10 +++-
 tools/testing/selftests/net/fcnal-test.sh | 95 +++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index f2793ffde191..b9b7e00b72a8 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1315,11 +1315,17 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
 	int orig_iif = skb->skb_iif;
 	bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
 	bool is_ndisc = ipv6_ndisc_frame(skb);
+	bool is_ll_src;
 
 	/* loopback, multicast & non-ND link-local traffic; do not push through
-	 * packet taps again. Reset pkt_type for upper layers to process skb
+	 * packet taps again. Reset pkt_type for upper layers to process skb.
+	 * for packets with lladdr src, however, skip so that the dst can be
+	 * determine at input using original ifindex in the case that daddr
+	 * needs strict
 	 */
-	if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
+	is_ll_src = ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL;
+	if (skb->pkt_type == PACKET_LOOPBACK ||
+	    (need_strict && !is_ndisc && !is_ll_src)) {
 		skb->dev = vrf_dev;
 		skb->skb_iif = vrf_dev->ifindex;
 		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index fb5c55dd6df8..02b0b9ead40b 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -256,6 +256,28 @@ setup_cmd_nsb()
 	fi
 }
 
+setup_cmd_nsc()
+{
+	local cmd="$*"
+	local rc
+
+	run_cmd_nsc ${cmd}
+	rc=$?
+	if [ $rc -ne 0 ]; then
+		# show user the command if not done so already
+		if [ "$VERBOSE" = "0" ]; then
+			echo "setup command: $cmd"
+		fi
+		echo "failed. stopping tests"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue"
+			read a
+		fi
+		exit $rc
+	fi
+}
+
 # set sysctl values in NS-A
 set_sysctl()
 {
@@ -471,6 +493,36 @@ setup()
 	sleep 1
 }
 
+setup_lla_only()
+{
+	# make sure we are starting with a clean slate
+	kill_procs
+	cleanup 2>/dev/null
+
+	log_debug "Configuring network namespaces"
+	set -e
+
+	create_ns ${NSA} "-" "-"
+	create_ns ${NSB} "-" "-"
+	create_ns ${NSC} "-" "-"
+	connect_ns ${NSA} ${NSA_DEV} "-" "-" \
+		   ${NSB} ${NSB_DEV} "-" "-"
+	connect_ns ${NSA} ${NSA_DEV2} "-" "-" \
+		   ${NSC} ${NSC_DEV}  "-" "-"
+
+	NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV})
+	NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV})
+	NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV})
+
+	create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-"
+	ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF}
+	ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF}
+
+	set +e
+
+	sleep 1
+}
+
 ################################################################################
 # IPv4
 
@@ -3787,10 +3839,53 @@ use_case_br()
 	setup_cmd_nsb ip li del vlan100 2>/dev/null
 }
 
+# VRF only.
+# ns-A device is connected to both ns-B and ns-C on a single VRF but only has
+# LLA on the interfaces
+use_case_ping_lla_multi()
+{
+	setup_lla_only
+	# only want reply from ns-A
+	setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+	setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B"
+
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C"
+
+	# cycle/flap the first ns-A interface
+	setup_cmd ip link set ${NSA_DEV} down
+	setup_cmd ip link set ${NSA_DEV} up
+	sleep 1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B"
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C"
+
+	# cycle/flap the second ns-A interface
+	setup_cmd ip link set ${NSA_DEV2} down
+	setup_cmd ip link set ${NSA_DEV2} up
+	sleep 1
+
+	log_start
+	run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+	log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B"
+	run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+	log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
+}
+
 use_cases()
 {
 	log_section "Use cases"
+	log_subsection "Device enslaved to bridge"
 	use_case_br
+	log_subsection "Ping LLA with multiple interfaces"
+	use_case_ping_lla_multi
 }
 
 ################################################################################
-- 
cgit v1.2.3-70-g09d2


From da777be6de014be6b302644685797ed3860a0d0d Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Thu, 15 Oct 2020 07:11:57 +0800
Subject: tools: gpio: add support for reporting realtime event clock to lsgpio

Add support for reporting if a line is configured to report realtime
timestamps in events.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Link: https://lore.kernel.org/r/20201014231158.34117-3-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 tools/gpio/lsgpio.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index 5a05a454d0c9..c61d061247e1 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -65,6 +65,10 @@ struct gpio_flag flagnames[] = {
 		.name = "bias-disabled",
 		.mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED,
 	},
+	{
+		.name = "clock-realtime",
+		.mask = GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME,
+	},
 };
 
 static void print_attributes(struct gpio_v2_line_info *info)
-- 
cgit v1.2.3-70-g09d2


From e0822cf9b892ed051830daaf57896aca48c8567b Mon Sep 17 00:00:00 2001
From: Kent Gibson <warthog618@gmail.com>
Date: Thu, 15 Oct 2020 07:11:58 +0800
Subject: tools: gpio: add option to report wall-clock time to gpio-event-mon

Add support for selecting the realtime clock for events.

Signed-off-by: Kent Gibson <warthog618@gmail.com>
Link: https://lore.kernel.org/r/20201014231158.34117-4-warthog618@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 tools/gpio/gpio-event-mon.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 90c3155f05b1..cacd66ad7926 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -148,6 +148,7 @@ void print_usage(void)
 		"  -s         Set line as open source\n"
 		"  -r         Listen for rising edges\n"
 		"  -f         Listen for falling edges\n"
+		"  -w         Report the wall-clock time for events\n"
 		"  -b <n>     Debounce the line with period n microseconds\n"
 		" [-c <n>]    Do <n> loops (optional, infinite loop if not stated)\n"
 		"  -?         This helptext\n"
@@ -173,7 +174,7 @@ int main(int argc, char **argv)
 
 	memset(&config, 0, sizeof(config));
 	config.flags = GPIO_V2_LINE_FLAG_INPUT;
-	while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) {
+	while ((c = getopt(argc, argv, "c:n:o:b:dsrfw?")) != -1) {
 		switch (c) {
 		case 'c':
 			loops = strtoul(optarg, NULL, 10);
@@ -204,6 +205,9 @@ int main(int argc, char **argv)
 		case 'f':
 			config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
 			break;
+		case 'w':
+			config.flags |= GPIO_V2_LINE_FLAG_EVENT_CLOCK_REALTIME;
+			break;
 		case '?':
 			print_usage();
 			return -1;
-- 
cgit v1.2.3-70-g09d2


From 4e9a5ae8df5b3365183150f6df49e49dece80d8c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 3 Dec 2020 13:50:37 +0900
Subject: x86/uprobes: Do not use prefixes.nbytes when looping over
 prefixes.bytes

Since insn.prefixes.nbytes can be bigger than the size of
insn.prefixes.bytes[] when a prefix is repeated, the proper check must
be

  insn.prefixes.bytes[i] != 0 and i < 4

instead of using insn.prefixes.nbytes.

Introduce a for_each_insn_prefix() macro for this purpose. Debugged by
Kees Cook <keescook@chromium.org>.

 [ bp: Massage commit message, sync with the respective header in tools/
   and drop "we". ]

Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints")
Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/160697103739.3146288.7437620795200799020.stgit@devnote2
---
 arch/x86/include/asm/insn.h       | 15 +++++++++++++++
 arch/x86/kernel/uprobes.c         | 10 ++++++----
 tools/arch/x86/include/asm/insn.h | 15 +++++++++++++++
 3 files changed, 36 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5c1ae3eff9d4..a8c3d284fa46 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx:  Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix)	\
+	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 3fdaa042823d..138bdb1fd136 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = {
 
 static bool is_prefix_bad(struct insn *insn)
 {
+	insn_byte_t p;
 	int i;
 
-	for (i = 0; i < insn->prefixes.nbytes; i++) {
+	for_each_insn_prefix(insn, i, p) {
 		insn_attr_t attr;
 
-		attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+		attr = inat_get_opcode_attribute(p);
 		switch (attr) {
 		case INAT_MAKE_PREFIX(INAT_PFX_ES):
 		case INAT_MAKE_PREFIX(INAT_PFX_CS):
@@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = {
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 opc1 = OPCODE1(insn);
+	insn_byte_t p;
 	int i;
 
 	switch (opc1) {
@@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 	 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
 	 * No one uses these insns, reject any branch insns with such prefix.
 	 */
-	for (i = 0; i < insn->prefixes.nbytes; i++) {
-		if (insn->prefixes.bytes[i] == 0x66)
+	for_each_insn_prefix(insn, i, p) {
+		if (p == 0x66)
 			return -ENOTSUPP;
 	}
 
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 568854b14d0a..52c6262e6bfd 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx:  Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix)	\
+	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
-- 
cgit v1.2.3-70-g09d2


From d8cbe8bfa7df3c680ddfd5e1eee3a5c86d8dc764 Mon Sep 17 00:00:00 2001
From: Xingxing Su <suxingxing@loongson.cn>
Date: Sat, 5 Dec 2020 22:15:02 -0800
Subject: tools/testing/selftests/vm: fix build error

Only x86 and PowerPC implement the pkey-xxx.h, and an error was reported
when compiling protection_keys.c.

Add a Arch judgment to compile "protection_keys" in the Makefile.

If other arch implement this, add the arch name to the Makefile.
eg:
    ifneq (,$(findstring $(ARCH),powerpc mips ... ))

Following build errors:

    pkey-helpers.h:93:2: error: #error Architecture not supported
     #error Architecture not supported
    pkey-helpers.h:96:20: error: `PKEY_DISABLE_ACCESS' undeclared
     #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
                        ^
    protection_keys.c:218:45: error: `PKEY_DISABLE_WRITE' undeclared
     pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
                                                ^

Signed-off-by: Xingxing Su <suxingxing@loongson.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Mina Almasry <almasrymina@google.com>
Link: https://lkml.kernel.org/r/1606826876-30656-1-git-send-email-suxingxing@loongson.cn
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 30873b19d04b..691893afc15d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -60,9 +60,13 @@ ifeq ($(CAN_BUILD_X86_64),1)
 TEST_GEN_FILES += $(BINARIES_64)
 endif
 else
+
+ifneq (,$(findstring $(ARCH),powerpc))
 TEST_GEN_FILES += protection_keys
 endif
 
+endif
+
 ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
 TEST_GEN_FILES += va_128TBswitch
 TEST_GEN_FILES += virtual_address_range
-- 
cgit v1.2.3-70-g09d2


From 573a259336f8c57739bdaf035aa7abbae7d9a713 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Sat, 5 Dec 2020 22:15:05 -0800
Subject: userfaultfd: selftests: fix SIGSEGV if huge mmap fails

The error handling in hugetlb_allocate_area() was incorrect for the
hugetlb_shared test case.

Previously the behavior was:

- mmap a hugetlb area
  - If this fails, set the pointer to NULL, and carry on
- mmap an alias of the same hugetlb fd
  - If this fails, munmap the original area

If the original mmap failed, it's likely the second one did too.  If
both failed, we'd blindly try to munmap a NULL pointer, causing a
SIGSEGV.  Instead, "goto fail" so we return before trying to mmap the
alias.

This issue can be hit "in real life" by forgetting to set
/proc/sys/vm/nr_hugepages (leaving it at 0), and then trying to run the
hugetlb_shared test.

Another small improvement is, when the original mmap fails, don't just
print "it failed": perror(), so we can see *why*.  :)

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Joe Perches <joe@perches.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Alan Gilbert <dgilbert@redhat.com>
Link: https://lkml.kernel.org/r/20201204203443.2714693-1-axelrasmussen@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 9b0912a01777..c4425597769a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -206,19 +206,19 @@ static int hugetlb_release_pages(char *rel_area)
 	return ret;
 }
 
-
 static void hugetlb_allocate_area(void **alloc_area)
 {
 	void *area_alias = NULL;
 	char **alloc_area_alias;
+
 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
 			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
 			   MAP_HUGETLB,
 			   huge_fd, *alloc_area == area_src ? 0 :
 			   nr_pages * page_size);
 	if (*alloc_area == MAP_FAILED) {
-		fprintf(stderr, "mmap of hugetlbfs file failed\n");
-		*alloc_area = NULL;
+		perror("mmap of hugetlbfs file failed");
+		goto fail;
 	}
 
 	if (map_shared) {
@@ -227,14 +227,11 @@ static void hugetlb_allocate_area(void **alloc_area)
 				  huge_fd, *alloc_area == area_src ? 0 :
 				  nr_pages * page_size);
 		if (area_alias == MAP_FAILED) {
-			if (munmap(*alloc_area, nr_pages * page_size) < 0) {
-				perror("hugetlb munmap");
-				exit(1);
-			}
-			*alloc_area = NULL;
-			return;
+			perror("mmap of hugetlb file alias failed");
+			goto fail_munmap;
 		}
 	}
+
 	if (*alloc_area == area_src) {
 		huge_fd_off0 = *alloc_area;
 		alloc_area_alias = &area_src_alias;
@@ -243,6 +240,16 @@ static void hugetlb_allocate_area(void **alloc_area)
 	}
 	if (area_alias)
 		*alloc_area_alias = area_alias;
+
+	return;
+
+fail_munmap:
+	if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+		perror("hugetlb munmap");
+		exit(1);
+	}
+fail:
+	*alloc_area = NULL;
 }
 
 static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
-- 
cgit v1.2.3-70-g09d2


From 23fb55526d80122710c28cb6be0e5dba65a6a3f1 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 6 Dec 2020 10:22:22 +0200
Subject: selftests: mlxsw: Test RIF's reference count when joining a LAG

Test that the reference count of a router interface (RIF) configured for
a LAG is incremented / decremented when ports join / leave the LAG. Use
the offload indication on routes configured on the RIF to understand if
it was created / destroyed.

The test fails without the previous patch.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/rtnetlink.sh       | 43 ++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index a2eff5f58209..ed346da5d3cb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -22,6 +22,7 @@ ALL_TESTS="
 	duplicate_vlans_test
 	vlan_rif_refcount_test
 	subport_rif_refcount_test
+	subport_rif_lag_join_test
 	vlan_dev_deletion_test
 	lag_unlink_slaves_test
 	lag_dev_deletion_test
@@ -440,6 +441,48 @@ subport_rif_refcount_test()
 	ip link del dev bond1
 }
 
+subport_rif_lag_join_test()
+{
+	# Test that the reference count of a RIF configured for a LAG is
+	# incremented / decremented when ports join / leave the LAG. We use the
+	# offload indication on routes configured on the RIF to understand if
+	# it was created / destroyed
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 master bond1
+
+	ip link set dev bond1 up
+	ip -6 address add 2001:db8:1::1/64 dev bond1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif was not created on lag device"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed after removing one port"
+
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 nomaster
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed after re-adding a port and removing another"
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was not destroyed when should"
+
+	log_test "subport rif lag join"
+
+	ip link del dev bond1
+}
+
 vlan_dev_deletion_test()
 {
 	# Test that VLAN devices are correctly deleted / unlinked when enslaved
-- 
cgit v1.2.3-70-g09d2


From 6f39cecdb6018234a47dcea15121f01b9903d16b Mon Sep 17 00:00:00 2001
From: Xingxing Su <suxingxing@loongson.cn>
Date: Wed, 25 Nov 2020 12:04:57 +0800
Subject: rseq/selftests: Fix MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ build error
 under other arch.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Except arch x86, the function rseq_offset_deref_addv is not defined.
The function test_membarrier_manager_thread call rseq_offset_deref_addv
produces a build error.

The RSEQ_ARCH_HAS_OFFSET_DEREF_ADD should contain all the code
for the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ.
If the other Arch implements this feature,
defined RSEQ_ARCH_HAS_OFFSET_DEREF_ADD in the header file
to ensure that this feature is available.

Following build errors:

param_test.c: In function ‘test_membarrier_worker_thread’:
param_test.c:1164:10: warning: implicit declaration of function ‘rseq_offset_deref_addv’
    ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
          ^~~~~~~~~~~~~~~~~~~~~~
/tmp/ccMj9yHJ.o: In function `test_membarrier_worker_thread':
param_test.c:1164: undefined reference to `rseq_offset_deref_addv'
param_test.c:1164: undefined reference to `rseq_offset_deref_addv'
collect2: error: ld returned 1 exit status
make: *** [/selftests/rseq/param_test_benchmark] Error 1

Signed-off-by: Xingxing Su <suxingxing@loongson.cn>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/rseq/param_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 384589095864..699ad5f93c34 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1133,6 +1133,8 @@ static int set_signal_handler(void)
 	return ret;
 }
 
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
 struct test_membarrier_thread_args {
 	int stop;
 	intptr_t percpu_list_ptr;
@@ -1286,8 +1288,6 @@ void *test_membarrier_manager_thread(void *arg)
 	return NULL;
 }
 
-/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
-#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
 void test_membarrier(void)
 {
 	const int num_threads = opt_threads;
-- 
cgit v1.2.3-70-g09d2


From 88f4ede44c585b24674dd99841040b2a1a856a76 Mon Sep 17 00:00:00 2001
From: Xingxing Su <suxingxing@loongson.cn>
Date: Fri, 27 Nov 2020 11:16:57 +0800
Subject: selftests/clone3: Fix build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When compiling the selftests with the -std=gnu99 option the build can
fail with.

Following build error:

  test_core.c: In function ‘test_cgcore_destroy’:
  test_core.c:87:2: error: ‘for’ loop initial declarations are only
  allowed in C99 mode
    for (int i = 0; i < 10; i++) {
    ^
  test_core.c:87:2: note: use option -std=c99 or -std=gnu99 to compile

Add -std=gnu99 to the clone3 selftest Makefile to fix this.

Signed-off-by: Xingxing Su <suxingxing@loongson.cn>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/clone3/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
index ef7564cb7abe..79b19a2863a0 100644
--- a/tools/testing/selftests/clone3/Makefile
+++ b/tools/testing/selftests/clone3/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g -std=gnu99 -I../../../../usr/include/
 LDLIBS += -lcap
 
 TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \
-- 
cgit v1.2.3-70-g09d2


From 07f262d80d5f1f426da4557066bf0ec24ee32d97 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 23 Nov 2020 16:56:05 -0800
Subject: tools/power/x86/intel-speed-select: Read TRL from mailbox

When SST-PP feature is not present, the TRL (Turbo Ratio Limits)
is read from MSRs. This is done as the mailbox command will fail
on Skylake-X based platform. But for IceLake servers, mailbox
commands can still be used. So add a check to allow for non Skylake
based platforms to read from mail box commands.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-core.c | 2 +-
 tools/power/x86/intel-speed-select/isst.h      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 1d7ecb54352e..8afd23407522 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -804,7 +804,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
 				return ret;
 		}
 
-		if (!pkg_dev->enabled) {
+		if (!pkg_dev->enabled && is_skx_based_platform()) {
 			int freq;
 
 			freq = get_cpufreq_base_freq(cpu);
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 29715e9c2e06..60db0bb084d5 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -255,4 +255,5 @@ extern int is_clx_n_platform(void);
 extern int get_cpufreq_base_freq(int cpu);
 extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
 extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
+extern int is_skx_based_platform(void);
 #endif
-- 
cgit v1.2.3-70-g09d2


From 6c4832253a2d2259fd4002e3c4511035f81f48f6 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 3 Dec 2020 13:08:37 -0800
Subject: tools/power/x86/intel-speed-select: Account for missing sysfs for
 die_id

Some older kernels will not have support to get CPU die_id from the
sysfs. This requires several back ports. But the tool depends on getting
die_id to match to correct CPU.

Relax this restriction and use die_id as 0 when die_id is missing. This
is not a problem as we don't have any multi-die processors with Intel SST
support.

This helps in running this tool on older kernels with just Intel SST
drivers back ported.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-config.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index cd089a505859..271bc79bdc59 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -328,8 +328,12 @@ int get_physical_die_id(int cpu)
 		int core_id, pkg_id, die_id;
 
 		ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
-		if (!ret)
+		if (!ret) {
+			if (die_id < 0)
+				die_id = 0;
+
 			return die_id;
+		}
 	}
 
 	if (ret < 0)
-- 
cgit v1.2.3-70-g09d2


From 5e27cb9bca6746d1c9c76c4b4aeececcb18b2120 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 3 Dec 2020 14:29:40 -0800
Subject: tools/power/x86/intel-speed-select: Update version for v5.11

Update version for changes released with v5.11 kernel release.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/platform-driver-x86/57d6648282491906e0e1f70fe3b9a44f72cec90d.camel@intel.com/
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 tools/power/x86/intel-speed-select/isst-config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 271bc79bdc59..5390158cdb40 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.6";
+static const char *version_str = "v1.7";
 static const int supported_api_ver = 1;
 static struct isst_if_platform_info isst_platform_info;
 static char *progname;
-- 
cgit v1.2.3-70-g09d2


From c25ce589dca10d64dde139ae093abc258a32869c Mon Sep 17 00:00:00 2001
From: Finn Behrens <me@kloenk.de>
Date: Mon, 23 Nov 2020 15:15:33 +0100
Subject: tweewide: Fix most Shebang lines

Change every shebang which does not need an argument to use /usr/bin/env.
This is needed as not every distro has everything under /usr/bin,
sometimes not even bash.

Signed-off-by: Finn Behrens <me@kloenk.de>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Documentation/sphinx/parse-headers.pl                                   | 2 +-
 Documentation/target/tcm_mod_builder.py                                 | 2 +-
 Documentation/trace/postprocess/decode_msr.py                           | 2 +-
 Documentation/trace/postprocess/trace-pagealloc-postprocess.pl          | 2 +-
 Documentation/trace/postprocess/trace-vmscan-postprocess.pl             | 2 +-
 arch/ia64/scripts/unwcheck.py                                           | 2 +-
 scripts/bloat-o-meter                                                   | 2 +-
 scripts/config                                                          | 2 +-
 scripts/diffconfig                                                      | 2 +-
 scripts/get_abi.pl                                                      | 2 +-
 scripts/show_delta                                                      | 2 +-
 scripts/sphinx-pre-install                                              | 2 +-
 scripts/split-man.pl                                                    | 2 +-
 scripts/tracing/draw_functrace.py                                       | 2 +-
 tools/perf/python/tracepoint.py                                         | 2 +-
 tools/perf/python/twatch.py                                             | 2 +-
 tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py              | 2 +-
 tools/testing/ktest/compare-ktest-sample.pl                             | 2 +-
 tools/testing/kunit/kunit.py                                            | 2 +-
 tools/testing/kunit/kunit_tool_test.py                                  | 2 +-
 tools/testing/selftests/bpf/test_offload.py                             | 2 +-
 tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py | 2 +-
 tools/testing/selftests/kselftest/prefix.pl                             | 2 +-
 tools/testing/selftests/net/devlink_port_split.py                       | 2 +-
 tools/testing/selftests/tc-testing/tdc_batch.py                         | 2 +-
 tools/testing/selftests/tc-testing/tdc_multibatch.py                    | 2 +-
 26 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl
index 1910079f984f..b063f2f1cfb2 100755
--- a/Documentation/sphinx/parse-headers.pl
+++ b/Documentation/sphinx/parse-headers.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use strict;
 use Text::Tabs;
 use Getopt::Long;
diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py
index 1548d8420499..54492aa813b9 100755
--- a/Documentation/target/tcm_mod_builder.py
+++ b/Documentation/target/tcm_mod_builder.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # The TCM v4 multi-protocol fabric module generation script for drivers/target/$NEW_MOD
 #
 # Copyright (c) 2010 Rising Tide Systems
diff --git a/Documentation/trace/postprocess/decode_msr.py b/Documentation/trace/postprocess/decode_msr.py
index 0ab40e0db580..aa9cc7abd5c2 100644
--- a/Documentation/trace/postprocess/decode_msr.py
+++ b/Documentation/trace/postprocess/decode_msr.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # add symbolic names to read_msr / write_msr in trace
 # decode_msr msr-index.h < trace
 import sys
diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
index 0a120aae33ce..b9b7d80c2f9d 100644
--- a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # This is a POC (proof of concept or piece of crap, take your pick) for reading the
 # text representation of trace output related to page allocation. It makes an attempt
 # to extract some high-level information on what is going on. The accuracy of the parser
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
index 995da15b16ca..2f4e39875fb3 100644
--- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # This is a POC for reading the text representation of trace output related to
 # page reclaim. It makes an attempt to extract some high-level information on
 # what is going on. The accuracy of the parser may vary
diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py
index c55276e31b6b..bfd1b671e35f 100644
--- a/arch/ia64/scripts/unwcheck.py
+++ b/arch/ia64/scripts/unwcheck.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0
 #
 # Usage: unwcheck.py FILE
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter
index d7ca46c612b3..652e9542043f 100755
--- a/scripts/bloat-o-meter
+++ b/scripts/bloat-o-meter
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 #
 # Copyright 2004 Matt Mackall <mpm@selenic.com>
 #
diff --git a/scripts/config b/scripts/config
index eee5b7f3a092..8c8d7c3d7acc 100755
--- a/scripts/config
+++ b/scripts/config
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 # SPDX-License-Identifier: GPL-2.0
 # Manipulate options in a .config file from the command line
 
diff --git a/scripts/diffconfig b/scripts/diffconfig
index 89abf777f197..627eba5849b5 100755
--- a/scripts/diffconfig
+++ b/scripts/diffconfig
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0
 #
 # diffconfig - a tool to compare .config files.
diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl
index 68dab828a722..92d9aa6cc4f5 100755
--- a/scripts/get_abi.pl
+++ b/scripts/get_abi.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 
 use strict;
diff --git a/scripts/show_delta b/scripts/show_delta
index 264399307c4f..28e67e178194 100755
--- a/scripts/show_delta
+++ b/scripts/show_delta
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 #
 # show_deltas: Read list of printk messages instrumented with
diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index 40fa6923e80a..828a8615a918 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0-or-later
 use strict;
 
diff --git a/scripts/split-man.pl b/scripts/split-man.pl
index c3db607ee9ec..96bd99dc977a 100755
--- a/scripts/split-man.pl
+++ b/scripts/split-man.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 #
 # Author: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
index b65735758520..74f8aadfd4cb 100755
--- a/scripts/tracing/draw_functrace.py
+++ b/scripts/tracing/draw_functrace.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 
 """
diff --git a/tools/perf/python/tracepoint.py b/tools/perf/python/tracepoint.py
index eb76f6516247..461848c7f57d 100755
--- a/tools/perf/python/tracepoint.py
+++ b/tools/perf/python/tracepoint.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#! /usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0
 # -*- python -*-
 # -*- coding: utf-8 -*-
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index ff87ccf5b708..04f3db29b9bc 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#! /usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 # -*- python -*-
 # -*- coding: utf-8 -*-
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index 3c47865bb247..e15e20696d17 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0-only
 # -*- coding: utf-8 -*-
 #
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
index 4118eb4a842d..ebea21d0a1be 100755
--- a/tools/testing/ktest/compare-ktest-sample.pl
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 
 open (IN,"ktest.pl");
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index d4f7846d0745..21516e293d17 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 #
 # A thin wrapper on top of the KUnit Kernel
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 497ab51bc170..b593f4448e83 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 #
 # A collection of tests for tools/testing/kunit/kunit.py
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 43c9cda199b8..f736d34b89e1 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 
 # Copyright (C) 2017 Netronome Systems, Inc.
 # Copyright (c) 2019 Mellanox Technologies. All rights reserved
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
index 0d4b9327c9b3..2223337eed0c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # SPDX-License-Identifier: GPL-2.0
 
 import subprocess
diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl
index 31f7c2a0a8bd..12a7f4ca2684 100755
--- a/tools/testing/selftests/kselftest/prefix.pl
+++ b/tools/testing/selftests/kselftest/prefix.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # SPDX-License-Identifier: GPL-2.0
 # Prefix all lines with "# ", unbuffered. Command being piped in may need
 # to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd".
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 58bb7e9b88ce..834066d465fc 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
 from subprocess import PIPE, Popen
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 995f66ce43eb..35d5d9493784 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 
 """
 tdc_batch.py - a script to generate TC batch file
diff --git a/tools/testing/selftests/tc-testing/tdc_multibatch.py b/tools/testing/selftests/tc-testing/tdc_multibatch.py
index 5e7237952e49..48e1f17ff2e8 100755
--- a/tools/testing/selftests/tc-testing/tdc_multibatch.py
+++ b/tools/testing/selftests/tc-testing/tdc_multibatch.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 """
 tdc_multibatch.py - a thin wrapper over tdc_batch.py to generate multiple batch
-- 
cgit v1.2.3-70-g09d2


From 932c60558109a9131e54dacfda6070147fd1cdfb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 4 Dec 2020 15:20:01 -0800
Subject: tools/bpftool: Fix PID fetching with a lot of results

In case of having so many PID results that they don't fit into a singe page
(4096) bytes, bpftool will erroneously conclude that it got corrupted data due
to 4096 not being a multiple of struct pid_iter_entry, so the last entry will
be partially truncated. Fix this by sizing the buffer to fit exactly N entries
with no truncation in the middle of record.

Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201204232002.3589803-1-andrii@kernel.org
---
 tools/bpf/bpftool/pids.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index df7d8ec76036..477e55d59c34 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -89,9 +89,9 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level,
 
 int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 {
-	char buf[4096];
-	struct pid_iter_bpf *skel;
 	struct pid_iter_entry *e;
+	char buf[4096 / sizeof(*e) * sizeof(*e)];
+	struct pid_iter_bpf *skel;
 	int err, ret, fd = -1, i;
 	libbpf_print_fn_t default_print;
 
-- 
cgit v1.2.3-70-g09d2


From 547f574fd9d5e3925d47fd44decbf6ab6df94b0e Mon Sep 17 00:00:00 2001
From: Mathieu Chouquet-Stringer <me@mathieu.digital>
Date: Wed, 2 Dec 2020 16:32:43 +0100
Subject: docs: Update documentation to reflect what TAINT_CPU_OUT_OF_SPEC
 means

Here's a patch updating the meaning of TAINT_CPU_OUT_OF_SPEC after
Borislav introduced changes in a7e1f67ed29f and upcoming patches in tip.

TAINT_CPU_OUT_OF_SPEC now means a bit more what it implies as the
flag isn't set just because of a CPU misconfiguration or mismatch.
Historically it was for SMP kernel oops on an officially SMP incapable
processor but now it also covers CPUs whose MSRs have been incorrectly
poked at from userspace, drivers being used on non supported
architectures, broken firmware, mismatched CPUs, ...

Update documentation and script to reflect that.

Signed-off-by: Mathieu Chouquet-Stringer <me@mathieu.digital>
Link: https://lore.kernel.org/r/20201202153244.709752-1-me@mathieu.digital
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/sysctl/kernel.rst   |  2 +-
 Documentation/admin-guide/tainted-kernels.rst | 23 ++++++++++++++++++-----
 tools/debugging/kernel-chktaint               |  2 +-
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index d7a17859adbd..1d56a6b73a4e 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1336,7 +1336,7 @@ ORed together. The letters are seen in "Tainted" line of Oops reports.
 ======  =====  ==============================================================
      1  `(P)`  proprietary module was loaded
      2  `(F)`  module was force loaded
-     4  `(S)`  SMP kernel oops on an officially SMP incapable processor
+     4  `(S)`  kernel running on an out of specification system
      8  `(R)`  module was force unloaded
     16  `(M)`  processor reported a Machine Check Exception (MCE)
     32  `(B)`  bad page referenced or some unexpected page flags
diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst
index f718a2eaf1f6..ceeed7b0798d 100644
--- a/Documentation/admin-guide/tainted-kernels.rst
+++ b/Documentation/admin-guide/tainted-kernels.rst
@@ -84,7 +84,7 @@ Bit  Log  Number  Reason that got the kernel tainted
 ===  ===  ======  ========================================================
   0  G/P       1  proprietary module was loaded
   1  _/F       2  module was force loaded
-  2  _/S       4  SMP kernel oops on an officially SMP incapable processor
+  2  _/S       4  kernel running on an out of specification system
   3  _/R       8  module was force unloaded
   4  _/M      16  processor reported a Machine Check Exception (MCE)
   5  _/B      32  bad page referenced or some unexpected page flags
@@ -116,10 +116,23 @@ More detailed explanation for tainting
  1)  ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all
      modules were loaded normally.
 
- 2)  ``S`` if the oops occurred on an SMP kernel running on hardware that
-     hasn't been certified as safe to run multiprocessor.
-     Currently this occurs only on various Athlons that are not
-     SMP capable.
+ 2)  ``S`` if the kernel is running on a processor or system that is out of
+     specification: hardware has been put into an unsupported configuration,
+     therefore proper execution cannot be guaranteed.
+     Kernel will be tainted if, for example:
+
+     - on x86: PAE is forced through forcepae on intel CPUs (such as Pentium M)
+       which do not report PAE but may have a functional implementation, an SMP
+       kernel is running on non officially capable SMP Athlon CPUs, MSRs are
+       being poked at from userspace.
+     - on arm: kernel running on certain CPUs (such as Keystone 2) without
+       having certain kernel features enabled.
+     - on arm64: there are mismatched hardware features between CPUs, the
+       bootloader has booted CPUs in different modes.
+     - certain drivers are being used on non supported architectures (such as
+       scsi/snic on something else than x86_64, scsi/ips on non
+       x86/x86_64/itanium, have broken firmware settings for the
+       irqchip/irq-gic on arm64 ...).
 
  3)  ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all
      modules were unloaded normally.
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 2240cb56e6e5..607b2b280945 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -72,7 +72,7 @@ if [ `expr $T % 2` -eq 0 ]; then
 	addout " "
 else
 	addout "S"
-	echo " * SMP kernel oops on an officially SMP incapable processor (#2)"
+	echo " * kernel running on an out of specification system (#2)"
 fi
 
 T=`expr $T / 2`
-- 
cgit v1.2.3-70-g09d2


From 4cec85296c7c7a123890d9335b835f991b36e106 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Tue, 8 Dec 2020 11:22:52 +0200
Subject: selftests: forwarding: Add Q-in-VNI test

Add test to check Q-in-VNI traffic.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/q_in_vni.sh | 347 +++++++++++++++++++++
 1 file changed, 347 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/q_in_vni.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh
new file mode 100755
index 000000000000..4c50c0234bce
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# |  + $h1.10             |                          |  + $h2.10              |
+# |  | 192.0.2.1/28       |                          |  | 192.0.2.2/28        |
+# |  |                    |                          |  |                     |
+# |  | + $h1.20           |                          |  | + $h2.20            |
+# |  \ | 198.51.100.1/24  |                          |  \ | 198.51.100.2/24   |
+# |   \|                  |                          |   \|                   |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1ad)            + $swp2           | |
+# | |    vid 100 pvid untagged                              vid 100 pvid    | |
+# | |                                                           untagged    | |
+# | |  + vx100 (vxlan)                                                      | |
+# | |    local 192.0.2.17                                                   | |
+# | |    remote 192.0.2.34 192.0.2.50                                       | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | |    vid 100 pvid untagged                                              | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  192.0.2.32/28 via 192.0.2.18                                             |
+# |  192.0.2.48/28 via 192.0.2.18                                             |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.17/28                                                        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRP2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |   (maybe) HW
+# =============================================================================
+# |                                                             |  (likely) SW
+# |    + v1 (veth)                             + v3 (veth)      |
+# |    | 192.0.2.33/28                         | 192.0.2.49/28  |
+# +----|---------------------------------------|----------------+
+#      |                                       |
+# +----|------------------------------+   +----|------------------------------+
+# |    + v2 (veth)        NS1 (netns) |   |    + v4 (veth)        NS2 (netns) |
+# |      192.0.2.34/28                |   |      192.0.2.50/28                |
+# |                                   |   |                                   |
+# |   192.0.2.16/28 via 192.0.2.33    |   |   192.0.2.16/28 via 192.0.2.49    |
+# |   192.0.2.50/32 via 192.0.2.33    |   |   192.0.2.34/32 via 192.0.2.49    |
+# |                                   |   |                                   |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# | |                 BR2 (802.1ad) | |   | |                 BR2 (802.1ad) | |
+# | |  + vx100 (vxlan)              | |   | |  + vx100 (vxlan)              | |
+# | |    local 192.0.2.34           | |   | |    local 192.0.2.50           | |
+# | |    remote 192.0.2.17          | |   | |    remote 192.0.2.17          | |
+# | |    remote 192.0.2.50          | |   | |    remote 192.0.2.34          | |
+# | |    id 1000 dstport $VXPORT    | |   | |    id 1000 dstport $VXPORT    | |
+# | |    vid 100 pvid untagged      | |   | |    vid 100 pvid untagged      | |
+# | |                               | |   | |                               | |
+# | |  + w1 (veth)                  | |   | |  + w1 (veth)                  | |
+# | |  | vid 100 pvid untagged      | |   | |  | vid 100 pvid untagged      | |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# |    |                              |   |    |                              |
+# | +--|----------------------------+ |   | +--|----------------------------+ |
+# | |  |                  VW2 (vrf) | |   | |  |                  VW2 (vrf) | |
+# | |  + w2 (veth)                  | |   | |  + w2 (veth)                  | |
+# | |  |\                           | |   | |  |\                           | |
+# | |  | + w2.10                    | |   | |  | + w2.10                    | |
+# | |  |   192.0.2.3/28             | |   | |  |   192.0.2.4/28             | |
+# | |  |                            | |   | |  |                            | |
+# | |  + w2.20                      | |   | |  + w2.20                      | |
+# | |    198.51.100.3/24            | |   | |    198.51.100.4/24            | |
+# | +-------------------------------+ |   | +-------------------------------+ |
+# +-----------------------------------+   +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+	ping_ipv4
+    "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+	vlan_create $h1 20 v$h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 20
+	vlan_destroy $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+	vlan_create $h2 10 v$h2 192.0.2.2/28
+	vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 20
+	vlan_destroy $h2 10
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+	ip address add dev $rp1 192.0.2.17/28
+
+	ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+	ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+	ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+	ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	rp1_set_addr
+
+	ip link add name vx100 type vxlan id 1000		\
+		local 192.0.2.17 dstport "$VXPORT"	\
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev vx100 master br1
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 100 dev $swp1 pvid untagged
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+	bridge vlan add vid 100 dev $swp2 pvid untagged
+
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+	bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+	bridge vlan del vid 100 dev $swp2
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+
+	bridge vlan del vid 100 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx100 nomaster
+	ip link set dev vx100 down
+	ip link del dev vx100
+
+	rp1_unset_addr
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	__simple_if_init v1 v$rp2 192.0.2.33/28
+	__simple_if_init v3 v$rp2 192.0.2.49/28
+	tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+	tc qdisc del dev v1 clsact
+	__simple_if_fini v3 192.0.2.49/28
+	__simple_if_fini v1 192.0.2.33/28
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+	local in_if=$1; shift
+	local in_addr=$1; shift
+	local other_in_addr=$1; shift
+	local nh_addr=$1; shift
+	local host_addr1=$1; shift
+	local host_addr2=$1; shift
+
+	ip link set dev $in_if up
+	ip address add dev $in_if $in_addr/28
+	tc qdisc add dev $in_if clsact
+
+	ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0
+	ip link set dev br2 up
+
+	ip link add name w1 type veth peer name w2
+
+	ip link set dev w1 master br2
+	ip link set dev w1 up
+	bridge vlan add vid 100 dev w1 pvid untagged
+
+	ip link add name vx100 type vxlan id 1000 local $in_addr \
+		dstport "$VXPORT"
+	ip link set dev vx100 up
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.17 self
+	bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self
+
+	ip link set dev vx100 master br2
+	tc qdisc add dev vx100 clsact
+
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	simple_if_init w2
+        vlan_create w2 10 vw2 $host_addr1/28
+        vlan_create w2 20 vw2 $host_addr2/24
+
+	ip route add 192.0.2.16/28 nexthop via $nh_addr
+	ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+	ip netns add ns1
+	ip link set dev v2 netns ns1
+	in_ns ns1 \
+	      ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 \
+			     192.0.2.3 198.51.100.3
+}
+
+ns1_destroy()
+{
+	ip netns exec ns1 ip link set dev v2 netns 1
+	ip netns del ns1
+}
+
+ns2_create()
+{
+	ip netns add ns2
+	ip link set dev v4 netns ns2
+	in_ns ns2 \
+	      ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 \
+			     192.0.2.4 198.51.100.4
+}
+
+ns2_destroy()
+{
+	ip netns exec ns2 ip link set dev v4 netns 1
+	ip netns del ns2
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp1=${NETIFS[p5]}
+	rp2=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	switch_create
+
+	ip link add name v1 type veth peer name v2
+	ip link add name v3 type veth peer name v4
+	vrp2_create
+	ns1_create
+	ns2_create
+
+	r1_mac=$(in_ns ns1 mac_get w2)
+	r2_mac=$(in_ns ns2 mac_get w2)
+	h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ns2_destroy
+	ns1_destroy
+	vrp2_destroy
+	ip link del dev v3
+	ip link del dev v1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2 ": local->local"
+	ping_test $h1 192.0.2.3 ": local->remote 1"
+	ping_test $h1 192.0.2.4 ": local->remote 2"
+}
+
+test_all()
+{
+	echo "Running tests with UDP port $VXPORT"
+	tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From 477ce6d971159910fb8ae76755c8027aa6a84dde Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Tue, 8 Dec 2020 11:22:53 +0200
Subject: selftests: mlxsw: Add Q-in-VNI veto tests

Add tests to ensure that the forbidden and unsupported cases are indeed
vetoed by mlxsw driver.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh  | 77 ++++++++++++++++++++++
 .../drivers/net/mlxsw/spectrum/q_in_vni_veto.sh    | 66 +++++++++++++++++++
 2 files changed, 143 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
new file mode 100755
index 000000000000..0231205a7147
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+VXPORT=4789
+
+ALL_TESTS="
+	create_dot1d_and_dot1ad_vxlans
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_dot1d_and_dot1ad_vxlans()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
+		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev vx100 master br0
+	bridge vlan add vid 100 dev vx100 pvid untagged
+
+	ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0
+	ip link set dev br1 up
+
+	ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \
+		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx200 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev vx200 master br1 2>/dev/null
+	check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected"
+
+	ip link set dev vx200 master br1 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack"
+
+	log_test "create 802.1d and 802.1ad VxLANs"
+
+	ip link del dev vx200
+	ip link del dev br1
+	ip link del dev vx100
+	ip link del dev br0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
new file mode 100755
index 000000000000..f0443b1b05b9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+VXPORT=4789
+
+ALL_TESTS="
+	create_vxlan_on_top_of_8021ad_bridge
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+create_vxlan_on_top_of_8021ad_bridge()
+{
+	RET=0
+
+	ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+		vlan_default_pvid 0 mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
+		"$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx100 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev vx100 master br0
+
+	bridge vlan add vid 100 dev vx100 pvid untagged 2>/dev/null
+	check_fail $? "802.1ad bridge with VxLAN in Spectrum-1 not rejected"
+
+	bridge vlan add vid 100 dev vx100 pvid untagged 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $? "802.1ad bridge with VxLAN in Spectrum-1 rejected without extack"
+
+	log_test "create VxLAN on top of 802.1ad bridge"
+
+	ip link del dev vx100
+	ip link del dev br0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-70-g09d2


From 0b5b6e747c86e57b7ebd64ccb84314a227ccfcc2 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:38 +0100
Subject: selftests/bpf/test_offload.py: Remove check for program load flags
 match
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we just removed the xdp_attachment_flags_ok() callback, also remove
the check for it in test_offload.py, and replace it with a test for the new
ambiguity-avoid check when multiple programs are loaded.

Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752225858.110217.13036901876869496246.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 43c9cda199b8..becd27b2f4ba 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -716,13 +716,11 @@ def test_multi_prog(simdev, sim, obj, modename, modeid):
     fail(ret == 0, "Replaced one of programs without -force")
     check_extack(err, "XDP program already attached.", args)
 
-    if modename == "" or modename == "drv":
-        othermode = "" if modename == "drv" else "drv"
-        start_test("Test multi-attachment XDP - detach...")
-        ret, _, err = sim.unset_xdp(othermode, force=True,
-                                    fail=False, include_stderr=True)
-        fail(ret == 0, "Removed program with a bad mode")
-        check_extack(err, "program loaded with different flags.", args)
+    start_test("Test multi-attachment XDP - remove without mode...")
+    ret, _, err = sim.unset_xdp("", force=True,
+                                fail=False, include_stderr=True)
+    fail(ret == 0, "Removed program without a mode flag")
+    check_extack(err, "More than one program loaded, unset mode is ambiguous.", args)
 
     sim.unset_xdp("offload")
     xdp = sim.ip_link_show(xdp=True)["xdp"]
@@ -1001,16 +999,6 @@ try:
     check_extack(err,
                  "native and generic XDP can't be active at the same time.",
                  args)
-    ret, _, err = sim.set_xdp(obj, "", force=True,
-                              fail=False, include_stderr=True)
-    fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack(err, "program loaded with different flags.", args)
-
-    start_test("Test XDP prog remove with bad flags...")
-    ret, _, err = sim.unset_xdp("", force=True,
-                                fail=False, include_stderr=True)
-    fail(ret == 0, "Removed program with a bad mode")
-    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test MTU restrictions...")
     ret, _ = sim.set_mtu(9000, fail=False)
-- 
cgit v1.2.3-70-g09d2


From d8b5e76ae4e02908d000397597c6bc2868362fbb Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:40 +0100
Subject: selftests/bpf/test_offload.py: Only check verifier log on
 verification fails
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 6f8a57ccf851 ("bpf: Make verifier log more relevant by
default"), the verifier discards log messages for successfully-verified
programs. This broke test_offload.py which is looking for a verification
message from the driver callback. Change test_offload.py to use the toggle
in netdevsim to make the verification fail before looking for the
verification message.

Fixes: 6f8a57ccf851 ("bpf: Make verifier log more relevant by default")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226069.110217.12370824996153348073.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index becd27b2f4ba..61527b43f067 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -911,11 +911,18 @@ try:
 
     sim.tc_flush_filters()
 
+    start_test("Test TC offloads failure...")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+    ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "TC filter did not reject with TC offloads enabled")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+
     start_test("Test TC offloads work...")
     ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
                                          fail=False, include_stderr=True)
     fail(ret != 0, "TC filter did not load with TC offloads enabled")
-    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test TC offload basics...")
     dfs = simdev.dfs_get_bound_progs(expected=1)
@@ -1032,6 +1039,15 @@ try:
     rm("/sys/fs/bpf/offload")
     sim.wait_for_flush()
 
+    start_test("Test XDP load failure...")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+    ret, _, err = bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+                                 dev=sim['ifname'], fail=False, include_stderr=True)
+    fail(ret == 0, "verifier should fail on load")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+    sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+    sim.wait_for_flush()
+
     start_test("Test XDP offload...")
     _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
     ipl = sim.ip_link_show(xdp=True)
@@ -1039,7 +1055,6 @@ try:
     progs = bpftool_prog_list(expected=1)
     prog = progs[0]
     fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
-    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
 
     start_test("Test XDP offload is device bound...")
     dfs = simdev.dfs_get_bound_progs(expected=1)
-- 
cgit v1.2.3-70-g09d2


From 852c2ee338f0ac6026458615b624e1c496142cf6 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:41 +0100
Subject: selftests/bpf/test_offload.py: Fix expected case of extack messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs
in net_device") changed the case of some of the extack messages being
returned when attaching of XDP programs failed. This broke test_offload.py,
so let's fix the test to reflect this.

Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226175.110217.11214100824416344952.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 61527b43f067..51a5e4d939cc 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1004,7 +1004,7 @@ try:
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
     check_extack(err,
-                 "native and generic XDP can't be active at the same time.",
+                 "Native and generic XDP can't be active at the same time.",
                  args)
 
     start_test("Test MTU restrictions...")
@@ -1035,7 +1035,7 @@ try:
     offload = bpf_pinned("/sys/fs/bpf/offload")
     ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
     fail(ret == 0, "attached offloaded XDP program to drv")
-    check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+    check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args)
     rm("/sys/fs/bpf/offload")
     sim.wait_for_flush()
 
-- 
cgit v1.2.3-70-g09d2


From 766e62b7fcd2cf1d43e6594ba37c659dc48f7ddb Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:42 +0100
Subject: selftests/bpf/test_offload.py: Reset ethtool features after failed
 setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When setting the ethtool feature flag fails (as expected for the test), the
kernel now tracks that the feature was requested to be 'off' and refuses to
subsequently disable it again. So reset it back to 'on' so a subsequent
disable (that's not supposed to fail) can succeed.

Fixes: 417ec26477a5 ("selftests/bpf: add offload test based on netdevsim")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226280.110217.10696241563705667871.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 51a5e4d939cc..2128fbd8414b 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -946,6 +946,7 @@ try:
     start_test("Test disabling TC offloads is rejected while filters installed...")
     ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
     fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+    sim.set_ethtool_tc_offloads(True)
 
     start_test("Test qdisc removal frees things...")
     sim.tc_flush_filters()
-- 
cgit v1.2.3-70-g09d2


From 8158cad13435639cd4962fb88970960f880ef6d9 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Dec 2020 14:57:43 +0100
Subject: selftests/bpf/test_offload.py: Filter bpftool internal map when
 counting maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A few of the tests in test_offload.py expects to see a certain number of
maps created, and checks this by counting the number of maps returned by
bpftool. There is already a filter that will remove any maps already there
at the beginning of the test, but bpftool now creates a map for the PID
iterator rodata on each invocation, which makes the map count wrong. Fix
this by also filtering the pid_iter.rodata map by name when counting.

Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/160752226387.110217.9887866138149423444.stgit@toke.dk
---
 tools/testing/selftests/bpf/test_offload.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 2128fbd8414b..b99bb8ed3ed4 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -184,9 +184,7 @@ def bpftool_prog_list(expected=None, ns=""):
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
     # Remove the base maps
-    for m in base_maps:
-        if m in maps:
-            maps.remove(m)
+    maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names]
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
@@ -770,6 +768,9 @@ ret, progs = bpftool("prog", fail=False)
 skip(ret != 0, "bpftool not installed")
 base_progs = progs
 _, base_maps = bpftool("map")
+base_map_names = [
+    'pid_iter.rodata' # created on each bpftool invocation
+]
 
 # Check netdevsim
 ret, out = cmd("modprobe netdevsim", fail=False)
-- 
cgit v1.2.3-70-g09d2


From a89052572ebbf4bcee7c39390640e92b60eaa653 Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:29 +0000
Subject: selftests/bpf: Xsk selftests framework
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds AF_XDP selftests framework under selftests/bpf.

Topology:
---------
     -----------           -----------
     |  xskX   | --------- |  xskY   |
     -----------     |     -----------
          |          |          |
     -----------     |     ----------
     |  vethX  | --------- |  vethY |
     -----------   peer    ----------
          |          |          |
     namespaceX      |     namespaceY

Prerequisites setup by script test_xsk.sh:

   Set up veth interfaces as per the topology shown ^^:
   * setup two veth interfaces and one namespace
   ** veth<xxxx> in root namespace
   ** veth<yyyy> in af_xdp<xxxx> namespace
   ** namespace af_xdp<xxxx>
   * create a spec file veth.spec that includes this run-time configuration
   *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
       conflict with any existing interface
   * tests the veth and xsk layers of the topology

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-2-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/Makefile       |   4 +-
 tools/testing/selftests/bpf/test_xsk.sh    | 152 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/xsk_prereqs.sh | 119 ++++++++++++++++++++++
 3 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/bpf/test_xsk.sh
 create mode 100755 tools/testing/selftests/bpf/xsk_prereqs.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index ac25ba5d0d6c..6a1ddfe68f15 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -46,7 +46,8 @@ endif
 
 TEST_GEN_FILES =
 TEST_FILES = test_lwt_ip_encap.o \
-	test_tc_edt.o
+	test_tc_edt.o \
+	xsk_prereqs.sh
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -70,6 +71,7 @@ TEST_PROGS := test_kmod.sh \
 	test_bpftool_build.sh \
 	test_bpftool.sh \
 	test_bpftool_metadata.sh \
+	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
new file mode 100755
index 000000000000..cae4c5574c4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation, Weqaar Janjua <weqaar.a.janjua@intel.com>
+
+# AF_XDP selftests based on veth
+#
+# End-to-end AF_XDP over Veth test
+#
+# Topology:
+# ---------
+#      -----------           -----------
+#      |  xskX   | --------- |  xskY   |
+#      -----------     |     -----------
+#           |          |          |
+#      -----------     |     ----------
+#      |  vethX  | --------- |  vethY |
+#      -----------   peer    ----------
+#           |          |          |
+#      namespaceX      |     namespaceY
+#
+# AF_XDP is an address family optimized for high performance packet processing,
+# it is XDP’s user-space interface.
+#
+# An AF_XDP socket is linked to a single UMEM which is a region of virtual
+# contiguous memory, divided into equal-sized frames.
+#
+# Refer to AF_XDP Kernel Documentation for detailed information:
+# https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+#
+# Prerequisites setup by script:
+#
+#   Set up veth interfaces as per the topology shown ^^:
+#   * setup two veth interfaces and one namespace
+#   ** veth<xxxx> in root namespace
+#   ** veth<yyyy> in af_xdp<xxxx> namespace
+#   ** namespace af_xdp<xxxx>
+#   * create a spec file veth.spec that includes this run-time configuration
+#   *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
+#       conflict with any existing interface
+#   * tests the veth and xsk layers of the topology
+#
+# Kernel configuration:
+# ---------------------
+# See "config" file for recommended kernel config options.
+#
+# Turn on XDP sockets and veth support when compiling i.e.
+# 	Networking support -->
+# 		Networking options -->
+# 			[ * ] XDP sockets
+#
+# Executing Tests:
+# ----------------
+# Must run with CAP_NET_ADMIN capability.
+#
+# Run (full color-coded output):
+#   sudo ./test_xsk.sh -c
+#
+# If running from kselftests:
+#   sudo make colorconsole=1 run_tests
+#
+# Run (full output without color-coding):
+#   sudo ./test_xsk.sh
+
+. xsk_prereqs.sh
+
+while getopts c flag
+do
+	case "${flag}" in
+		c) colorconsole=1;;
+	esac
+done
+
+TEST_NAME="PREREQUISITES"
+
+URANDOM=/dev/urandom
+[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; }
+
+VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH0=ve${VETH0_POSTFIX}
+VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH1=ve${VETH1_POSTFIX}
+NS0=root
+NS1=af_xdp${VETH1_POSTFIX}
+MTU=1500
+
+setup_vethPairs() {
+	echo "setting up ${VETH0}: namespace: ${NS0}"
+	ip netns add ${NS1}
+	ip link add ${VETH0} type veth peer name ${VETH1}
+	if [ -f /proc/net/if_inet6 ]; then
+		echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
+	fi
+	echo "setting up ${VETH1}: namespace: ${NS1}"
+	ip link set ${VETH1} netns ${NS1}
+	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
+	ip link set ${VETH0} mtu ${MTU}
+	ip netns exec ${NS1} ip link set ${VETH1} up
+	ip link set ${VETH0} up
+}
+
+validate_root_exec
+validate_veth_support ${VETH0}
+validate_ip_utility
+setup_vethPairs
+
+retval=$?
+if [ $retval -ne 0 ]; then
+	test_status $retval "${TEST_NAME}"
+	cleanup_exit ${VETH0} ${VETH1} ${NS1}
+	exit $retval
+fi
+
+echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
+
+validate_veth_spec_file
+
+echo "Spec file created: ${SPECFILE}"
+
+test_status $retval "${TEST_NAME}"
+
+## START TESTS
+
+statusList=()
+
+### TEST 1
+TEST_NAME="XSK KSELFTEST FRAMEWORK"
+
+echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+retval=$?
+if [ $retval -eq 0 ]; then
+	echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
+	vethXDPnative ${VETH0} ${VETH1} ${NS1}
+fi
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+## END TESTS
+
+cleanup_exit ${VETH0} ${VETH1} ${NS1}
+
+for _status in "${statusList[@]}"
+do
+	if [ $_status -ne 0 ]; then
+		test_exit $ksft_fail 0
+	fi
+done
+
+test_exit $ksft_pass 0
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
new file mode 100755
index 000000000000..29762739c21b
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation.
+
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
+ksft_xpass=3
+ksft_skip=4
+
+GREEN='\033[0;92m'
+YELLOW='\033[0;93m'
+RED='\033[0;31m'
+NC='\033[0m'
+STACK_LIM=131072
+SPECFILE=veth.spec
+
+validate_root_exec()
+{
+	msg="skip all tests:"
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		test_exit $ksft_fail 2
+	else
+		return $ksft_pass
+	fi
+}
+
+validate_veth_support()
+{
+	msg="skip all tests:"
+	if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then
+		echo $msg veth kernel support not available >&2
+		test_exit $ksft_skip 1
+	else
+		ip link del $1
+		return $ksft_pass
+	fi
+}
+
+validate_veth_spec_file()
+{
+	if [ ! -f ${SPECFILE} ]; then
+		test_exit $ksft_skip 1
+	fi
+}
+
+test_status()
+{
+	statusval=$1
+	if [ -n "${colorconsole+set}" ]; then
+		if [ $statusval -eq 2 ]; then
+			echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
+		elif [ $statusval -eq 1 ]; then
+			echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
+		elif [ $statusval -eq 0 ]; then
+			echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
+		fi
+	else
+		if [ $statusval -eq 2 ]; then
+			echo -e "$2: [ FAIL ]"
+		elif [ $statusval -eq 1 ]; then
+			echo -e "$2: [ SKIPPED ]"
+		elif [ $statusval -eq 0 ]; then
+			echo -e "$2: [ PASS ]"
+		fi
+	fi
+}
+
+test_exit()
+{
+	retval=$1
+	if [ $2 -ne 0 ]; then
+		test_status $2 $(basename $0)
+	fi
+	exit $retval
+}
+
+clear_configs()
+{
+	if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
+		[ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
+			{ echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
+		echo "removing ns $3"
+		ip netns del $3
+	fi
+	#Once we delete a veth pair node, the entire veth pair is removed,
+	#this is just to be cautious just incase the NS does not exist then
+	#veth node inside NS won't get removed so we explicitly remove it
+	[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
+		{ echo "removing link $1"; ip link del $1; }
+	if [ -f ${SPECFILE} ]; then
+		echo "removing spec file:" ${SPECFILE}
+		rm -f ${SPECFILE}
+	fi
+}
+
+cleanup_exit()
+{
+	echo "cleaning up..."
+	clear_configs $1 $2 $3
+}
+
+validate_ip_utility()
+{
+	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
+}
+
+vethXDPgeneric()
+{
+	ip link set dev $1 xdpdrv off
+	ip netns exec $3 ip link set dev $2 xdpdrv off
+}
+
+vethXDPnative()
+{
+	ip link set dev $1 xdpgeneric off
+	ip netns exec $3 ip link set dev $2 xdpgeneric off
+}
-- 
cgit v1.2.3-70-g09d2


From facb7cb2e909ad2d21ebbfdc051726d4cd8f1d35 Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:30 +0000
Subject: selftests/bpf: Xsk selftests - SKB POLL, NOPOLL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

1. AF_XDP SKB mode
   Generic mode XDP is driver independent, used when the driver does
   not have support for XDP. Works on any netdevice using sockets and
   generic XDP path. XDP hook from netif_receive_skb().
   a. nopoll - soft-irq processing
   b. poll - using poll() syscall

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-3-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/Makefile       |   3 +-
 tools/testing/selftests/bpf/test_xsk.sh    |  39 +-
 tools/testing/selftests/bpf/xdpxceiver.c   | 979 +++++++++++++++++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.h   | 153 +++++
 tools/testing/selftests/bpf/xsk_prereqs.sh |  16 +
 5 files changed, 1187 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/xdpxceiver.c
 create mode 100644 tools/testing/selftests/bpf/xdpxceiver.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6a1ddfe68f15..944ae17a39ed 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,7 +82,8 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
-	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko
+	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
+	xdpxceiver
 
 TEST_CUSTOM_PROGS = urandom_read
 
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index cae4c5574c4c..0b7bafb65f43 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -8,8 +8,17 @@
 #
 # Topology:
 # ---------
-#      -----------           -----------
-#      |  xskX   | --------- |  xskY   |
+#                 -----------
+#               _ | Process | _
+#              /  -----------  \
+#             /        |        \
+#            /         |         \
+#      -----------     |     -----------
+#      | Thread1 |     |     | Thread2 |
+#      -----------     |     -----------
+#           |          |          |
+#      -----------     |     -----------
+#      |  xskX   |     |     |  xskY   |
 #      -----------     |     -----------
 #           |          |          |
 #      -----------     |     ----------
@@ -39,6 +48,8 @@
 #       conflict with any existing interface
 #   * tests the veth and xsk layers of the topology
 #
+# See the source xdpxceiver.c for information on each test
+#
 # Kernel configuration:
 # ---------------------
 # See "config" file for recommended kernel config options.
@@ -138,6 +149,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 2
+TEST_NAME="SKB NOPOLL"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 3
+TEST_NAME="SKB POLL"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-p")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
new file mode 100644
index 000000000000..3f2a65b6a9f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -0,0 +1,979 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+/*
+ * Some functions in this program are taken from
+ * Linux kernel samples/bpf/xdpsock* and modified
+ * for use.
+ *
+ * See test_xsk.sh for detailed information on test topology
+ * and prerequisite network setup.
+ *
+ * This test program contains two threads, each thread is single socket with
+ * a unique UMEM. It validates in-order packet delivery and packet content
+ * by sending packets to each other.
+ *
+ * Tests Information:
+ * ------------------
+ * These selftests test AF_XDP SKB and Native/DRV modes using veth
+ * Virtual Ethernet interfaces.
+ *
+ * The following tests are run:
+ *
+ * 1. AF_XDP SKB mode
+ *    Generic mode XDP is driver independent, used when the driver does
+ *    not have support for XDP. Works on any netdevice using sockets and
+ *    generic XDP path. XDP hook from netif_receive_skb().
+ *    a. nopoll - soft-irq processing
+ *    b. poll - using poll() syscall
+ *
+ * Total tests: 2
+ *
+ * Flow:
+ * -----
+ * - Single process spawns two threads: Tx and Rx
+ * - Each of these two threads attach to a veth interface within their assigned
+ *   namespaces
+ * - Each thread Creates one AF_XDP socket connected to a unique umem for each
+ *   veth interface
+ * - Tx thread Transmits 10k packets from veth<xxxx> to veth<yyyy>
+ * - Rx thread verifies if all 10k packets were received and delivered in-order,
+ *   and have the right content
+ *
+ * Enable/disable debug mode:
+ * --------------------------
+ * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
+ * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <errno.h>
+#include <getopt.h>
+#include <asm/barrier.h>
+typedef __u16 __sum16;
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <locale.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdatomic.h>
+#include <bpf/xsk.h>
+#include "xdpxceiver.h"
+#include "../kselftest.h"
+
+static void __exit_with_error(int error, const char *file, const char *func, int line)
+{
+	ksft_test_result_fail
+	    ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+	ksft_exit_xfail();
+}
+
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+
+#define print_ksft_result(void)\
+	(ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
+
+static void pthread_init_mutex(void)
+{
+	pthread_mutex_init(&sync_mutex, NULL);
+	pthread_mutex_init(&sync_mutex_tx, NULL);
+	pthread_cond_init(&signal_rx_condition, NULL);
+	pthread_cond_init(&signal_tx_condition, NULL);
+}
+
+static void pthread_destroy_mutex(void)
+{
+	pthread_mutex_destroy(&sync_mutex);
+	pthread_mutex_destroy(&sync_mutex_tx);
+	pthread_cond_destroy(&signal_rx_condition);
+	pthread_cond_destroy(&signal_tx_condition);
+}
+
+static void *memset32_htonl(void *dest, u32 val, u32 size)
+{
+	u32 *ptr = (u32 *)dest;
+	int i;
+
+	val = htonl(val);
+
+	for (i = 0; i < (size & (~0x3)); i += 4)
+		ptr[i >> 2] = val;
+
+	for (; i < size; i++)
+		((char *)dest)[i] = ((char *)&val)[i & 3];
+
+	return dest;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+	/* add up 16-bit and 16-bit for 16+c bit */
+	x = (x & 0xffff) + (x >> 16);
+	/* add up carry.. */
+	x = (x & 0xffff) + (x >> 16);
+	return x;
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __u16 csum_fold(__u32 csum)
+{
+	u32 sum = (__force u32)csum;
+
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return (__force __u16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline u32 from64to32(u64 x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (u32)x;
+}
+
+__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+	unsigned long long s = (__force u32)sum;
+
+	s += (__force u32)saddr;
+	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
+	return (__force __u32)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __u16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+{
+	u32 csum = 0;
+	u32 cnt = 0;
+
+	/* udp hdr and data */
+	for (; cnt < len; cnt += 2)
+		csum += udp_pkt[cnt >> 1];
+
+	return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+static void gen_eth_hdr(void *data, struct ethhdr *eth_hdr)
+{
+	memcpy(eth_hdr->h_dest, ((struct ifobject *)data)->dst_mac, ETH_ALEN);
+	memcpy(eth_hdr->h_source, ((struct ifobject *)data)->src_mac, ETH_ALEN);
+	eth_hdr->h_proto = htons(ETH_P_IP);
+}
+
+static void gen_ip_hdr(void *data, struct iphdr *ip_hdr)
+{
+	ip_hdr->version = IP_PKT_VER;
+	ip_hdr->ihl = 0x5;
+	ip_hdr->tos = IP_PKT_TOS;
+	ip_hdr->tot_len = htons(IP_PKT_SIZE);
+	ip_hdr->id = 0;
+	ip_hdr->frag_off = 0;
+	ip_hdr->ttl = IPDEFTTL;
+	ip_hdr->protocol = IPPROTO_UDP;
+	ip_hdr->saddr = ((struct ifobject *)data)->src_ip;
+	ip_hdr->daddr = ((struct ifobject *)data)->dst_ip;
+	ip_hdr->check = 0;
+}
+
+static void gen_udp_hdr(void *data, void *arg, struct udphdr *udp_hdr)
+{
+	udp_hdr->source = htons(((struct ifobject *)arg)->src_port);
+	udp_hdr->dest = htons(((struct ifobject *)arg)->dst_port);
+	udp_hdr->len = htons(UDP_PKT_SIZE);
+	memset32_htonl(pkt_data + PKT_HDR_SIZE,
+		       htonl(((struct generic_data *)data)->seqnum), UDP_PKT_DATA_SIZE);
+}
+
+static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
+{
+	udp_hdr->check = 0;
+	udp_hdr->check =
+	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
+}
+
+static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
+{
+	memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
+}
+
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
+{
+	int ret;
+
+	data->umem = calloc(1, sizeof(struct xsk_umem_info));
+	if (!data->umem)
+		exit_with_error(errno);
+
+	ret = xsk_umem__create(&data->umem->umem, buffer, size,
+			       &data->umem->fq, &data->umem->cq, NULL);
+	if (ret)
+		exit_with_error(ret);
+
+	data->umem->buffer = buffer;
+}
+
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
+{
+	int ret, i;
+	u32 idx;
+
+	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
+	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		exit_with_error(ret);
+	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
+	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+}
+
+static int xsk_configure_socket(struct ifobject *ifobject)
+{
+	struct xsk_socket_config cfg;
+	struct xsk_ring_cons *rxr;
+	struct xsk_ring_prod *txr;
+	int ret;
+
+	ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
+	if (!ifobject->xsk)
+		exit_with_error(errno);
+
+	ifobject->xsk->umem = ifobject->umem;
+	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	cfg.libbpf_flags = 0;
+	cfg.xdp_flags = opt_xdp_flags;
+	cfg.bind_flags = opt_xdp_bind_flags;
+
+	rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
+	txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+
+	ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
+				 opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
+
+	if (ret)
+		return 1;
+
+	return 0;
+}
+
+static struct option long_options[] = {
+	{"interface", required_argument, 0, 'i'},
+	{"queue", optional_argument, 0, 'q'},
+	{"poll", no_argument, 0, 'p'},
+	{"xdp-skb", no_argument, 0, 'S'},
+	{"copy", no_argument, 0, 'c'},
+	{"debug", optional_argument, 0, 'D'},
+	{"tx-pkt-count", optional_argument, 0, 'C'},
+	{0, 0, 0, 0}
+};
+
+static void usage(const char *prog)
+{
+	const char *str =
+	    "  Usage: %s [OPTIONS]\n"
+	    "  Options:\n"
+	    "  -i, --interface      Use interface\n"
+	    "  -q, --queue=n        Use queue n (default 0)\n"
+	    "  -p, --poll           Use poll syscall\n"
+	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
+	    "  -c, --copy           Force copy mode\n"
+	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
+	    "  -C, --tx-pkt-count=n Number of packets to send\n";
+	ksft_print_msg(str, prog);
+}
+
+static bool switch_namespace(int idx)
+{
+	char fqns[26] = "/var/run/netns/";
+	int nsfd;
+
+	strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
+	nsfd = open(fqns, O_RDONLY);
+
+	if (nsfd == -1)
+		exit_with_error(errno);
+
+	if (setns(nsfd, 0) == -1)
+		exit_with_error(errno);
+
+	return true;
+}
+
+static void *nsswitchthread(void *args)
+{
+	if (switch_namespace(((struct targs *)args)->idx)) {
+		ifdict[((struct targs *)args)->idx]->ifindex =
+		    if_nametoindex(ifdict[((struct targs *)args)->idx]->ifname);
+		if (!ifdict[((struct targs *)args)->idx]->ifindex) {
+			ksft_test_result_fail
+			    ("ERROR: [%s] interface \"%s\" does not exist\n",
+			     __func__, ifdict[((struct targs *)args)->idx]->ifname);
+			((struct targs *)args)->retptr = false;
+		} else {
+			ksft_print_msg("Interface found: %s\n",
+				       ifdict[((struct targs *)args)->idx]->ifname);
+			((struct targs *)args)->retptr = true;
+		}
+	} else {
+		((struct targs *)args)->retptr = false;
+	}
+	pthread_exit(NULL);
+}
+
+static int validate_interfaces(void)
+{
+	bool ret = true;
+
+	for (int i = 0; i < MAX_INTERFACES; i++) {
+		if (!strcmp(ifdict[i]->ifname, "")) {
+			ret = false;
+			ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
+		}
+		if (strcmp(ifdict[i]->nsname, "")) {
+			struct targs *targs;
+
+			targs = (struct targs *)malloc(sizeof(struct targs));
+			if (!targs)
+				exit_with_error(errno);
+
+			targs->idx = i;
+			if (pthread_create(&ns_thread, NULL, nsswitchthread, (void *)targs))
+				exit_with_error(errno);
+
+			pthread_join(ns_thread, NULL);
+
+			if (targs->retptr)
+				ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
+
+			free(targs);
+		} else {
+			ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
+			if (!ifdict[i]->ifindex) {
+				ksft_test_result_fail
+				    ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
+				ret = false;
+			} else {
+				ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
+			}
+		}
+	}
+	return ret;
+}
+
+static void parse_command_line(int argc, char **argv)
+{
+	int option_index, interface_index = 0, c;
+
+	opterr = 0;
+
+	for (;;) {
+		c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'i':
+			if (interface_index == MAX_INTERFACES)
+				break;
+			char *sptr, *token;
+
+			sptr = strndupa(optarg, strlen(optarg));
+			memcpy(ifdict[interface_index]->ifname,
+			       strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
+			token = strsep(&sptr, ",");
+			if (token)
+				memcpy(ifdict[interface_index]->nsname, token,
+				       MAX_INTERFACES_NAMESPACE_CHARS);
+			interface_index++;
+			break;
+		case 'q':
+			opt_queue = atoi(optarg);
+			break;
+		case 'p':
+			opt_poll = 1;
+			break;
+		case 'S':
+			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
+			opt_xdp_bind_flags |= XDP_COPY;
+			uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
+			break;
+		case 'c':
+			opt_xdp_bind_flags |= XDP_COPY;
+			break;
+		case 'D':
+			debug_pkt_dump = 1;
+			break;
+		case 'C':
+			opt_pkt_count = atoi(optarg);
+			break;
+		default:
+			usage(basename(argv[0]));
+			ksft_exit_xfail();
+		}
+	}
+
+	if (!validate_interfaces()) {
+		usage(basename(argv[0]));
+		ksft_exit_xfail();
+	}
+}
+
+static void kick_tx(struct xsk_socket_info *xsk)
+{
+	int ret;
+
+	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
+		return;
+	exit_with_error(errno);
+}
+
+static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+{
+	unsigned int rcvd;
+	u32 idx;
+
+	if (!xsk->outstanding_tx)
+		return;
+
+	if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+		kick_tx(xsk);
+
+	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+	if (rcvd) {
+		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+		xsk->outstanding_tx -= rcvd;
+		xsk->tx_npkts += rcvd;
+	}
+}
+
+static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+{
+	unsigned int rcvd, i;
+	u32 idx_rx = 0, idx_fq = 0;
+	int ret;
+
+	rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+	if (!rcvd) {
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (ret < 0)
+				exit_with_error(ret);
+		}
+		return;
+	}
+
+	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+	while (ret != rcvd) {
+		if (ret < 0)
+			exit_with_error(ret);
+		if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (ret < 0)
+				exit_with_error(ret);
+		}
+		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+	}
+
+	for (i = 0; i < rcvd; i++) {
+		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
+		(void)xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
+		u64 orig = xsk_umem__extract_addr(addr);
+
+		addr = xsk_umem__add_offset_to_addr(addr);
+		pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
+		if (!pkt_node_rx)
+			exit_with_error(errno);
+
+		pkt_node_rx->pkt_frame = (char *)malloc(PKT_SIZE);
+		if (!pkt_node_rx->pkt_frame)
+			exit_with_error(errno);
+
+		memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
+		       PKT_SIZE);
+
+		TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+
+		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+	}
+
+	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+	xsk_ring_cons__release(&xsk->rx, rcvd);
+	xsk->rx_npkts += rcvd;
+}
+
+static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+{
+	u32 idx;
+	unsigned int i;
+
+	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
+		complete_tx_only(xsk, batch_size);
+
+	for (i = 0; i < batch_size; i++) {
+		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+		tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+		tx_desc->len = PKT_SIZE;
+	}
+
+	xsk_ring_prod__submit(&xsk->tx, batch_size);
+	xsk->outstanding_tx += batch_size;
+	*frameptr += batch_size;
+	*frameptr %= num_frames;
+	complete_tx_only(xsk, batch_size);
+}
+
+static inline int get_batch_size(int pkt_cnt)
+{
+	if (!opt_pkt_count)
+		return BATCH_SIZE;
+
+	if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
+		return BATCH_SIZE;
+
+	return opt_pkt_count - pkt_cnt;
+}
+
+static void complete_tx_only_all(void *arg)
+{
+	bool pending;
+
+	do {
+		pending = false;
+		if (((struct ifobject *)arg)->xsk->outstanding_tx) {
+			complete_tx_only(((struct ifobject *)
+					  arg)->xsk, BATCH_SIZE);
+			pending = !!((struct ifobject *)arg)->xsk->outstanding_tx;
+		}
+	} while (pending);
+}
+
+static void tx_only_all(void *arg)
+{
+	struct pollfd fds[MAX_SOCKS] = { };
+	u32 frame_nb = 0;
+	int pkt_cnt = 0;
+	int ret;
+
+	fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk);
+	fds[0].events = POLLOUT;
+
+	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
+		int batch_size = get_batch_size(pkt_cnt);
+
+		if (opt_poll) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (ret <= 0)
+				continue;
+
+			if (!(fds[0].revents & POLLOUT))
+				continue;
+		}
+
+		tx_only(((struct ifobject *)arg)->xsk, &frame_nb, batch_size);
+		pkt_cnt += batch_size;
+	}
+
+	if (opt_pkt_count)
+		complete_tx_only_all(arg);
+}
+
+static void worker_pkt_dump(void)
+{
+	struct in_addr ipaddr;
+
+	fprintf(stdout, "---------------------------------------\n");
+	for (int iter = 0; iter < num_frames - 1; iter++) {
+		/*extract L2 frame */
+		fprintf(stdout, "DEBUG>> L2: dst mac: ");
+		for (int i = 0; i < ETH_ALEN; i++)
+			fprintf(stdout, "%02X", ((struct ethhdr *)
+						 pkt_buf[iter]->payload)->h_dest[i]);
+
+		fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+		for (int i = 0; i < ETH_ALEN; i++)
+			fprintf(stdout, "%02X", ((struct ethhdr *)
+						 pkt_buf[iter]->payload)->h_source[i]);
+
+		/*extract L3 frame */
+		fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
+			((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
+
+		ipaddr.s_addr =
+		    ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
+		fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
+
+		ipaddr.s_addr =
+		    ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
+		fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
+
+		/*extract L4 frame */
+		fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
+			ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
+						 sizeof(struct ethhdr) +
+						 sizeof(struct iphdr)))->source));
+
+		fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
+			ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
+						 sizeof(struct ethhdr) +
+						 sizeof(struct iphdr)))->dest));
+		/*extract L5 frame */
+		int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
+
+		if (payload == EOT) {
+			ksft_print_msg("End-of-tranmission frame received\n");
+			fprintf(stdout, "---------------------------------------\n");
+			break;
+		}
+		fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+		fprintf(stdout, "---------------------------------------\n");
+	}
+}
+
+static void worker_pkt_validate(void)
+{
+	u32 payloadseqnum = -2;
+
+	while (1) {
+		pkt_node_rx_q = malloc(sizeof(struct pkt));
+		pkt_node_rx_q = TAILQ_LAST(&head, head_s);
+		if (!pkt_node_rx_q)
+			break;
+		/*do not increment pktcounter if !(tos=0x9 and ipv4) */
+		if ((((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+				       sizeof(struct ethhdr)))->version == IP_PKT_VER)
+		    && (((struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr)))->tos ==
+			IP_PKT_TOS)) {
+			payloadseqnum = *((uint32_t *) (pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
+			if (debug_pkt_dump && payloadseqnum != EOT) {
+				pkt_obj = (struct pkt_frame *)malloc(sizeof(struct pkt_frame));
+				pkt_obj->payload = (char *)malloc(PKT_SIZE);
+				memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
+				pkt_buf[payloadseqnum] = pkt_obj;
+			}
+
+			if (payloadseqnum == EOT) {
+				ksft_print_msg("End-of-tranmission frame received: PASS\n");
+				sigvar = 1;
+				break;
+			}
+
+			if (prev_pkt + 1 != payloadseqnum) {
+				ksft_test_result_fail
+				    ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
+				     __func__, prev_pkt, payloadseqnum);
+				ksft_exit_xfail();
+			}
+
+			TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
+			free(pkt_node_rx_q->pkt_frame);
+			free(pkt_node_rx_q);
+			pkt_node_rx_q = NULL;
+			prev_pkt = payloadseqnum;
+			pkt_counter++;
+		} else {
+			ksft_print_msg("Invalid frame received: ");
+			ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n",
+				((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+				       sizeof(struct ethhdr)))->version,
+				((struct iphdr *)(pkt_node_rx_q->pkt_frame +
+				       sizeof(struct ethhdr)))->tos);
+			TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
+			free(pkt_node_rx_q->pkt_frame);
+			free(pkt_node_rx_q);
+			pkt_node_rx_q = NULL;
+		}
+	}
+}
+
+static void thread_common_ops(void *arg, void *bufs, pthread_mutex_t *mutexptr,
+			      atomic_int *spinningptr)
+{
+	int ctr = 0;
+	int ret;
+
+	xsk_configure_umem((struct ifobject *)arg, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
+	ret = xsk_configure_socket((struct ifobject *)arg);
+
+	/* Retry Create Socket if it fails as xsk_socket__create()
+	 * is asynchronous
+	 *
+	 * Essential to lock Mutex here to prevent Tx thread from
+	 * entering before Rx and causing a deadlock
+	 */
+	pthread_mutex_lock(mutexptr);
+	while (ret && ctr < SOCK_RECONF_CTR) {
+		atomic_store(spinningptr, 1);
+		xsk_configure_umem((struct ifobject *)arg,
+				   bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
+		ret = xsk_configure_socket((struct ifobject *)arg);
+		usleep(USLEEP_MAX);
+		ctr++;
+	}
+	atomic_store(spinningptr, 0);
+	pthread_mutex_unlock(mutexptr);
+
+	if (ctr >= SOCK_RECONF_CTR)
+		exit_with_error(ret);
+}
+
+static void *worker_testapp_validate(void *arg)
+{
+	struct udphdr *udp_hdr =
+	    (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+	struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data));
+	struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
+	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+	void *bufs;
+
+	pthread_attr_setstacksize(&attr, THREAD_STACK);
+
+	bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
+		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (bufs == MAP_FAILED)
+		exit_with_error(errno);
+
+	if (strcmp(((struct ifobject *)arg)->nsname, ""))
+		switch_namespace(((struct ifobject *)arg)->ifdict_index);
+
+	if (((struct ifobject *)arg)->fv.vector == tx) {
+		int spinningrxctr = 0;
+
+		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx);
+
+		while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
+			spinningrxctr++;
+			usleep(USLEEP_MAX);
+		}
+
+		ksft_print_msg("Interface [%s] vector [Tx]\n", ((struct ifobject *)arg)->ifname);
+		for (int i = 0; i < num_frames; i++) {
+			/*send EOT frame */
+			if (i == (num_frames - 1))
+				data->seqnum = -1;
+			else
+				data->seqnum = i;
+			gen_udp_hdr((void *)data, (void *)arg, udp_hdr);
+			gen_ip_hdr((void *)arg, ip_hdr);
+			gen_udp_csum(udp_hdr, ip_hdr);
+			gen_eth_hdr((void *)arg, eth_hdr);
+			gen_eth_frame(((struct ifobject *)arg)->umem,
+				      i * XSK_UMEM__DEFAULT_FRAME_SIZE);
+		}
+
+		free(data);
+		ksft_print_msg("Sending %d packets on interface %s\n",
+			       (opt_pkt_count - 1), ((struct ifobject *)arg)->ifname);
+		tx_only_all(arg);
+	} else if (((struct ifobject *)arg)->fv.vector == rx) {
+		struct pollfd fds[MAX_SOCKS] = { };
+		int ret;
+
+		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx);
+
+		ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname);
+		xsk_populate_fill_ring(((struct ifobject *)arg)->umem);
+
+		TAILQ_INIT(&head);
+		if (debug_pkt_dump) {
+			pkt_buf = malloc(sizeof(struct pkt_frame **) * num_frames);
+			if (!pkt_buf)
+				exit_with_error(errno);
+		}
+
+		fds[0].fd = xsk_socket__fd(((struct ifobject *)arg)->xsk->xsk);
+		fds[0].events = POLLIN;
+
+		pthread_mutex_lock(&sync_mutex);
+		pthread_cond_signal(&signal_rx_condition);
+		pthread_mutex_unlock(&sync_mutex);
+
+		while (1) {
+			if (opt_poll) {
+				ret = poll(fds, 1, POLL_TMOUT);
+				if (ret <= 0)
+					continue;
+			}
+			rx_pkt(((struct ifobject *)arg)->xsk, fds);
+			worker_pkt_validate();
+
+			if (sigvar)
+				break;
+		}
+
+		ksft_print_msg("Received %d packets on interface %s\n",
+			       pkt_counter, ((struct ifobject *)arg)->ifname);
+	}
+
+	xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
+	(void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem);
+	pthread_exit(NULL);
+}
+
+static void testapp_validate(void)
+{
+	pthread_attr_init(&attr);
+	pthread_attr_setstacksize(&attr, THREAD_STACK);
+
+	pthread_mutex_lock(&sync_mutex);
+
+	/*Spawn RX thread */
+	if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1]))
+		exit_with_error(errno);
+
+	struct timespec max_wait = { 0, 0 };
+
+	if (clock_gettime(CLOCK_REALTIME, &max_wait))
+		exit_with_error(errno);
+	max_wait.tv_sec += TMOUT_SEC;
+
+	if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
+		exit_with_error(errno);
+
+	pthread_mutex_unlock(&sync_mutex);
+
+	/*Spawn TX thread */
+	if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0]))
+		exit_with_error(errno);
+
+	pthread_join(t1, NULL);
+	pthread_join(t0, NULL);
+
+	if (debug_pkt_dump) {
+		worker_pkt_dump();
+		for (int iter = 0; iter < num_frames - 1; iter++) {
+			free(pkt_buf[iter]->payload);
+			free(pkt_buf[iter]);
+		}
+		free(pkt_buf);
+	}
+
+	print_ksft_result();
+}
+
+static void init_iface_config(void *ifaceconfig)
+{
+	/*Init interface0 */
+	ifdict[0]->fv.vector = tx;
+	memcpy(ifdict[0]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN);
+	memcpy(ifdict[0]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN);
+	ifdict[0]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr;
+	ifdict[0]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr;
+	ifdict[0]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port;
+	ifdict[0]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port;
+
+	/*Init interface1 */
+	ifdict[1]->fv.vector = rx;
+	memcpy(ifdict[1]->dst_mac, ((struct ifaceconfigobj *)ifaceconfig)->src_mac, ETH_ALEN);
+	memcpy(ifdict[1]->src_mac, ((struct ifaceconfigobj *)ifaceconfig)->dst_mac, ETH_ALEN);
+	ifdict[1]->dst_ip = ((struct ifaceconfigobj *)ifaceconfig)->src_ip.s_addr;
+	ifdict[1]->src_ip = ((struct ifaceconfigobj *)ifaceconfig)->dst_ip.s_addr;
+	ifdict[1]->dst_port = ((struct ifaceconfigobj *)ifaceconfig)->src_port;
+	ifdict[1]->src_port = ((struct ifaceconfigobj *)ifaceconfig)->dst_port;
+}
+
+int main(int argc, char **argv)
+{
+	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+
+	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
+		exit_with_error(errno);
+
+	const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+	const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+	const char *IP1 = "192.168.100.162";
+	const char *IP2 = "192.168.100.161";
+	u16 UDP_DST_PORT = 2020;
+	u16 UDP_SRC_PORT = 2121;
+
+	ifaceconfig = (struct ifaceconfigobj *)malloc(sizeof(struct ifaceconfigobj));
+	memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
+	memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
+	inet_aton(IP1, &ifaceconfig->dst_ip);
+	inet_aton(IP2, &ifaceconfig->src_ip);
+	ifaceconfig->dst_port = UDP_DST_PORT;
+	ifaceconfig->src_port = UDP_SRC_PORT;
+
+	for (int i = 0; i < MAX_INTERFACES; i++) {
+		ifdict[i] = (struct ifobject *)malloc(sizeof(struct ifobject));
+		if (!ifdict[i])
+			exit_with_error(errno);
+
+		ifdict[i]->ifdict_index = i;
+	}
+
+	setlocale(LC_ALL, "");
+
+	parse_command_line(argc, argv);
+
+	num_frames = ++opt_pkt_count;
+
+	init_iface_config((void *)ifaceconfig);
+
+	pthread_init_mutex();
+
+	ksft_set_plan(1);
+
+	testapp_validate();
+
+	for (int i = 0; i < MAX_INTERFACES; i++)
+		free(ifdict[i]);
+
+	pthread_destroy_mutex();
+
+	ksft_exit_pass();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
new file mode 100644
index 000000000000..5929f2fc1224
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef XDPXCEIVER_H_
+#define XDPXCEIVER_H_
+
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+#define MAX_INTERFACES 2
+#define MAX_INTERFACE_NAME_CHARS 7
+#define MAX_INTERFACES_NAMESPACE_CHARS 10
+#define MAX_SOCKS 1
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+			sizeof(struct udphdr))
+#define MIN_PKT_SIZE 64
+#define ETH_FCS_SIZE 4
+#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE)
+#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define IP_PKT_VER 0x4
+#define IP_PKT_TOS 0x9
+#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+#define TMOUT_SEC (3)
+#define EOT (-1)
+#define USLEEP_MAX 200000
+#define THREAD_STACK 60000000
+#define SOCK_RECONF_CTR 10
+#define BATCH_SIZE 64
+#define POLL_TMOUT 1000
+#define NEED_WAKEUP 1
+
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
+
+enum TESTS {
+	ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
+};
+
+u8 uut;
+u8 debug_pkt_dump;
+u32 num_frames;
+
+static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int opt_queue;
+static int opt_pkt_count;
+static int opt_poll;
+static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
+static u32 pkt_counter;
+static u32 prev_pkt = -1;
+static int sigvar;
+
+struct xsk_umem_info {
+	struct xsk_ring_prod fq;
+	struct xsk_ring_cons cq;
+	struct xsk_umem *umem;
+	void *buffer;
+};
+
+struct xsk_socket_info {
+	struct xsk_ring_cons rx;
+	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
+	struct xsk_socket *xsk;
+	unsigned long rx_npkts;
+	unsigned long tx_npkts;
+	unsigned long prev_rx_npkts;
+	unsigned long prev_tx_npkts;
+	u32 outstanding_tx;
+};
+
+struct flow_vector {
+	enum fvector {
+		tx,
+		rx,
+		bidi,
+		undef,
+	} vector;
+};
+
+struct generic_data {
+	u32 seqnum;
+};
+
+struct ifaceconfigobj {
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	struct in_addr dst_ip;
+	struct in_addr src_ip;
+	u16 src_port;
+	u16 dst_port;
+} *ifaceconfig;
+
+struct ifobject {
+	int ifindex;
+	int ifdict_index;
+	char ifname[MAX_INTERFACE_NAME_CHARS];
+	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
+	struct flow_vector fv;
+	struct xsk_socket_info *xsk;
+	struct xsk_umem_info *umem;
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	u32 dst_ip;
+	u32 src_ip;
+	u16 src_port;
+	u16 dst_port;
+};
+
+static struct ifobject *ifdict[MAX_INTERFACES];
+
+/*threads*/
+atomic_int spinning_tx;
+atomic_int spinning_rx;
+pthread_mutex_t sync_mutex;
+pthread_mutex_t sync_mutex_tx;
+pthread_cond_t signal_rx_condition;
+pthread_cond_t signal_tx_condition;
+pthread_t t0, t1, ns_thread;
+pthread_attr_t attr;
+
+struct targs {
+	bool retptr;
+	int idx;
+};
+
+TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
+struct head_s *head_p;
+struct pkt {
+	char *pkt_frame;
+
+	TAILQ_ENTRY(pkt) pkt_nodes;
+} *pkt_node_rx, *pkt_node_rx_q;
+
+struct pkt_frame {
+	char *payload;
+} *pkt_obj;
+
+struct pkt_frame **pkt_buf;
+
+#endif				/* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 29762739c21b..9d54c4645127 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -14,6 +14,8 @@ RED='\033[0;31m'
 NC='\033[0m'
 STACK_LIM=131072
 SPECFILE=veth.spec
+XSKOBJ=xdpxceiver
+NUMPKTS=10000
 
 validate_root_exec()
 {
@@ -117,3 +119,17 @@ vethXDPnative()
 	ip link set dev $1 xdpgeneric off
 	ip netns exec $3 ip link set dev $2 xdpgeneric off
 }
+
+execxdpxceiver()
+{
+	local -a 'paramkeys=("${!'"$1"'[@]}")' copy
+	paramkeysstr=${paramkeys[*]}
+
+	for index in $paramkeysstr;
+		do
+			current=$1"[$index]"
+			copy[$index]=${!current}
+		done
+
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+}
-- 
cgit v1.2.3-70-g09d2


From 9103a8594d9324d8e1512442ba580e4e91d42c2d Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:31 +0000
Subject: selftests/bpf: Xsk selftests - DRV POLL, NOPOLL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

2. AF_XDP DRV/Native mode
   Works on any netdevice with XDP_REDIRECT support, driver dependent.
   Processes packets before SKB allocation. Provides better performance
   than SKB. Driver hook available just after DMA of buffer descriptor.
   a. nopoll
   b. poll
   * Only copy mode is supported because veth does not currently support
     zero-copy mode

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-4-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  | 24 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.c | 22 +++++++++++++++++++---
 tools/testing/selftests/bpf/xdpxceiver.h |  3 ++-
 3 files changed, 45 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 0b7bafb65f43..aad8065637fd 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -173,6 +173,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 4
+TEST_NAME="DRV NOPOLL"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 5
+TEST_NAME="DRV POLL"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-p")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 3f2a65b6a9f5..9fcd80a38b07 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -27,7 +27,16 @@
  *    a. nopoll - soft-irq processing
  *    b. poll - using poll() syscall
  *
- * Total tests: 2
+ * 2. AF_XDP DRV/Native mode
+ *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
+ *    packets before SKB allocation. Provides better performance than SKB. Driver
+ *    hook available just after DMA of buffer descriptor.
+ *    a. nopoll
+ *    b. poll
+ *    - Only copy mode is supported because veth does not currently support
+ *      zero-copy mode
+ *
+ * Total tests: 4
  *
  * Flow:
  * -----
@@ -88,7 +97,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s\n", uut ? "" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
+	(ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
 
 static void pthread_init_mutex(void)
 {
@@ -311,6 +320,7 @@ static struct option long_options[] = {
 	{"queue", optional_argument, 0, 'q'},
 	{"poll", no_argument, 0, 'p'},
 	{"xdp-skb", no_argument, 0, 'S'},
+	{"xdp-native", no_argument, 0, 'N'},
 	{"copy", no_argument, 0, 'c'},
 	{"debug", optional_argument, 0, 'D'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
@@ -326,6 +336,7 @@ static void usage(const char *prog)
 	    "  -q, --queue=n        Use queue n (default 0)\n"
 	    "  -p, --poll           Use poll syscall\n"
 	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
+	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
 	    "  -c, --copy           Force copy mode\n"
 	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
@@ -417,7 +428,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pScDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -448,6 +459,11 @@ static void parse_command_line(int argc, char **argv)
 			opt_xdp_bind_flags |= XDP_COPY;
 			uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
 			break;
+		case 'N':
+			opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+			opt_xdp_bind_flags |= XDP_COPY;
+			uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
+			break;
 		case 'c':
 			opt_xdp_bind_flags |= XDP_COPY;
 			break;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 5929f2fc1224..12070d66344b 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -38,7 +38,7 @@
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
-#define NEED_WAKEUP 1
+#define NEED_WAKEUP true
 
 typedef __u32 u32;
 typedef __u16 u16;
@@ -46,6 +46,7 @@ typedef __u8 u8;
 
 enum TESTS {
 	ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
+	ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
 };
 
 u8 uut;
-- 
cgit v1.2.3-70-g09d2


From 6674bf66560a6c55aada1e3cd4fca7a3ed204075 Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:32 +0000
Subject: selftests/bpf: Xsk selftests - Socket Teardown - SKB, DRV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

1. AF_XDP SKB mode
   c. Socket Teardown
      Create a Tx and a Rx socket, Tx from one socket, Rx on another.
      Destroy both sockets, then repeat multiple times. Only nopoll mode
      is used

2. AF_XDP DRV/Native mode
   c. Socket Teardown
   * Only copy mode is supported because veth does not currently support
     zero-copy mode

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-5-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  | 24 ++++++++++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.c | 35 ++++++++++++++++++++++++++++----
 tools/testing/selftests/bpf/xdpxceiver.h |  2 ++
 3 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index aad8065637fd..9be9dff25560 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -197,6 +197,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 6
+TEST_NAME="SKB SOCKET TEARDOWN"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-T")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 7
+TEST_NAME="DRV SOCKET TEARDOWN"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-T")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 9fcd80a38b07..e8907109782d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -26,6 +26,9 @@
  *    generic XDP path. XDP hook from netif_receive_skb().
  *    a. nopoll - soft-irq processing
  *    b. poll - using poll() syscall
+ *    c. Socket Teardown
+ *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
+ *       both sockets, then repeat multiple times. Only nopoll mode is used
  *
  * 2. AF_XDP DRV/Native mode
  *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
@@ -33,10 +36,11 @@
  *    hook available just after DMA of buffer descriptor.
  *    a. nopoll
  *    b. poll
+ *    c. Socket Teardown
  *    - Only copy mode is supported because veth does not currently support
  *      zero-copy mode
  *
- * Total tests: 4
+ * Total tests: 6
  *
  * Flow:
  * -----
@@ -97,7 +101,8 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" : "NOPOLL"))
+	(ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
+			       "NOPOLL", opt_teardown ? "Socket Teardown" : ""))
 
 static void pthread_init_mutex(void)
 {
@@ -322,6 +327,7 @@ static struct option long_options[] = {
 	{"xdp-skb", no_argument, 0, 'S'},
 	{"xdp-native", no_argument, 0, 'N'},
 	{"copy", no_argument, 0, 'c'},
+	{"tear-down", no_argument, 0, 'T'},
 	{"debug", optional_argument, 0, 'D'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
 	{0, 0, 0, 0}
@@ -338,6 +344,7 @@ static void usage(const char *prog)
 	    "  -S, --xdp-skb=n      Use XDP SKB mode\n"
 	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
 	    "  -c, --copy           Force copy mode\n"
+	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
 	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
 	ksft_print_msg(str, prog);
@@ -428,7 +435,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pSNcDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -467,6 +474,9 @@ static void parse_command_line(int argc, char **argv)
 		case 'c':
 			opt_xdp_bind_flags |= XDP_COPY;
 			break;
+		case 'T':
+			opt_teardown = 1;
+			break;
 		case 'D':
 			debug_pkt_dump = 1;
 			break;
@@ -871,6 +881,9 @@ static void *worker_testapp_validate(void *arg)
 
 		ksft_print_msg("Received %d packets on interface %s\n",
 			       pkt_counter, ((struct ifobject *)arg)->ifname);
+
+		if (opt_teardown)
+			ksft_print_msg("Destroying socket\n");
 	}
 
 	xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
@@ -916,6 +929,20 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
+	if (!opt_teardown)
+		print_ksft_result();
+}
+
+static void testapp_sockets(void)
+{
+	for (int i = 0; i < MAX_TEARDOWN_ITER; i++) {
+		pkt_counter = 0;
+		prev_pkt = -1;
+		sigvar = 0;
+		ksft_print_msg("Creating socket\n");
+		testapp_validate();
+	}
+
 	print_ksft_result();
 }
 
@@ -982,7 +1009,7 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(1);
 
-	testapp_validate();
+	opt_teardown ? testapp_sockets() : testapp_validate();
 
 	for (int i = 0; i < MAX_INTERFACES; i++)
 		free(ifdict[i]);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 12070d66344b..58185b914f99 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -21,6 +21,7 @@
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
 #define MAX_SOCKS 1
+#define MAX_TEARDOWN_ITER 10
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
 #define MIN_PKT_SIZE 64
@@ -57,6 +58,7 @@ static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int opt_queue;
 static int opt_pkt_count;
 static int opt_poll;
+static int opt_teardown;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
-- 
cgit v1.2.3-70-g09d2


From 7d20441eb05ec6d8dc7b16381c53b3c0b3ad6e8a Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Mon, 7 Dec 2020 21:53:33 +0000
Subject: selftests/bpf: Xsk selftests - Bi-directional Sockets - SKB, DRV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds following tests:

1. AF_XDP SKB mode
   d. Bi-directional Sockets
      Configure sockets as bi-directional tx/rx sockets, sets up fill
      and completion rings on each socket, tx/rx in both directions.
      Only nopoll mode is used

2. AF_XDP DRV/Native mode
   d. Bi-directional Sockets
   * Only copy mode is supported because veth does not currently support
     zero-copy mode

Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Yonghong Song <yhs@fb.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Link: https://lore.kernel.org/bpf/20201207215333.11586-6-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  |  24 ++++++++
 tools/testing/selftests/bpf/xdpxceiver.c | 100 +++++++++++++++++++++++--------
 tools/testing/selftests/bpf/xdpxceiver.h |   4 ++
 3 files changed, 104 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 9be9dff25560..88a7483eaae4 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -221,6 +221,30 @@ retval=$?
 test_status $retval "${TEST_NAME}"
 statusList+=($retval)
 
+### TEST 8
+TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
+
+vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
+
+params=("-S" "-B")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
+### TEST 9
+TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
+
+vethXDPnative ${VETH0} ${VETH1} ${NS1}
+
+params=("-N" "-B")
+execxdpxceiver params
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+statusList+=($retval)
+
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index e8907109782d..014dedaa4dd2 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -29,6 +29,10 @@
  *    c. Socket Teardown
  *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
  *       both sockets, then repeat multiple times. Only nopoll mode is used
+ *    d. Bi-directional sockets
+ *       Configure sockets as bi-directional tx/rx sockets, sets up fill and
+ *       completion rings on each socket, tx/rx in both directions. Only nopoll
+ *       mode is used
  *
  * 2. AF_XDP DRV/Native mode
  *    Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
@@ -37,10 +41,11 @@
  *    a. nopoll
  *    b. poll
  *    c. Socket Teardown
+ *    d. Bi-directional sockets
  *    - Only copy mode is supported because veth does not currently support
  *      zero-copy mode
  *
- * Total tests: 6
+ * Total tests: 8
  *
  * Flow:
  * -----
@@ -101,8 +106,9 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
-			       "NOPOLL", opt_teardown ? "Socket Teardown" : ""))
+	(ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
+			       "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
+			       opt_bidi ? "Bi-directional Sockets" : ""))
 
 static void pthread_init_mutex(void)
 {
@@ -308,8 +314,13 @@ static int xsk_configure_socket(struct ifobject *ifobject)
 	cfg.xdp_flags = opt_xdp_flags;
 	cfg.bind_flags = opt_xdp_bind_flags;
 
-	rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
-	txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+	if (!opt_bidi) {
+		rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
+		txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+	} else {
+		rxr = &ifobject->xsk->rx;
+		txr = &ifobject->xsk->tx;
+	}
 
 	ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
 				 opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
@@ -328,6 +339,7 @@ static struct option long_options[] = {
 	{"xdp-native", no_argument, 0, 'N'},
 	{"copy", no_argument, 0, 'c'},
 	{"tear-down", no_argument, 0, 'T'},
+	{"bidi", optional_argument, 0, 'B'},
 	{"debug", optional_argument, 0, 'D'},
 	{"tx-pkt-count", optional_argument, 0, 'C'},
 	{0, 0, 0, 0}
@@ -345,6 +357,7 @@ static void usage(const char *prog)
 	    "  -N, --xdp-native=n   Enforce XDP DRV (native) mode\n"
 	    "  -c, --copy           Force copy mode\n"
 	    "  -T, --tear-down      Tear down sockets by repeatedly recreating them\n"
+	    "  -B, --bidi           Bi-directional sockets test\n"
 	    "  -D, --debug          Debug mode - dump packets L2 - L5\n"
 	    "  -C, --tx-pkt-count=n Number of packets to send\n";
 	ksft_print_msg(str, prog);
@@ -435,7 +448,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:q:pSNcTDC:", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
 
 		if (c == -1)
 			break;
@@ -477,6 +490,9 @@ static void parse_command_line(int argc, char **argv)
 		case 'T':
 			opt_teardown = 1;
 			break;
+		case 'B':
+			opt_bidi = 1;
+			break;
 		case 'D':
 			debug_pkt_dump = 1;
 			break;
@@ -802,22 +818,25 @@ static void *worker_testapp_validate(void *arg)
 	struct generic_data *data = (struct generic_data *)malloc(sizeof(struct generic_data));
 	struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
 	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
-	void *bufs;
+	void *bufs = NULL;
 
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
-	bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
-		    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if (bufs == MAP_FAILED)
-		exit_with_error(errno);
+	if (!bidi_pass) {
+		bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
+			    PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (bufs == MAP_FAILED)
+			exit_with_error(errno);
 
-	if (strcmp(((struct ifobject *)arg)->nsname, ""))
-		switch_namespace(((struct ifobject *)arg)->ifdict_index);
+		if (strcmp(((struct ifobject *)arg)->nsname, ""))
+			switch_namespace(((struct ifobject *)arg)->ifdict_index);
+	}
 
 	if (((struct ifobject *)arg)->fv.vector == tx) {
 		int spinningrxctr = 0;
 
-		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx);
+		if (!bidi_pass)
+			thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_tx);
 
 		while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
 			spinningrxctr++;
@@ -847,7 +866,8 @@ static void *worker_testapp_validate(void *arg)
 		struct pollfd fds[MAX_SOCKS] = { };
 		int ret;
 
-		thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx);
+		if (!bidi_pass)
+			thread_common_ops(arg, bufs, &sync_mutex_tx, &spinning_rx);
 
 		ksft_print_msg("Interface [%s] vector [Rx]\n", ((struct ifobject *)arg)->ifname);
 		xsk_populate_fill_ring(((struct ifobject *)arg)->umem);
@@ -886,8 +906,10 @@ static void *worker_testapp_validate(void *arg)
 			ksft_print_msg("Destroying socket\n");
 	}
 
-	xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
-	(void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem);
+	if (!opt_bidi || (opt_bidi && bidi_pass)) {
+		xsk_socket__delete(((struct ifobject *)arg)->xsk->xsk);
+		(void)xsk_umem__delete(((struct ifobject *)arg)->umem->umem);
+	}
 	pthread_exit(NULL);
 }
 
@@ -896,11 +918,26 @@ static void testapp_validate(void)
 	pthread_attr_init(&attr);
 	pthread_attr_setstacksize(&attr, THREAD_STACK);
 
+	if (opt_bidi && bidi_pass) {
+		pthread_init_mutex();
+		if (!switching_notify) {
+			ksft_print_msg("Switching Tx/Rx vectors\n");
+			switching_notify++;
+		}
+	}
+
 	pthread_mutex_lock(&sync_mutex);
 
 	/*Spawn RX thread */
-	if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1]))
-		exit_with_error(errno);
+	if (!opt_bidi || (opt_bidi && !bidi_pass)) {
+		if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[1]))
+			exit_with_error(errno);
+	} else if (opt_bidi && bidi_pass) {
+		/*switch Tx/Rx vectors */
+		ifdict[0]->fv.vector = rx;
+		if (pthread_create(&t0, &attr, worker_testapp_validate, (void *)ifdict[0]))
+			exit_with_error(errno);
+	}
 
 	struct timespec max_wait = { 0, 0 };
 
@@ -914,8 +951,15 @@ static void testapp_validate(void)
 	pthread_mutex_unlock(&sync_mutex);
 
 	/*Spawn TX thread */
-	if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0]))
-		exit_with_error(errno);
+	if (!opt_bidi || (opt_bidi && !bidi_pass)) {
+		if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[0]))
+			exit_with_error(errno);
+	} else if (opt_bidi && bidi_pass) {
+		/*switch Tx/Rx vectors */
+		ifdict[1]->fv.vector = tx;
+		if (pthread_create(&t1, &attr, worker_testapp_validate, (void *)ifdict[1]))
+			exit_with_error(errno);
+	}
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
@@ -929,18 +973,19 @@ static void testapp_validate(void)
 		free(pkt_buf);
 	}
 
-	if (!opt_teardown)
+	if (!opt_teardown && !opt_bidi)
 		print_ksft_result();
 }
 
 static void testapp_sockets(void)
 {
-	for (int i = 0; i < MAX_TEARDOWN_ITER; i++) {
+	for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
 		pkt_counter = 0;
 		prev_pkt = -1;
 		sigvar = 0;
 		ksft_print_msg("Creating socket\n");
 		testapp_validate();
+		opt_bidi ? bidi_pass++ : bidi_pass;
 	}
 
 	print_ksft_result();
@@ -1009,7 +1054,14 @@ int main(int argc, char **argv)
 
 	ksft_set_plan(1);
 
-	opt_teardown ? testapp_sockets() : testapp_validate();
+	if (!opt_teardown && !opt_bidi) {
+		testapp_validate();
+	} else if (opt_teardown && opt_bidi) {
+		ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
+		ksft_exit_xfail();
+	} else {
+		testapp_sockets();
+	}
 
 	for (int i = 0; i < MAX_INTERFACES; i++)
 		free(ifdict[i]);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 58185b914f99..61f595b6f200 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -22,6 +22,7 @@
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
 #define MAX_SOCKS 1
 #define MAX_TEARDOWN_ITER 10
+#define MAX_BIDI_ITER 2
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
 #define MIN_PKT_SIZE 64
@@ -53,12 +54,15 @@ enum TESTS {
 u8 uut;
 u8 debug_pkt_dump;
 u32 num_frames;
+u8 switching_notify;
+u8 bidi_pass;
 
 static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static int opt_queue;
 static int opt_pkt_count;
 static int opt_poll;
 static int opt_teardown;
+static int opt_bidi;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 static u32 pkt_counter;
-- 
cgit v1.2.3-70-g09d2


From efaa83a3736d392c61499ee3aad8690a142675cd Mon Sep 17 00:00:00 2001
From: Collin Walling <walling@linux.ibm.com>
Date: Mon, 7 Dec 2020 10:41:25 -0500
Subject: KVM: selftests: sync_regs test for diag318

The DIAGNOSE 0x0318 instruction, unique to s390x, is a privileged call
that must be intercepted via SIE, handled in userspace, and the
information set by the instruction is communicated back to KVM.

To test the instruction interception, an ad-hoc handler is defined which
simply has a VM execute the instruction and then userspace will extract
the necessary info. The handler is defined such that the instruction
invocation occurs only once. It is up to the caller to determine how the
info returned by this handler should be used.

The diag318 info is communicated from userspace to KVM via a sync_regs
call. This is tested during a sync_regs test, where the diag318 info is
requested via the handler, then the info is stored in the appropriate
register in KVM via a sync registers call.

If KVM does not support diag318, then the tests will print a message
stating that diag318 was skipped, and the asserts will simply test
against a value of 0.

Signed-off-by: Collin Walling <walling@linux.ibm.com>
Link: https://lore.kernel.org/r/20201207154125.10322-1-walling@linux.ibm.com
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 tools/testing/selftests/kvm/Makefile               |  2 +-
 .../kvm/include/s390x/diag318_test_handler.h       | 13 ++++
 .../selftests/kvm/lib/s390x/diag318_test_handler.c | 82 ++++++++++++++++++++++
 tools/testing/selftests/kvm/s390x/sync_regs_test.c | 16 ++++-
 4 files changed, 111 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
 create mode 100644 tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 30afbad36cd5..1b9c257fca5e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -36,7 +36,7 @@ endif
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
+LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
new file mode 100644
index 000000000000..b0ed71302722
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
+#define SELFTEST_KVM_DIAG318_TEST_HANDLER
+
+uint64_t get_diag318_info(void);
+
+#endif
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
new file mode 100644
index 000000000000..86b9e611ad87
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define VCPU_ID	6
+
+#define ICPT_INSTRUCTION	0x04
+#define IPA0_DIAG		0x8300
+
+static void guest_code(void)
+{
+	uint64_t diag318_info = 0x12345678;
+
+	asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	uint64_t reg;
+	uint64_t diag318_info;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_run(vm, VCPU_ID);
+	run = vcpu_state(vm, VCPU_ID);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
+		    "DIAGNOSE 0x0318 instruction was not intercepted");
+	TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+		    "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+	TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+		    "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+	reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+	diag318_info = run->s.regs.gprs[reg];
+
+	TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+	kvm_vm_free(vm);
+
+	return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+	static uint64_t diag318_info;
+	static bool printed_skip;
+
+	/*
+	 * If KVM does not support diag318, then return 0 to
+	 * ensure tests do not break.
+	 */
+	if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) {
+		if (!printed_skip) {
+			fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+				"Skipping diag318 test.\n");
+			printed_skip = true;
+		}
+		return 0;
+	}
+
+	/*
+	 * If a test has previously requested the diag318 info,
+	 * then don't bother spinning up a temporary VM again.
+	 */
+	if (!diag318_info)
+		diag318_info = diag318_handler();
+
+	return diag318_info;
+}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index 5731ccf34917..caf7b8859a94 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -20,6 +20,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
+#include "diag318_test_handler.h"
 
 #define VCPU_ID 5
 
@@ -70,7 +71,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
 
 #undef REG_COMPARE
 
-#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
+#define TEST_SYNC_FIELDS   (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
 #define INVALID_SYNC_FIELD 0x80000000
 
 int main(int argc, char *argv[])
@@ -152,6 +153,12 @@ int main(int argc, char *argv[])
 
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
 	run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
+
+	if (get_diag318_info() > 0) {
+		run->s.regs.diag318 = get_diag318_info();
+		run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
+	}
+
 	rv = _vcpu_run(vm, VCPU_ID);
 	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
@@ -164,6 +171,9 @@ int main(int argc, char *argv[])
 	TEST_ASSERT(run->s.regs.acrs[0]  == 1 << 11,
 		    "acr0 sync regs value incorrect 0x%x.",
 		    run->s.regs.acrs[0]);
+	TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
+		    "diag318 sync regs value incorrect 0x%llx.",
+		    run->s.regs.diag318);
 
 	vcpu_regs_get(vm, VCPU_ID, &regs);
 	compare_regs(&regs, &run->s.regs);
@@ -177,6 +187,7 @@ int main(int argc, char *argv[])
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
 	run->kvm_dirty_regs = 0;
 	run->s.regs.gprs[11] = 0xDEADBEEF;
+	run->s.regs.diag318 = 0x4B1D;
 	rv = _vcpu_run(vm, VCPU_ID);
 	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
@@ -186,6 +197,9 @@ int main(int argc, char *argv[])
 	TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
 		    "r11 sync regs value incorrect 0x%llx.",
 		    run->s.regs.gprs[11]);
+	TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
+		    "diag318 sync regs value incorrect 0x%llx.",
+		    run->s.regs.diag318);
 
 	kvm_vm_free(vm);
 
-- 
cgit v1.2.3-70-g09d2


From a5b7b1194a57bc59f289f3e4433a1be81cc3e19d Mon Sep 17 00:00:00 2001
From: Veronika Kabatova <vkabatov@redhat.com>
Date: Thu, 10 Dec 2020 13:01:34 +0100
Subject: selftests/bpf: Drop tcp-{client,server}.py from Makefile

The files don't exist anymore so this breaks generic kselftest builds
when using "make install" or "make gen_tar".

Fixes: 247f0ec361b7 ("selftests/bpf: Drop python client/server in favor of threads")
Signed-off-by: Veronika Kabatova <vkabatov@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201210120134.2148482-1-vkabatov@redhat.com
---
 tools/testing/selftests/bpf/Makefile | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 944ae17a39ed..50b3495d7ddf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -75,8 +75,6 @@ TEST_PROGS := test_kmod.sh \
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
-	tcp_client.py \
-	tcp_server.py \
 	test_xdp_vlan.sh
 
 # Compile but not part of 'make run_tests'
-- 
cgit v1.2.3-70-g09d2


From 7535a3526dfe78db02a08ca2fa6bf69f393105dd Mon Sep 17 00:00:00 2001
From: Weqaar Janjua <weqaar.janjua@gmail.com>
Date: Thu, 10 Dec 2020 11:54:35 +0000
Subject: selftests/bpf: Xsk selftests - adding xdpxceiver to .gitignore

This patch adds *xdpxceiver* to selftests/bpf/.gitignore

Reported-by: Yonghong Song <yhs@fb.com>
Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Weqaar Janjua <weqaar.a.janjua@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201210115435.3995-1-weqaar.a.janjua@intel.com
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 752d8edddc66..f5b7ef93618c 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -36,3 +36,4 @@ test_cpp
 /runqslower
 /bench
 *.ko
+xdpxceiver
-- 
cgit v1.2.3-70-g09d2


From 41003dd0241c2ceb2461a88a18ff461795f2af57 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 9 Dec 2020 15:29:12 +0100
Subject: selftests/bpf: Make selftest compilation work on clang 11

We can't compile test_core_reloc_module.c selftest with clang 11, compile
fails with:

  CLNG-LLC [test_maps] test_core_reloc_module.o
  progs/test_core_reloc_module.c:57:21: error: use of unknown builtin \
  '__builtin_preserve_type_info' [-Wimplicit-function-declaration]
   out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);

Skipping these tests if __builtin_preserve_type_info() is not supported
by compiler.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201209142912.99145-1-jolsa@kernel.org
---
 tools/testing/selftests/bpf/progs/test_core_reloc_module.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
index 56363959f7b0..f59f175c7baf 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -40,6 +40,7 @@ int BPF_PROG(test_core_module_probed,
 	     struct task_struct *task,
 	     struct bpf_testmod_test_read_ctx *read_ctx)
 {
+#if __has_builtin(__builtin_preserve_enum_value)
 	struct core_reloc_module_output *out = (void *)&data.out;
 	__u64 pid_tgid = bpf_get_current_pid_tgid();
 	__u32 real_tgid = (__u32)(pid_tgid >> 32);
@@ -61,6 +62,9 @@ int BPF_PROG(test_core_module_probed,
 	out->len_exists = bpf_core_field_exists(read_ctx->len);
 
 	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+	data.skip = true;
+#endif
 
 	return 0;
 }
@@ -70,6 +74,7 @@ int BPF_PROG(test_core_module_direct,
 	     struct task_struct *task,
 	     struct bpf_testmod_test_read_ctx *read_ctx)
 {
+#if __has_builtin(__builtin_preserve_enum_value)
 	struct core_reloc_module_output *out = (void *)&data.out;
 	__u64 pid_tgid = bpf_get_current_pid_tgid();
 	__u32 real_tgid = (__u32)(pid_tgid >> 32);
@@ -91,6 +96,9 @@ int BPF_PROG(test_core_module_direct,
 	out->len_exists = bpf_core_field_exists(read_ctx->len);
 
 	out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+	data.skip = true;
+#endif
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 511a76bcb0ce242a19153658b25437906cc6070e Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 8 Dec 2020 19:01:52 +0100
Subject: selftests/bpf: Add test for signed 32-bit bound check bug

After a 32-bit load followed by a branch, the verifier would reduce the
maximum bound of the register to 0x7fffffff, allowing a user to bypass
bound checks. Ensure such a program is rejected.

In the second test, the 64-bit compare should not sufficient to
determine whether the signed 32-bit lower bound is 0, so the verifier
should reject the second branch.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/bounds.c | 41 +++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index dac40de3f868..57ed67b86074 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -703,3 +703,44 @@
 	.fixup_map_hash_8b = { 3 },
 	.result = ACCEPT,
 },
+{
+	"bounds checks after 32-bit truncation. test 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+	/* This used to reduce the max bound to 0x7fffffff */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0x7fffffff, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.errstr_unpriv = "R0 leaks addr",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
+},
+{
+	"bounds checks after 32-bit truncation. test 2",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_1, 1, 1),
+	BPF_JMP32_IMM(BPF_JSLT, BPF_REG_1, 0, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.errstr_unpriv = "R0 leaks addr",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
+},
-- 
cgit v1.2.3-70-g09d2


From 77ce220c0549dcc3db8226c61c60e83fc59dfafc Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 8 Dec 2020 19:01:53 +0100
Subject: selftests/bpf: Fix array access with signed variable test

The test fails because of a recent fix to the verifier, even though this
program is valid. In details what happens is:

    7: (61) r1 = *(u32 *)(r0 +0)

Load a 32-bit value, with signed bounds [S32_MIN, S32_MAX]. The bounds
of the 64-bit value are [0, U32_MAX]...

    8: (65) if r1 s> 0xffffffff goto pc+1

... therefore this is always true (the operand is sign-extended).

    10: (b4) w2 = 11
    11: (6d) if r2 s> r1 goto pc+1

When true, the 64-bit bounds become [0, 10]. The 32-bit bounds are still
[S32_MIN, 10].

    13: (64) w1 <<= 2

Because this is a 32-bit operation, the verifier propagates the new
32-bit bounds to the 64-bit ones, and the knowledge gained from insn 11
is lost.

    14: (0f) r0 += r1
    15: (7a) *(u64 *)(r0 +0) = 4

Then the verifier considers r0 unbounded here, rejecting the test. To
make the test work, change insn 8 to check the sign of the 32-bit value.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/array_access.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index 1c4b1939f5a8..bed53b561e04 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -68,7 +68,7 @@
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
 	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
-	BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
+	BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
 	BPF_MOV32_IMM(BPF_REG_1, 0),
 	BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
 	BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
-- 
cgit v1.2.3-70-g09d2


From 3615bdf6d9b19db12b1589861609b4f1c6a8d303 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 8 Dec 2020 19:01:54 +0100
Subject: selftests/bpf: Fix "dubious pointer arithmetic" test

The verifier trace changed following a bugfix. After checking the 64-bit
sign, only the upper bit mask is known, not bit 31. Update the test
accordingly.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/align.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 52414058a627..5861446d0777 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -456,10 +456,10 @@ static struct bpf_align_test tests[] = {
 			 */
 			{7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>=0 */
-			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* packet pointer + nonnegative (4n+2) */
-			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
-			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
+			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -467,7 +467,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"},
 		}
 	},
 	{
-- 
cgit v1.2.3-70-g09d2


From 38bf8cd821be292e7d8e6f6283d67c5d9708f887 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 9 Dec 2020 12:21:13 +0100
Subject: selftests: fix poll error in udpgro.sh

The test program udpgso_bench_rx always invokes the poll()
syscall with a timeout of 10ms. If a larger timeout is specified
via the command line, udpgso_bench_rx is supposed to do multiple
poll() calls till the timeout is expired or an event is received.

Currently the poll() loop errors out after the first invocation with
no events, and may causes self-tests failure alike:

failed
 GRO with custom segment size            ./udpgso_bench_rx: poll: 0x0 expected 0x1

This change addresses the issue allowing the poll() loop to consume
all the configured timeout.

Fixes: ada641ff6ed3 ("selftests: fixes for UDP GRO")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgso_bench_rx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index db3d4a8b5a4c..76a24052f4b4 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms)
 				interrupted = true;
 				break;
 			}
+
+			/* no events and more time to wait, do poll again */
+			continue;
 		}
 		if (pfd.revents != POLLIN)
 			error(1, errno, "poll: 0x%x expected 0x%x\n",
-- 
cgit v1.2.3-70-g09d2


From a67079b03165a17f9aceab3dd26b1638af68e0fc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Thu, 10 Dec 2020 17:59:46 -0800
Subject: selftests/bpf: fix bpf_testmod.ko recompilation logic

bpf_testmod.ko build rule declared dependency on VMLINUX_BTF, but the variable
itself was initialized after the rule was declared, which often caused
bpf_testmod.ko to not be re-compiled. Fix by moving VMLINUX_BTF determination
sooner.

Also enforce bpf_testmod.ko recompilation when we detect that vmlinux image
changed by removing bpf_testmod/bpf_testmod.ko. This is necessary to generate
correct module's split BTF. Without it, Kbuild's module build logic might
determine that nothing changed on the kernel side and thus bpf_testmod.ko
shouldn't be rebuilt, so won't re-generate module BTF, which often leads to
module's BTF with wrong string offsets against vmlinux BTF. Removing .ko file
forces Kbuild to re-build the module.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 9f7fa225894c ("selftests/bpf: Add bpf_testmod kernel module for testing")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20201211015946.4062098-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 50b3495d7ddf..8b515a17f44b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -116,6 +116,13 @@ INCLUDE_DIR := $(SCRATCH_DIR)/include
 BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
 RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids
 
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
+		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
+		     ../../../../vmlinux				\
+		     /sys/kernel/btf/vmlinux				\
+		     /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
 # Define simple and short `make test_progs`, `make test_sysctl`, etc targets
 # to build individual tests.
 # NOTE: Semicolon at the end is critical to override lib.mk's default static
@@ -140,6 +147,7 @@ $(OUTPUT)/urandom_read: urandom_read.c
 
 $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
 	$(call msg,MOD,,$@)
+	$(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
 	$(Q)$(MAKE) $(submake_extras) -C bpf_testmod
 	$(Q)cp bpf_testmod/bpf_testmod.ko $@
 
@@ -147,13 +155,6 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(Q)$(CC) -c $(CFLAGS) -o $@ $<
 
-VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
-		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
-		     ../../../../vmlinux				\
-		     /sys/kernel/btf/vmlinux				\
-		     /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-
 DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
 
 $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
-- 
cgit v1.2.3-70-g09d2


From 89ad7420b25c2b40a4d916f4fd43b9ccacd50500 Mon Sep 17 00:00:00 2001
From: Andrew Delgadillo <adelg@google.com>
Date: Fri, 11 Dec 2020 00:43:44 +0000
Subject: selftests/bpf: Drop the need for LLVM's llc

LLC is meant for compiler development and debugging. Consequently, it
exposes many low level options about its backend. To avoid future bugs
introduced by using the raw LLC tool, use clang directly so that all
appropriate options are passed to the back end.

Additionally, simplify the Makefile by removing the
CLANG_NATIVE_BPF_BUILD_RULE as it is not being use, stop passing
dwarfris attr since elfutils/libdw now supports the bpf backend (which
should work with any recent pahole), and stop passing alu32 since
-mcpu=v3 implies alu32.

Signed-off-by: Andrew Delgadillo <adelg@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201211004344.3355074-1-adelg@google.com
---
 tools/testing/selftests/bpf/Makefile | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8b515a17f44b..8c33e999319a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,6 @@ ifneq ($(wildcard $(GENHDR)),)
 endif
 
 CLANG		?= clang
-LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 BPF_GCC		?= $(shell command -v bpf-gcc;)
 SAN_CFLAGS	?=
@@ -253,31 +252,19 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
 # $1 - input .c file
 # $2 - output .o file
 # $3 - CFLAGS
-# $4 - LDFLAGS
 define CLANG_BPF_BUILD_RULE
-	$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
-	$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm			\
-		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+	$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+	$(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3
 endef
 # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
 define CLANG_NOALU32_BPF_BUILD_RULE
-	$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
-	$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm			\
-		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
-endef
-# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
-define CLANG_NATIVE_BPF_BUILD_RULE
 	$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
-	$(Q)($(CLANG) $3 -O2 -emit-llvm					\
-		-c $1 -o - || echo "BPF obj compilation failed") | 	\
-	$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+	$(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2
 endef
 # Build BPF object using GCC
 define GCC_BPF_BUILD_RULE
 	$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
-	$(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+	$(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
 endef
 
 SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -332,8 +319,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		     $$(INCLUDE_DIR)/vmlinux.h				\
 		     $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
-					  $(TRUNNER_BPF_CFLAGS),	\
-					  $(TRUNNER_BPF_LDFLAGS))
+					  $(TRUNNER_BPF_CFLAGS))
 
 $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h:			\
 		      $(TRUNNER_OUTPUT)/%.o				\
@@ -401,19 +387,16 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-TRUNNER_BPF_LDFLAGS := -mattr=+alu32
 $(eval $(call DEFINE_TEST_RUNNER,test_progs))
 
 # Define test_progs-no_alu32 test runner.
 TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
-TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
 
 # Define test_progs BPF-GCC-flavored test runner.
 ifneq ($(BPF_GCC),)
 TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
-TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
 endif
 
@@ -424,7 +407,6 @@ TRUNNER_EXTRA_SOURCES := test_maps.c
 TRUNNER_EXTRA_FILES :=
 TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
 TRUNNER_BPF_CFLAGS :=
-TRUNNER_BPF_LDFLAGS :=
 $(eval $(call DEFINE_TEST_RUNNER,test_maps))
 
 # Define test_verifier test runner.
-- 
cgit v1.2.3-70-g09d2


From b4fe9fec51ef48011f11c2da4099f0b530449c92 Mon Sep 17 00:00:00 2001
From: KP Singh <kpsingh@kernel.org>
Date: Fri, 11 Dec 2020 01:07:11 +0000
Subject: selftests/bpf: Silence ima_setup.sh when not running in verbose mode.

Currently, ima_setup.sh spews outputs from commands like mkfs and dd
on the terminal without taking into account the verbosity level of
the test framework. Update test_progs to set the environment variable
SELFTESTS_VERBOSE=1 when a verbose output is requested. This
environment variable is then used by ima_setup.sh (and can be used by
other similar scripts) to obey the verbosity level of the test harness
without needing to re-implement command line options for verbosity.

In "silent" mode, the script saves the output to a temporary file, the
contents of which are echoed back to stderr when the script encounters
an error.

Fixes: 34b82d3ac105 ("bpf: Add a selftest for bpf_ima_inode_hash")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: KP Singh <kpsingh@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201211010711.3716917-1-kpsingh@kernel.org
---
 tools/testing/selftests/bpf/ima_setup.sh | 24 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.c | 10 ++++++++++
 2 files changed, 34 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
index 2bfc646bc230..8e62581113a3 100755
--- a/tools/testing/selftests/bpf/ima_setup.sh
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -7,6 +7,8 @@ set -o pipefail
 
 IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
 TEST_BINARY="/bin/true"
+VERBOSE="${SELFTESTS_VERBOSE:=0}"
+LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)"
 
 usage()
 {
@@ -75,6 +77,19 @@ run()
 	exec "${copied_bin_path}"
 }
 
+catch()
+{
+	local exit_code="$1"
+	local log_file="$2"
+
+	if [[ "${exit_code}" -ne 0 ]]; then
+		cat "${log_file}" >&3
+	fi
+
+	rm -f "${log_file}"
+	exit ${exit_code}
+}
+
 main()
 {
 	[[ $# -ne 2 ]] && usage
@@ -96,4 +111,13 @@ main()
 	fi
 }
 
+trap 'catch "$?" "${LOG_FILE}"' EXIT
+
+if [[ "${VERBOSE}" -eq 0 ]]; then
+	# Save the stderr to 3 so that we can output back to
+	# it incase of an error.
+	exec 3>&2 1>"${LOG_FILE}" 2>&1
+fi
+
 main "$@"
+rm -f "${LOG_FILE}"
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 5ef081bdae4e..7d077d48cadd 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -587,6 +587,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 				return -EINVAL;
 			}
 		}
+
+		if (env->verbosity > VERBOSE_NONE) {
+			if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) {
+				fprintf(stderr,
+					"Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
+					errno);
+				return -1;
+			}
+		}
+
 		break;
 	case ARG_GET_TEST_CNT:
 		env->get_test_cnt = true;
-- 
cgit v1.2.3-70-g09d2


From b7906b70a2337e445b8dca3ce7ba8976b6ebd07d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Dec 2020 22:36:25 +0100
Subject: bpf: Fix enum names for bpf_this_cpu_ptr() and bpf_per_cpu_ptr()
 helpers

Remove bpf_ prefix, which causes these helpers to be reported in verifier
dump as bpf_bpf_this_cpu_ptr() and bpf_bpf_per_cpu_ptr(), respectively. Lets
fix it as long as it is still possible before UAPI freezes on these helpers.

Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/bpf.h       | 4 ++--
 kernel/bpf/helpers.c           | 4 ++--
 kernel/trace/bpf_trace.c       | 4 ++--
 tools/include/uapi/linux/bpf.h | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e6ceac3f7d62..556216dc9703 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3897,8 +3897,8 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
-	FN(bpf_per_cpu_ptr),            \
-	FN(bpf_this_cpu_ptr),		\
+	FN(per_cpu_ptr),		\
+	FN(this_cpu_ptr),		\
 	FN(redirect_peer),		\
 	/* */
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 25520f5eeaf6..deda1185237b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -717,9 +717,9 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
-	case BPF_FUNC_bpf_per_cpu_ptr:
+	case BPF_FUNC_per_cpu_ptr:
 		return &bpf_per_cpu_ptr_proto;
-	case BPF_FUNC_bpf_this_cpu_ptr:
+	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
 	default:
 		break;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 048c655315f1..a125ea5e04cd 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1337,9 +1337,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
-	case BPF_FUNC_bpf_per_cpu_ptr:
+	case BPF_FUNC_per_cpu_ptr:
 		return &bpf_per_cpu_ptr_proto;
-	case BPF_FUNC_bpf_this_cpu_ptr:
+	case BPF_FUNC_this_cpu_ptr:
 		return &bpf_this_cpu_ptr_proto;
 	default:
 		return NULL;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e6ceac3f7d62..556216dc9703 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3897,8 +3897,8 @@ union bpf_attr {
 	FN(seq_printf_btf),		\
 	FN(skb_cgroup_classid),		\
 	FN(redirect_neigh),		\
-	FN(bpf_per_cpu_ptr),            \
-	FN(bpf_this_cpu_ptr),		\
+	FN(per_cpu_ptr),		\
+	FN(this_cpu_ptr),		\
 	FN(redirect_peer),		\
 	/* */
 
-- 
cgit v1.2.3-70-g09d2


From 3cea1891748e0ed8e79fa5d9afe40750319751d1 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Mon, 12 Oct 2020 12:47:16 -0700
Subject: selftests: kvm: Test MSR exiting to userspace

Add a selftest to test that when the ioctl KVM_X86_SET_MSR_FILTER is
called with an MSR list, those MSRs exit to userspace.

This test uses 3 MSRs to test this:
  1. MSR_IA32_XSS, an MSR the kernel knows about.
  2. MSR_IA32_FLUSH_CMD, an MSR the kernel does not know about.
  3. MSR_NON_EXISTENT, an MSR invented in this test for the purposes of
     passing a fake MSR from the guest to userspace.  KVM just acts as a
     pass through.

Userspace is also able to inject a #GP.  This is demonstrated when
MSR_IA32_XSS and MSR_IA32_FLUSH_CMD are misused in the test.  When this
happens a #GP is initiated in userspace to be thrown in the guest which is
handled gracefully by the exception handling framework introduced earlier
in this series.

Tests for the generic instruction emulator were also added.  For this to
work the module parameter kvm.force_emulation_prefix=1 has to be enabled.
If it isn't enabled the tests will be skipped.

A test was also added to ensure the MSR permission bitmap is being set
correctly by executing reads and writes of MSR_FS_BASE and MSR_GS_BASE
in the guest while alternating which MSR userspace should intercept.  If
the permission bitmap is being set correctly only one of the MSRs should
be coming through at a time, and the guest should be able to read and
write the other one directly.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
Message-Id: <20201012194716.3950330-5-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 +
 tools/testing/selftests/kvm/Makefile               |   3 +-
 tools/testing/selftests/kvm/lib/kvm_util.c         |   2 +
 .../selftests/kvm/x86_64/userspace_msr_exit_test.c | 560 +++++++++++++++++++++
 4 files changed, 565 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 5468db7dd674..5008bf90772b 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -18,6 +18,7 @@
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
 /x86_64/user_msr_test
+/x86_64/userspace_msr_exit_test
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
 /x86_64/vmx_dirty_log_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 4febf4d5ead9..d54870db21c4 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -50,6 +50,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
@@ -58,7 +60,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index b2c426adb87f..88ef7067f1e6 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1806,6 +1806,8 @@ static struct exit_reason {
 	{KVM_EXIT_OSI, "OSI"},
 	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
 	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
+	{KVM_EXIT_X86_RDMSR, "RDMSR"},
+	{KVM_EXIT_X86_WRMSR, "WRMSR"},
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
 #endif
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
new file mode 100644
index 000000000000..e8b6918cdea0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+#define KVM_FEP_LENGTH 5
+static int fep_available = 1;
+
+#define VCPU_ID	      1
+#define MSR_NON_EXISTENT 0x474f4f00
+
+u64 deny_bits = 0;
+struct kvm_msr_filter filter = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			/* Test an MSR the kernel knows about. */
+			.base = MSR_IA32_XSS,
+			.bitmap = (uint8_t*)&deny_bits,
+		}, {
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			/* Test an MSR the kernel doesn't know about. */
+			.base = MSR_IA32_FLUSH_CMD,
+			.bitmap = (uint8_t*)&deny_bits,
+		}, {
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			/* Test a fabricated MSR that no one knows about. */
+			.base = MSR_NON_EXISTENT,
+			.bitmap = (uint8_t*)&deny_bits,
+		},
+	},
+};
+
+struct kvm_msr_filter filter_fs = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			.base = MSR_FS_BASE,
+			.bitmap = (uint8_t*)&deny_bits,
+		},
+	},
+};
+
+struct kvm_msr_filter filter_gs = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ |
+				 KVM_MSR_FILTER_WRITE,
+			.nmsrs = 1,
+			.base = MSR_GS_BASE,
+			.bitmap = (uint8_t*)&deny_bits,
+		},
+	},
+};
+
+uint64_t msr_non_existent_data;
+int guest_exception_count;
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+	uint32_t a, d;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+			"=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+	return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+	uint32_t a = value;
+	uint32_t d = value >> 32;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+			"a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+	uint32_t a, d;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+			"=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+	return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+	uint32_t a = value;
+	uint32_t d = value >> 32;
+
+	guest_exception_count = 0;
+
+	__asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+			"a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code(void)
+{
+	uint64_t data;
+
+	/*
+	 * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+	 *
+	 * A GP is thrown if anything other than 0 is written to
+	 * MSR_IA32_XSS.
+	 */
+	data = test_rdmsr(MSR_IA32_XSS);
+	GUEST_ASSERT(data == 0);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	test_wrmsr(MSR_IA32_XSS, 0);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	test_wrmsr(MSR_IA32_XSS, 1);
+	GUEST_ASSERT(guest_exception_count == 1);
+
+	/*
+	 * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+	 *
+	 * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+	 * from or if a value other than 1 is written to it.
+	 */
+	test_rdmsr(MSR_IA32_FLUSH_CMD);
+	GUEST_ASSERT(guest_exception_count == 1);
+
+	test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+	GUEST_ASSERT(guest_exception_count == 1);
+
+	test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	/*
+	 * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+	 *
+	 * Test that a fabricated MSR can pass through the kernel
+	 * and be handled in userspace.
+	 */
+	test_wrmsr(MSR_NON_EXISTENT, 2);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	data = test_rdmsr(MSR_NON_EXISTENT);
+	GUEST_ASSERT(data == 2);
+	GUEST_ASSERT(guest_exception_count == 0);
+
+	/*
+	 * Test to see if the instruction emulator is available (ie: the module
+	 * parameter 'kvm.force_emulation_prefix=1' is set).  This instruction
+	 * will #UD if it isn't available.
+	 */
+	__asm__ __volatile__(KVM_FEP "nop");
+
+	if (fep_available) {
+		/* Let userspace know we aren't done. */
+		GUEST_SYNC(0);
+
+		/*
+		 * Now run the same tests with the instruction emulator.
+		 */
+		data = test_em_rdmsr(MSR_IA32_XSS);
+		GUEST_ASSERT(data == 0);
+		GUEST_ASSERT(guest_exception_count == 0);
+		test_em_wrmsr(MSR_IA32_XSS, 0);
+		GUEST_ASSERT(guest_exception_count == 0);
+		test_em_wrmsr(MSR_IA32_XSS, 1);
+		GUEST_ASSERT(guest_exception_count == 1);
+
+		test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+		GUEST_ASSERT(guest_exception_count == 1);
+		test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+		GUEST_ASSERT(guest_exception_count == 1);
+		test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+		GUEST_ASSERT(guest_exception_count == 0);
+
+		test_em_wrmsr(MSR_NON_EXISTENT, 2);
+		GUEST_ASSERT(guest_exception_count == 0);
+		data = test_em_rdmsr(MSR_NON_EXISTENT);
+		GUEST_ASSERT(data == 2);
+		GUEST_ASSERT(guest_exception_count == 0);
+	}
+
+	GUEST_DONE();
+}
+
+
+static void guest_code_permission_bitmap(void)
+{
+	uint64_t data;
+
+	test_wrmsr(MSR_FS_BASE, 0);
+	data = test_rdmsr(MSR_FS_BASE);
+	GUEST_ASSERT(data == MSR_FS_BASE);
+
+	test_wrmsr(MSR_GS_BASE, 0);
+	data = test_rdmsr(MSR_GS_BASE);
+	GUEST_ASSERT(data == 0);
+
+	/* Let userspace know to switch the filter */
+	GUEST_SYNC(0);
+
+	test_wrmsr(MSR_FS_BASE, 0);
+	data = test_rdmsr(MSR_FS_BASE);
+	GUEST_ASSERT(data == 0);
+
+	test_wrmsr(MSR_GS_BASE, 0);
+	data = test_rdmsr(MSR_GS_BASE);
+	GUEST_ASSERT(data == MSR_GS_BASE);
+
+	GUEST_DONE();
+}
+
+static void __guest_gp_handler(struct ex_regs *regs,
+			       char *r_start, char *r_end,
+			       char *w_start, char *w_end)
+{
+	if (regs->rip == (uintptr_t)r_start) {
+		regs->rip = (uintptr_t)r_end;
+		regs->rax = 0;
+		regs->rdx = 0;
+	} else if (regs->rip == (uintptr_t)w_start) {
+		regs->rip = (uintptr_t)w_end;
+	} else {
+		GUEST_ASSERT(!"RIP is at an unknown location!");
+	}
+
+	++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	__guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+			   &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+	__guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+			   &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	fep_available = 0;
+	regs->rip += KVM_FEP_LENGTH;
+}
+
+static void run_guest(struct kvm_vm *vm)
+{
+	int rc;
+
+	rc = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+}
+
+static void check_for_guest_assert(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	if (run->exit_reason == KVM_EXIT_IO &&
+		get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				__FILE__, uc.args[1]);
+	}
+}
+
+static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	check_for_guest_assert(vm);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->msr.index == msr_index,
+			"Unexpected msr (0x%04x), expected 0x%04x",
+			run->msr.index, msr_index);
+
+	switch (run->msr.index) {
+	case MSR_IA32_XSS:
+		run->msr.data = 0;
+		break;
+	case MSR_IA32_FLUSH_CMD:
+		run->msr.error = 1;
+		break;
+	case MSR_NON_EXISTENT:
+		run->msr.data = msr_non_existent_data;
+		break;
+	case MSR_FS_BASE:
+		run->msr.data = MSR_FS_BASE;
+		break;
+	case MSR_GS_BASE:
+		run->msr.data = MSR_GS_BASE;
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+	}
+}
+
+static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	check_for_guest_assert(vm);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->msr.index == msr_index,
+			"Unexpected msr (0x%04x), expected 0x%04x",
+			run->msr.index, msr_index);
+
+	switch (run->msr.index) {
+	case MSR_IA32_XSS:
+		if (run->msr.data != 0)
+			run->msr.error = 1;
+		break;
+	case MSR_IA32_FLUSH_CMD:
+		if (run->msr.data != 1)
+			run->msr.error = 1;
+		break;
+	case MSR_NON_EXISTENT:
+		msr_non_existent_data = run->msr.data;
+		break;
+	case MSR_FS_BASE:
+	case MSR_GS_BASE:
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+	}
+}
+
+static void process_ucall_done(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	check_for_guest_assert(vm);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+		    "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+		    uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc = {};
+
+	check_for_guest_assert(vm);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vm, VCPU_ID, &uc)) {
+	case UCALL_SYNC:
+		break;
+	case UCALL_ABORT:
+		check_for_guest_assert(vm);
+		break;
+	case UCALL_DONE:
+		process_ucall_done(vm);
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected ucall");
+	}
+
+	return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+	run_guest(vm);
+	process_rdmsr(vm, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
+{
+	run_guest(vm);
+	process_wrmsr(vm, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm)
+{
+	run_guest(vm);
+	return process_ucall(vm);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vm *vm)
+{
+	run_guest(vm);
+	process_ucall_done(vm);
+}
+
+static void test_msr_filter(void) {
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_X86_USER_SPACE_MSR,
+		.args[0] = KVM_MSR_EXIT_REASON_FILTER,
+	};
+	struct kvm_vm *vm;
+	int rc;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	vm_enable_cap(vm, &cap);
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+	vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+	/* Process guest code userspace exits. */
+	run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
+	run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+	run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+
+	run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
+	run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+	run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+
+	run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
+	run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+
+	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+	run_guest(vm);
+	vm_handle_exception(vm, UD_VECTOR, NULL);
+
+	if (process_ucall(vm) != UCALL_DONE) {
+		vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+
+		/* Process emulated rdmsr and wrmsr instructions. */
+		run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
+		run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+		run_guest_then_process_wrmsr(vm, MSR_IA32_XSS);
+
+		run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD);
+		run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+		run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD);
+
+		run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
+		run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
+
+		/* Confirm the guest completed without issues. */
+		run_guest_then_process_ucall_done(vm);
+	} else {
+		printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
+	}
+
+	kvm_vm_free(vm);
+}
+
+static void test_msr_permission_bitmap(void) {
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_X86_USER_SPACE_MSR,
+		.args[0] = KVM_MSR_EXIT_REASON_FILTER,
+	};
+	struct kvm_vm *vm;
+	int rc;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	vm_enable_cap(vm, &cap);
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+	run_guest_then_process_wrmsr(vm, MSR_FS_BASE);
+	run_guest_then_process_rdmsr(vm, MSR_FS_BASE);
+	TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC.");
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+	run_guest_then_process_wrmsr(vm, MSR_GS_BASE);
+	run_guest_then_process_rdmsr(vm, MSR_GS_BASE);
+	run_guest_then_process_ucall_done(vm);
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	test_msr_filter();
+
+	test_msr_permission_bitmap();
+
+	return 0;
+}
-- 
cgit v1.2.3-70-g09d2


From fb6360534ecc0a2703f7b6076cf1397385d23df8 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Fri, 4 Dec 2020 09:25:31 -0800
Subject: selftests: kvm: Merge user_msr_test into userspace_msr_exit_test

Both user_msr_test and userspace_msr_exit_test tests the functionality
of kvm_msr_filter.  Instead of testing this feature in two tests, merge
them together, so there is only one test for this feature.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20201204172530.2958493-1-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore             |   1 -
 tools/testing/selftests/kvm/Makefile               |   1 -
 tools/testing/selftests/kvm/x86_64/user_msr_test.c | 251 --------------------
 .../selftests/kvm/x86_64/userspace_msr_exit_test.c | 262 +++++++++++++++++++--
 4 files changed, 236 insertions(+), 279 deletions(-)
 delete mode 100644 tools/testing/selftests/kvm/x86_64/user_msr_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 5008bf90772b..ce8f4ad39684 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -17,7 +17,6 @@
 /x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
-/x86_64/user_msr_test
 /x86_64/userspace_msr_exit_test
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d54870db21c4..86ccca8ca89d 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -50,7 +50,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
diff --git a/tools/testing/selftests/kvm/x86_64/user_msr_test.c b/tools/testing/selftests/kvm/x86_64/user_msr_test.c
deleted file mode 100644
index fe88b98908ee..000000000000
--- a/tools/testing/selftests/kvm/x86_64/user_msr_test.c
+++ /dev/null
@@ -1,251 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * tests for KVM_CAP_X86_USER_SPACE_MSR and KVM_X86_SET_MSR_FILTER
- *
- * Copyright (C) 2020, Amazon Inc.
- *
- * This is a functional test to verify that we can deflect MSR events
- * into user space.
- */
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-#include "processor.h"
-
-#define VCPU_ID                  5
-
-static u32 msr_reads, msr_writes;
-
-static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
-static u8 bitmap_deadbeef[1] = { 0x1 };
-
-static void deny_msr(uint8_t *bitmap, u32 msr)
-{
-	u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
-
-	bitmap[idx / 8] &= ~(1 << (idx % 8));
-}
-
-static void prepare_bitmaps(void)
-{
-	memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
-	memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
-	memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
-	memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
-	memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
-
-	deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
-	deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
-	deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
-}
-
-struct kvm_msr_filter filter = {
-	.flags = KVM_MSR_FILTER_DEFAULT_DENY,
-	.ranges = {
-		{
-			.flags = KVM_MSR_FILTER_READ,
-			.base = 0x00000000,
-			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-			.bitmap = bitmap_00000000,
-		}, {
-			.flags = KVM_MSR_FILTER_WRITE,
-			.base = 0x00000000,
-			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-			.bitmap = bitmap_00000000_write,
-		}, {
-			.flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
-			.base = 0x40000000,
-			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-			.bitmap = bitmap_40000000,
-		}, {
-			.flags = KVM_MSR_FILTER_READ,
-			.base = 0xc0000000,
-			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-			.bitmap = bitmap_c0000000_read,
-		}, {
-			.flags = KVM_MSR_FILTER_WRITE,
-			.base = 0xc0000000,
-			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
-			.bitmap = bitmap_c0000000,
-		}, {
-			.flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
-			.base = 0xdeadbeef,
-			.nmsrs = 1,
-			.bitmap = bitmap_deadbeef,
-		},
-	},
-};
-
-struct kvm_msr_filter no_filter = {
-	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
-};
-
-static void guest_msr_calls(bool trapped)
-{
-	/* This goes into the in-kernel emulation */
-	wrmsr(MSR_SYSCALL_MASK, 0);
-
-	if (trapped) {
-		/* This goes into user space emulation */
-		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
-		GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
-	} else {
-		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
-		GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
-	}
-
-	/* If trapped == true, this goes into user space emulation */
-	wrmsr(MSR_IA32_POWER_CTL, 0x1234);
-
-	/* This goes into the in-kernel emulation */
-	rdmsr(MSR_IA32_POWER_CTL);
-
-	/* Invalid MSR, should always be handled by user space exit */
-	GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
-	wrmsr(0xdeadbeef, 0x1234);
-}
-
-static void guest_code(void)
-{
-	guest_msr_calls(true);
-
-	/*
-	 * Disable msr filtering, so that the kernel
-	 * handles everything in the next round
-	 */
-	GUEST_SYNC(0);
-
-	guest_msr_calls(false);
-
-	GUEST_DONE();
-}
-
-static int handle_ucall(struct kvm_vm *vm)
-{
-	struct ucall uc;
-
-	switch (get_ucall(vm, VCPU_ID, &uc)) {
-	case UCALL_ABORT:
-		TEST_FAIL("Guest assertion not met");
-		break;
-	case UCALL_SYNC:
-		vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter);
-		break;
-	case UCALL_DONE:
-		return 1;
-	default:
-		TEST_FAIL("Unknown ucall %lu", uc.cmd);
-	}
-
-	return 0;
-}
-
-static void handle_rdmsr(struct kvm_run *run)
-{
-	run->msr.data = run->msr.index;
-	msr_reads++;
-
-	if (run->msr.index == MSR_SYSCALL_MASK ||
-	    run->msr.index == MSR_GS_BASE) {
-		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-			    "MSR read trap w/o access fault");
-	}
-
-	if (run->msr.index == 0xdeadbeef) {
-		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-			    "MSR deadbeef read trap w/o inval fault");
-	}
-}
-
-static void handle_wrmsr(struct kvm_run *run)
-{
-	/* ignore */
-	msr_writes++;
-
-	if (run->msr.index == MSR_IA32_POWER_CTL) {
-		TEST_ASSERT(run->msr.data == 0x1234,
-			    "MSR data for MSR_IA32_POWER_CTL incorrect");
-		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
-			    "MSR_IA32_POWER_CTL trap w/o access fault");
-	}
-
-	if (run->msr.index == 0xdeadbeef) {
-		TEST_ASSERT(run->msr.data == 0x1234,
-			    "MSR data for deadbeef incorrect");
-		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
-			    "deadbeef trap w/o inval fault");
-	}
-}
-
-int main(int argc, char *argv[])
-{
-	struct kvm_enable_cap cap = {
-		.cap = KVM_CAP_X86_USER_SPACE_MSR,
-		.args[0] = KVM_MSR_EXIT_REASON_INVAL |
-			   KVM_MSR_EXIT_REASON_UNKNOWN |
-			   KVM_MSR_EXIT_REASON_FILTER,
-	};
-	struct kvm_vm *vm;
-	struct kvm_run *run;
-	int rc;
-
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-	run = vcpu_state(vm, VCPU_ID);
-
-	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
-	if (!rc) {
-		print_skip("KVM_CAP_X86_USER_SPACE_MSR not supported");
-		exit(KSFT_SKIP);
-	}
-
-	vm_enable_cap(vm, &cap);
-
-	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
-	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
-
-	prepare_bitmaps();
-	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
-
-	while (1) {
-		rc = _vcpu_run(vm, VCPU_ID);
-
-		TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
-
-		switch (run->exit_reason) {
-		case KVM_EXIT_X86_RDMSR:
-			handle_rdmsr(run);
-			break;
-		case KVM_EXIT_X86_WRMSR:
-			handle_wrmsr(run);
-			break;
-		case KVM_EXIT_IO:
-			if (handle_ucall(vm))
-				goto done;
-			break;
-		}
-
-	}
-
-done:
-	TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
-	TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-
-	kvm_vm_free(vm);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index e8b6918cdea0..72c0d0797522 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -20,8 +20,8 @@ static int fep_available = 1;
 #define VCPU_ID	      1
 #define MSR_NON_EXISTENT 0x474f4f00
 
-u64 deny_bits = 0;
-struct kvm_msr_filter filter = {
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
 	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
 	.ranges = {
 		{
@@ -53,8 +53,7 @@ struct kvm_msr_filter filter_fs = {
 	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
 	.ranges = {
 		{
-			.flags = KVM_MSR_FILTER_READ |
-				 KVM_MSR_FILTER_WRITE,
+			.flags = KVM_MSR_FILTER_READ,
 			.nmsrs = 1,
 			.base = MSR_FS_BASE,
 			.bitmap = (uint8_t*)&deny_bits,
@@ -66,8 +65,7 @@ struct kvm_msr_filter filter_gs = {
 	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
 	.ranges = {
 		{
-			.flags = KVM_MSR_FILTER_READ |
-				 KVM_MSR_FILTER_WRITE,
+			.flags = KVM_MSR_FILTER_READ,
 			.nmsrs = 1,
 			.base = MSR_GS_BASE,
 			.bitmap = (uint8_t*)&deny_bits,
@@ -75,8 +73,77 @@ struct kvm_msr_filter filter_gs = {
 	},
 };
 
-uint64_t msr_non_existent_data;
-int guest_exception_count;
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+	u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+	bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+	memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+	memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+	memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+	memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+	memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+	deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+	deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+	deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+	.flags = KVM_MSR_FILTER_DEFAULT_DENY,
+	.ranges = {
+		{
+			.flags = KVM_MSR_FILTER_READ,
+			.base = 0x00000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_00000000,
+		}, {
+			.flags = KVM_MSR_FILTER_WRITE,
+			.base = 0x00000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_00000000_write,
+		}, {
+			.flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+			.base = 0x40000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_40000000,
+		}, {
+			.flags = KVM_MSR_FILTER_READ,
+			.base = 0xc0000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_c0000000_read,
+		}, {
+			.flags = KVM_MSR_FILTER_WRITE,
+			.base = 0xc0000000,
+			.nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+			.bitmap = bitmap_c0000000,
+		}, {
+			.flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+			.base = 0xdeadbeef,
+			.nmsrs = 1,
+			.bitmap = bitmap_deadbeef,
+		},
+	},
+};
+
+struct kvm_msr_filter no_filter_deny = {
+	.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
 
 /*
  * Note: Force test_rdmsr() to not be inlined to prevent the labels,
@@ -146,7 +213,7 @@ static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
 extern char em_rdmsr_start, em_rdmsr_end;
 extern char em_wrmsr_start, em_wrmsr_end;
 
-static void guest_code(void)
+static void guest_code_filter_allow(void)
 {
 	uint64_t data;
 
@@ -233,27 +300,60 @@ static void guest_code(void)
 	GUEST_DONE();
 }
 
+static void guest_msr_calls(bool trapped)
+{
+	/* This goes into the in-kernel emulation */
+	wrmsr(MSR_SYSCALL_MASK, 0);
+
+	if (trapped) {
+		/* This goes into user space emulation */
+		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+		GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+	} else {
+		GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+		GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+	}
+
+	/* If trapped == true, this goes into user space emulation */
+	wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+	/* This goes into the in-kernel emulation */
+	rdmsr(MSR_IA32_POWER_CTL);
+
+	/* Invalid MSR, should always be handled by user space exit */
+	GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+	wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code_filter_deny(void)
+{
+	guest_msr_calls(true);
+
+	/*
+	 * Disable msr filtering, so that the kernel
+	 * handles everything in the next round
+	 */
+	GUEST_SYNC(0);
+
+	guest_msr_calls(false);
+
+	GUEST_DONE();
+}
 
 static void guest_code_permission_bitmap(void)
 {
 	uint64_t data;
 
-	test_wrmsr(MSR_FS_BASE, 0);
 	data = test_rdmsr(MSR_FS_BASE);
 	GUEST_ASSERT(data == MSR_FS_BASE);
-
-	test_wrmsr(MSR_GS_BASE, 0);
 	data = test_rdmsr(MSR_GS_BASE);
-	GUEST_ASSERT(data == 0);
+	GUEST_ASSERT(data != MSR_GS_BASE);
 
 	/* Let userspace know to switch the filter */
 	GUEST_SYNC(0);
 
-	test_wrmsr(MSR_FS_BASE, 0);
 	data = test_rdmsr(MSR_FS_BASE);
-	GUEST_ASSERT(data == 0);
-
-	test_wrmsr(MSR_GS_BASE, 0);
+	GUEST_ASSERT(data != MSR_FS_BASE);
 	data = test_rdmsr(MSR_GS_BASE);
 	GUEST_ASSERT(data == MSR_GS_BASE);
 
@@ -376,9 +476,6 @@ static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index)
 	case MSR_NON_EXISTENT:
 		msr_non_existent_data = run->msr.data;
 		break;
-	case MSR_FS_BASE:
-	case MSR_GS_BASE:
-		break;
 	default:
 		TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
 	}
@@ -453,7 +550,7 @@ static void run_guest_then_process_ucall_done(struct kvm_vm *vm)
 	process_ucall_done(vm);
 }
 
-static void test_msr_filter(void) {
+static void test_msr_filter_allow(void) {
 	struct kvm_enable_cap cap = {
 		.cap = KVM_CAP_X86_USER_SPACE_MSR,
 		.args[0] = KVM_MSR_EXIT_REASON_FILTER,
@@ -462,7 +559,7 @@ static void test_msr_filter(void) {
 	int rc;
 
 	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow);
 	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
 	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
@@ -472,7 +569,7 @@ static void test_msr_filter(void) {
 	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
 	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
 
-	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vm, VCPU_ID);
@@ -519,6 +616,116 @@ static void test_msr_filter(void) {
 	kvm_vm_free(vm);
 }
 
+static int handle_ucall(struct kvm_vm *vm)
+{
+	struct ucall uc;
+
+	switch (get_ucall(vm, VCPU_ID, &uc)) {
+	case UCALL_ABORT:
+		TEST_FAIL("Guest assertion not met");
+		break;
+	case UCALL_SYNC:
+		vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+		break;
+	case UCALL_DONE:
+		return 1;
+	default:
+		TEST_FAIL("Unknown ucall %lu", uc.cmd);
+	}
+
+	return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+	run->msr.data = run->msr.index;
+	msr_reads++;
+
+	if (run->msr.index == MSR_SYSCALL_MASK ||
+	    run->msr.index == MSR_GS_BASE) {
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+			    "MSR read trap w/o access fault");
+	}
+
+	if (run->msr.index == 0xdeadbeef) {
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+			    "MSR deadbeef read trap w/o inval fault");
+	}
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+	/* ignore */
+	msr_writes++;
+
+	if (run->msr.index == MSR_IA32_POWER_CTL) {
+		TEST_ASSERT(run->msr.data == 0x1234,
+			    "MSR data for MSR_IA32_POWER_CTL incorrect");
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+			    "MSR_IA32_POWER_CTL trap w/o access fault");
+	}
+
+	if (run->msr.index == 0xdeadbeef) {
+		TEST_ASSERT(run->msr.data == 0x1234,
+			    "MSR data for deadbeef incorrect");
+		TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+			    "deadbeef trap w/o inval fault");
+	}
+}
+
+static void test_msr_filter_deny(void) {
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_X86_USER_SPACE_MSR,
+		.args[0] = KVM_MSR_EXIT_REASON_INVAL |
+			   KVM_MSR_EXIT_REASON_UNKNOWN |
+			   KVM_MSR_EXIT_REASON_FILTER,
+	};
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	int rc;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+	vm_enable_cap(vm, &cap);
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	prepare_bitmaps();
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+	while (1) {
+		rc = _vcpu_run(vm, VCPU_ID);
+
+		TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+
+		switch (run->exit_reason) {
+		case KVM_EXIT_X86_RDMSR:
+			handle_rdmsr(run);
+			break;
+		case KVM_EXIT_X86_WRMSR:
+			handle_wrmsr(run);
+			break;
+		case KVM_EXIT_IO:
+			if (handle_ucall(vm))
+				goto done;
+			break;
+		}
+
+	}
+
+done:
+	TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+	TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+
+	kvm_vm_free(vm);
+}
+
 static void test_msr_permission_bitmap(void) {
 	struct kvm_enable_cap cap = {
 		.cap = KVM_CAP_X86_USER_SPACE_MSR,
@@ -539,11 +746,9 @@ static void test_msr_permission_bitmap(void) {
 	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
 
 	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
-	run_guest_then_process_wrmsr(vm, MSR_FS_BASE);
 	run_guest_then_process_rdmsr(vm, MSR_FS_BASE);
 	TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC.");
 	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
-	run_guest_then_process_wrmsr(vm, MSR_GS_BASE);
 	run_guest_then_process_rdmsr(vm, MSR_GS_BASE);
 	run_guest_then_process_ucall_done(vm);
 
@@ -552,7 +757,12 @@ static void test_msr_permission_bitmap(void) {
 
 int main(int argc, char *argv[])
 {
-	test_msr_filter();
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	test_msr_filter_allow();
+
+	test_msr_filter_deny();
 
 	test_msr_permission_bitmap();
 
-- 
cgit v1.2.3-70-g09d2


From 111d0bda8eeb4b54e0c63897b071effbf9fd9251 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@de.ibm.com>
Date: Tue, 8 Dec 2020 22:08:29 +0100
Subject: tools/kvm_stat: Exempt time-based counters

The new counters halt_poll_success_ns and halt_poll_fail_ns do not count
events. Instead they provide a time, and mess up our statistics. Therefore,
we should exclude them.
Removal is currently implemented with an exempt list. If more counters like
these appear, we can think about a more general rule like excluding all
fields name "*_ns", in case that's a standing convention.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Tested-and-reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20201208210829.101324-1-raspl@linux.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index d199a3694be8..b0bf56c5f120 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -742,7 +742,11 @@ class DebugfsProvider(Provider):
         The fields are all available KVM debugfs files
 
         """
-        return self.walkdir(PATH_DEBUGFS_KVM)[2]
+        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns']
+        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
+                  if field not in exempt_list]
+
+        return fields
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-- 
cgit v1.2.3-70-g09d2


From fe62de310e2b563c0d303a09d06b020077fe86b4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Dec 2020 13:58:24 -0800
Subject: libbpf: Support modules in bpf_program__set_attach_target() API

Support finding kernel targets in kernel modules when using
bpf_program__set_attach_target() API. This brings it up to par with what
libbpf supports when doing declarative SEC()-based target determination.

Some minor internal refactoring was needed to make sure vmlinux BTF can be
loaded before bpf_object's load phase.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201211215825.3646154-2-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 64 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9be88a90a4aa..6ae748f6ea11 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2518,7 +2518,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
 	return 0;
 }
 
-static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
+static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
 {
 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
 	    prog->type == BPF_PROG_TYPE_LSM)
@@ -2533,37 +2533,43 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
 	return false;
 }
 
-static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
+static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 {
-	bool need_vmlinux_btf = false;
 	struct bpf_program *prog;
-	int i, err;
+	int i;
 
 	/* CO-RE relocations need kernel BTF */
 	if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
-		need_vmlinux_btf = true;
+		return true;
 
 	/* Support for typed ksyms needs kernel BTF */
 	for (i = 0; i < obj->nr_extern; i++) {
 		const struct extern_desc *ext;
 
 		ext = &obj->externs[i];
-		if (ext->type == EXT_KSYM && ext->ksym.type_id) {
-			need_vmlinux_btf = true;
-			break;
-		}
+		if (ext->type == EXT_KSYM && ext->ksym.type_id)
+			return true;
 	}
 
 	bpf_object__for_each_program(prog, obj) {
 		if (!prog->load)
 			continue;
-		if (libbpf_prog_needs_vmlinux_btf(prog)) {
-			need_vmlinux_btf = true;
-			break;
-		}
+		if (prog_needs_vmlinux_btf(prog))
+			return true;
 	}
 
-	if (!need_vmlinux_btf)
+	return false;
+}
+
+static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
+{
+	int err;
+
+	/* btf_vmlinux could be loaded earlier */
+	if (obj->btf_vmlinux)
+		return 0;
+
+	if (!force && !obj_needs_vmlinux_btf(obj))
 		return 0;
 
 	obj->btf_vmlinux = libbpf_find_kernel_btf();
@@ -7475,7 +7481,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	}
 
 	err = bpf_object__probe_loading(obj);
-	err = err ? : bpf_object__load_vmlinux_btf(obj);
+	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 	err = err ? : bpf_object__sanitize_and_load_btf(obj);
 	err = err ? : bpf_object__sanitize_maps(obj);
@@ -10870,23 +10876,33 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 				   int attach_prog_fd,
 				   const char *attach_func_name)
 {
-	int btf_id;
+	int btf_obj_fd = 0, btf_id = 0, err;
 
 	if (!prog || attach_prog_fd < 0 || !attach_func_name)
 		return -EINVAL;
 
-	if (attach_prog_fd)
+	if (prog->obj->loaded)
+		return -EINVAL;
+
+	if (attach_prog_fd) {
 		btf_id = libbpf_find_prog_btf_id(attach_func_name,
 						 attach_prog_fd);
-	else
-		btf_id = libbpf_find_vmlinux_btf_id(attach_func_name,
-						    prog->expected_attach_type);
-
-	if (btf_id < 0)
-		return btf_id;
+		if (btf_id < 0)
+			return btf_id;
+	} else {
+		/* load btf_vmlinux, if not yet */
+		err = bpf_object__load_vmlinux_btf(prog->obj, true);
+		if (err)
+			return err;
+		err = find_kernel_btf_id(prog->obj, attach_func_name,
+					 prog->expected_attach_type,
+					 &btf_obj_fd, &btf_id);
+		if (err)
+			return err;
+	}
 
 	prog->attach_btf_id = btf_id;
-	prog->attach_btf_obj_fd = 0;
+	prog->attach_btf_obj_fd = btf_obj_fd;
 	prog->attach_prog_fd = attach_prog_fd;
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 2e33f831fccd2df83836a8e255755f85d364aaeb Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 11 Dec 2020 13:58:25 -0800
Subject: selftests/bpf: Add set_attach_target() API selftest for module target

Add test for bpf_program__set_attach_target() API, validating it can find
kernel module fentry target.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201211215825.3646154-3-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/module_attach.c | 11 ++++++++++-
 tools/testing/selftests/bpf/progs/test_module_attach.c | 11 +++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 4b65e9918764..50796b651f72 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -28,10 +28,18 @@ void test_module_attach(void)
 	struct test_module_attach__bss *bss;
 	int err;
 
-	skel = test_module_attach__open_and_load();
+	skel = test_module_attach__open();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
 		return;
 
+	err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual,
+					     0, "bpf_testmod_test_read");
+	ASSERT_OK(err, "set_attach_target");
+
+	err = test_module_attach__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton\n"))
+		return;
+
 	bss = skel->bss;
 
 	err = test_module_attach__attach(skel);
@@ -44,6 +52,7 @@ void test_module_attach(void)
 	ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");
 	ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
 	ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
+	ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
 	ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
 	ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
 	ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index b563563df172..efd1e287ac17 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -38,6 +38,17 @@ int BPF_PROG(handle_fentry,
 	return 0;
 }
 
+__u32 fentry_manual_read_sz = 0;
+
+SEC("fentry/placeholder")
+int BPF_PROG(handle_fentry_manual,
+	     struct file *file, struct kobject *kobj,
+	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+	fentry_manual_read_sz = len;
+	return 0;
+}
+
 __u32 fexit_read_sz = 0;
 int fexit_ret = 0;
 
-- 
cgit v1.2.3-70-g09d2


From a4d2a7ad86834092b327082004ead755d2603376 Mon Sep 17 00:00:00 2001
From: Brendan Jackman <jackmanb@google.com>
Date: Mon, 14 Dec 2020 11:38:12 +0000
Subject: libbpf: Expose libbpf ring_buffer epoll_fd

This provides a convenient perf ringbuf -> libbpf ringbuf migration
path for users of external polling systems. It is analogous to
perf_buffer__epoll_fd.

Signed-off-by: Brendan Jackman <jackmanb@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201214113812.305274-1-jackmanb@google.com
---
 tools/lib/bpf/libbpf.h   | 1 +
 tools/lib/bpf/libbpf.map | 1 +
 tools/lib/bpf/ringbuf.c  | 6 ++++++
 3 files changed, 8 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6909ee81113a..3c35eb401931 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -536,6 +536,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
 				ring_buffer_sample_fn sample_cb, void *ctx);
 LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
 LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
 
 /* Perf buffer APIs */
 struct perf_buffer;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 7c4126542e2b..1c0fd2dd233a 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -346,6 +346,7 @@ LIBBPF_0.3.0 {
 		btf__parse_split;
 		btf__new_empty_split;
 		btf__new_split;
+		ring_buffer__epoll_fd;
 		xsk_setup_xdp_prog;
 		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 5c6522c89af1..a8f997d47adf 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -282,3 +282,9 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 	}
 	return cnt < 0 ? -errno : res;
 }
+
+/* Get an fd that can be used to sleep until data is available in the ring(s) */
+int ring_buffer__epoll_fd(const struct ring_buffer *rb)
+{
+	return rb->epoll_fd;
+}
-- 
cgit v1.2.3-70-g09d2


From b4b638c36b7e7acd847b9c4b9c80f268e45ea30c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 9 Dec 2020 17:33:50 -0800
Subject: selftests/bpf: Add a test for ptr_to_map_value on stack for helper
 access

Change bpf_iter_task.c such that pointer to map_value may appear
on the stack for bpf_seq_printf() to access. Without previous
verifier patch, the bpf_iter test will fail.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201210013350.943985-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_task.c | 3 ++-
 tools/testing/selftests/bpf/verifier/unpriv.c     | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index 4983087852a0..b7f32c160f4e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -11,9 +11,10 @@ int dump_task(struct bpf_iter__task *ctx)
 {
 	struct seq_file *seq = ctx->meta->seq;
 	struct task_struct *task = ctx->task;
+	static char info[] = "    === END ===";
 
 	if (task == (void *)0) {
-		BPF_SEQ_PRINTF(seq, "    === END ===\n");
+		BPF_SEQ_PRINTF(seq, "%s\n", info);
 		return 0;
 	}
 
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index 91bb77c24a2e..a3fe0fbaed41 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -108,8 +108,9 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "invalid indirect read from stack off -8+0 size 8",
-	.result = REJECT,
+	.errstr_unpriv = "invalid indirect read from stack off -8+0 size 8",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
 },
 {
 	"unpriv: mangle pointer on stack 1",
-- 
cgit v1.2.3-70-g09d2


From 6fe4ccdc3dabe3de573e27fb2684d925bd611458 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Thu, 10 Dec 2020 14:25:00 -0800
Subject: selftests: mptcp: add the flush addrs testcase

This patch added the flush addrs testcase. In do_transfer, if the number
of removing addresses is less than 8, use the del addr command to remove
the addresses one by one. If the number is more than 8, use the flush addrs
command to remove the addresses.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 50 ++++++++++++++++++-------
 1 file changed, 36 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 0eae628d1ffd..9aa9624cff97 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -264,27 +264,37 @@ do_transfer()
 	cpid=$!
 
 	if [ $rm_nr_ns1 -gt 0 ]; then
-		counter=1
-		sleep 1
+		if [ $rm_nr_ns1 -lt 8 ]; then
+			counter=1
+			sleep 1
 
-		while [ $counter -le $rm_nr_ns1 ]
-		do
-			ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+			while [ $counter -le $rm_nr_ns1 ]
+			do
+				ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+				sleep 1
+				let counter+=1
+			done
+		else
 			sleep 1
-			let counter+=1
-		done
+			ip netns exec ${listener_ns} ./pm_nl_ctl flush
+		fi
 	fi
 
 	if [ $rm_nr_ns2 -gt 0 ]; then
-		counter=1
-		sleep 1
+		if [ $rm_nr_ns2 -lt 8 ]; then
+			counter=1
+			sleep 1
 
-		while [ $counter -le $rm_nr_ns2 ]
-		do
-			ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+			while [ $counter -le $rm_nr_ns2 ]
+			do
+				ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+				sleep 1
+				let counter+=1
+			done
+		else
 			sleep 1
-			let counter+=1
-		done
+			ip netns exec ${connector_ns} ./pm_nl_ctl flush
+		fi
 	fi
 
 	wait $cpid
@@ -663,6 +673,18 @@ chk_join_nr "remove subflows and signal" 3 3 3
 chk_add_nr 1 1
 chk_rm_nr 2 2
 
+# subflows and signal, flush
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1 0 8 8 slow
+chk_join_nr "flush subflows and signal" 3 3 3
+chk_add_nr 1 1
+chk_rm_nr 2 2
+
 # subflow IPv6
 reset
 ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-- 
cgit v1.2.3-70-g09d2


From 0e12c0271887f1b00b79b7612c1d4f0d3d34e8a8 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Fri, 11 Dec 2020 12:24:20 +0800
Subject: selftests: test_vxlan_under_vrf: mute unnecessary error message

The cleanup function in this script that tries to delete hv-1 / hv-2
vm-1 / vm-2 netns will generate some uncessary error messages:

Cannot remove namespace file "/run/netns/hv-2": No such file or directory
Cannot remove namespace file "/run/netns/vm-1": No such file or directory
Cannot remove namespace file "/run/netns/vm-2": No such file or directory

Redirect it to /dev/null like other commands in the cleanup function
to reduce confusion.

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Link: https://lore.kernel.org/r/20201211042420.16411-1-po-hsu.lin@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/test_vxlan_under_vrf.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index 09f9ed92cbe4..534c8b7699ab 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -50,7 +50,7 @@ cleanup() {
     ip link del veth-tap 2>/dev/null || true
 
     for ns in hv-1 hv-2 vm-1 vm-2; do
-        ip netns del $ns || true
+        ip netns del $ns 2>/dev/null || true
     done
 }
 
-- 
cgit v1.2.3-70-g09d2


From b2ce5dbc15819ea4bef47dbd368239cb1e965158 Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Thu, 19 Nov 2020 20:54:11 +0530
Subject: perf test: Fix metric parsing test

Commit e1c92a7fbbc5 ("perf tests: Add another metric parsing test") add
another test for metric parsing. The test goes through all metrics
compiled for arch within pmu events and try to parse them.

Right now this test is failing in powerpc machine.

Result in power9 platform:

  [command]# ./perf test 10
  10: PMU events                                                      :
  10.1: PMU event table sanity                                        : Ok
  10.2: PMU event map aliases                                         : Ok
  10.3: Parsing of PMU event table metrics                            : Skip (some metrics failed)
  10.4: Parsing of PMU event table metrics with fake PMUs             : FAILED!

Issue is we are passing different runtime parameter value in
"expr__find_other" and "expr__parse" function which is called from
function `metric_parse_fake`.  And because of this parsing of hv-24x7
metrics is failing.

  [command]# ./perf test 10 -vv
  .....
  hv_24x7/pm_mcs01_128b_rd_disp_port01,chip=1/ not found
  expr__parse failed
  test child finished with -1
  ---- end ----
  PMU events subtest 4: FAILED!

This patch fix this issue and change runtime parameter value to '0' in
expr__parse function.

Result in power9 platform after this patch:

  [command]# ./perf test 10
  10: PMU events                                                      :
  10.1: PMU event table sanity                                        : Ok
  10.2: PMU event map aliases                                         : Ok
  10.3: Parsing of PMU event table metrics                            : Skip (some metrics failed)
  10.4: Parsing of PMU event table metrics with fake PMUs             : Ok

Fixes: e1c92a7fbbc5 ("perf tests: Add another metric parsing test")
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Link: http://lore.kernel.org/lkml/20201119152411.46041-1-kjain@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/pmu-events.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index ad2b21591275..0ca6a5a53523 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -575,7 +575,7 @@ static int metric_parse_fake(const char *str)
 		}
 	}
 
-	if (expr__parse(&result, &ctx, str, 1))
+	if (expr__parse(&result, &ctx, str, 0))
 		pr_err("expr__parse failed\n");
 	else
 		ret = 0;
-- 
cgit v1.2.3-70-g09d2


From 9c84f229268fa229e250b7225611d0eb7094fea0 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:05 -0800
Subject: mm/gup_benchmark: rename to mm/gup_test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "selftests/vm: gup_test, hmm-tests, assorted improvements", v3.

Summary: This series provides two main things, and a number of smaller
supporting goodies.  The two main points are:

1) Add a new sub-test to gup_test, which in turn is a renamed version
   of gup_benchmark.  This sub-test allows nicer testing of dump_pages(),
   at least on user-space pages.

   For quite a while, I was doing a quick hack to gup_test.c whenever I
   wanted to try out changes to dump_page().  Then Matthew Wilcox asked me
   what I meant when I said "I used my dump_page() unit test", and I
   realized that it might be nice to check in a polished up version of
   that.

   Details about how it works and how to use it are in the commit
   description for patch #6 ("selftests/vm: gup_test: introduce the
   dump_pages() sub-test").

2) Fixes a limitation of hmm-tests: these tests are incredibly useful,
   but only if people actually build and run them.  And it turns out that
   libhugetlbfs is a little too effective at throwing a wrench in the
   works, there.  So I've added a little configuration check that removes
   just two of the 21 hmm-tests, if libhugetlbfs is not available.

   Further details in the commit description of patch #8
   ("selftests/vm: hmm-tests: remove the libhugetlbfs dependency").

Other smaller things that this series does:

a) Remove code duplication by creating gup_test.h.

b) Clear up the sub-test organization, and their invocation within
   run_vmtests.sh.

c) Other minor assorted improvements.

[1] v2 is here:
https://lore.kernel.org/linux-doc/20200929212747.251804-1-jhubbard@nvidia.com/

[2] https://lore.kernel.org/r/CAHk-=wgh-TMPHLY3jueHX7Y2fWh3D+nMBqVS__AZm6-oorquWA@mail.gmail.com

This patch (of 9):

Rename nearly every "gup_benchmark" reference and file name to "gup_test".
The one exception is for the actual gup benchmark test itself.

The current code already does a *little* bit more than benchmarking, and
definitely covers more than get_user_pages_fast().  More importantly,
however, subsequent patches are about to add some functionality that is
non-benchmark related.

Closely related changes:

* Kconfig: in addition to renaming the options from GUP_BENCHMARK to
  GUP_TEST, update the help text to reflect that it's no longer a
  benchmark-only test.

Link: https://lkml.kernel.org/r/20201026064021.3545418-1-jhubbard@nvidia.com
Link: https://lkml.kernel.org/r/20201026064021.3545418-2-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/pin_user_pages.rst  |   6 +-
 arch/s390/configs/debug_defconfig          |   2 +-
 arch/s390/configs/defconfig                |   2 +-
 mm/Kconfig                                 |  15 ++-
 mm/Makefile                                |   2 +-
 mm/gup_benchmark.c                         | 210 -----------------------------
 mm/gup_test.c                              | 210 +++++++++++++++++++++++++++++
 tools/testing/selftests/vm/.gitignore      |   2 +-
 tools/testing/selftests/vm/Makefile        |   2 +-
 tools/testing/selftests/vm/config          |   2 +-
 tools/testing/selftests/vm/gup_benchmark.c | 143 --------------------
 tools/testing/selftests/vm/gup_test.c      | 143 ++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests     |   8 +-
 13 files changed, 376 insertions(+), 371 deletions(-)
 delete mode 100644 mm/gup_benchmark.c
 create mode 100644 mm/gup_test.c
 delete mode 100644 tools/testing/selftests/vm/gup_benchmark.c
 create mode 100644 tools/testing/selftests/vm/gup_test.c

(limited to 'tools')

diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst
index 7ca8c7bac650..eae972b23224 100644
--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -221,12 +221,12 @@ Unit testing
 ============
 This file::
 
- tools/testing/selftests/vm/gup_benchmark.c
+ tools/testing/selftests/vm/gup_test.c
 
 has the following new calls to exercise the new pin*() wrapper functions:
 
-* PIN_FAST_BENCHMARK (./gup_benchmark -a)
-* PIN_BENCHMARK (./gup_benchmark -b)
+* PIN_FAST_BENCHMARK (./gup_test -a)
+* PIN_BENCHMARK (./gup_test -b)
 
 You can monitor how many total dma-pinned pages have been acquired and released
 since the system was booted, via two new /proc/vmstat entries: ::
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index fe6f529ac82c..3601c28d1526 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -102,7 +102,7 @@ CONFIG_ZSMALLOC_STAT=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PERCPU_STATS=y
-CONFIG_GUP_BENCHMARK=y
+CONFIG_GUP_TEST=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 17d5df2c1eff..e2171a008809 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -95,7 +95,7 @@ CONFIG_ZSMALLOC_STAT=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
 CONFIG_PERCPU_STATS=y
-CONFIG_GUP_BENCHMARK=y
+CONFIG_GUP_TEST=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
diff --git a/mm/Kconfig b/mm/Kconfig
index 390165ffbb0f..e25e5cb2989f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -821,13 +821,18 @@ config PERCPU_STATS
 	  information includes global and per chunk statistics, which can
 	  be used to help understand percpu memory usage.
 
-config GUP_BENCHMARK
-	bool "Enable infrastructure for get_user_pages() and related calls benchmarking"
+config GUP_TEST
+	bool "Enable infrastructure for get_user_pages()-related unit tests"
 	help
-	  Provides /sys/kernel/debug/gup_benchmark that helps with testing
-	  performance of get_user_pages() and related calls.
+	  Provides /sys/kernel/debug/gup_test, which in turn provides a way
+	  to make ioctl calls that can launch kernel-based unit tests for
+	  the get_user_pages*() and pin_user_pages*() family of API calls.
 
-	  See tools/testing/selftests/vm/gup_benchmark.c
+	  These tests include benchmark testing of the _fast variants of
+	  get_user_pages*() and pin_user_pages*(), as well as smoke tests of
+	  the non-_fast variants.
+
+	  See tools/testing/selftests/vm/gup_test.c
 
 config GUP_GET_PTE_LOW_HIGH
 	bool
diff --git a/mm/Makefile b/mm/Makefile
index d73aed0fc99c..069f216e109e 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -90,7 +90,7 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
 obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
 obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o
 obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
-obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o
+obj-$(CONFIG_GUP_TEST) += gup_test.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
 obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
deleted file mode 100644
index 8b3e5b5cd8fa..000000000000
--- a/mm/gup_benchmark.c
+++ /dev/null
@@ -1,210 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-#include <linux/ktime.h>
-#include <linux/debugfs.h>
-
-#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
-#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_benchmark)
-#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_benchmark)
-#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_benchmark)
-#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_benchmark)
-
-struct gup_benchmark {
-	__u64 get_delta_usec;
-	__u64 put_delta_usec;
-	__u64 addr;
-	__u64 size;
-	__u32 nr_pages_per_call;
-	__u32 flags;
-	__u64 expansion[10];	/* For future use */
-};
-
-static void put_back_pages(unsigned int cmd, struct page **pages,
-			   unsigned long nr_pages)
-{
-	unsigned long i;
-
-	switch (cmd) {
-	case GUP_FAST_BENCHMARK:
-	case GUP_BENCHMARK:
-		for (i = 0; i < nr_pages; i++)
-			put_page(pages[i]);
-		break;
-
-	case PIN_FAST_BENCHMARK:
-	case PIN_BENCHMARK:
-	case PIN_LONGTERM_BENCHMARK:
-		unpin_user_pages(pages, nr_pages);
-		break;
-	}
-}
-
-static void verify_dma_pinned(unsigned int cmd, struct page **pages,
-			      unsigned long nr_pages)
-{
-	unsigned long i;
-	struct page *page;
-
-	switch (cmd) {
-	case PIN_FAST_BENCHMARK:
-	case PIN_BENCHMARK:
-	case PIN_LONGTERM_BENCHMARK:
-		for (i = 0; i < nr_pages; i++) {
-			page = pages[i];
-			if (WARN(!page_maybe_dma_pinned(page),
-				 "pages[%lu] is NOT dma-pinned\n", i)) {
-
-				dump_page(page, "gup_benchmark failure");
-				break;
-			}
-		}
-		break;
-	}
-}
-
-static int __gup_benchmark_ioctl(unsigned int cmd,
-		struct gup_benchmark *gup)
-{
-	ktime_t start_time, end_time;
-	unsigned long i, nr_pages, addr, next;
-	int nr;
-	struct page **pages;
-	int ret = 0;
-	bool needs_mmap_lock =
-		cmd != GUP_FAST_BENCHMARK && cmd != PIN_FAST_BENCHMARK;
-
-	if (gup->size > ULONG_MAX)
-		return -EINVAL;
-
-	nr_pages = gup->size / PAGE_SIZE;
-	pages = kvcalloc(nr_pages, sizeof(void *), GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	if (needs_mmap_lock && mmap_read_lock_killable(current->mm)) {
-		ret = -EINTR;
-		goto free_pages;
-	}
-
-	i = 0;
-	nr = gup->nr_pages_per_call;
-	start_time = ktime_get();
-	for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) {
-		if (nr != gup->nr_pages_per_call)
-			break;
-
-		next = addr + nr * PAGE_SIZE;
-		if (next > gup->addr + gup->size) {
-			next = gup->addr + gup->size;
-			nr = (next - addr) / PAGE_SIZE;
-		}
-
-		/* Filter out most gup flags: only allow a tiny subset here: */
-		gup->flags &= FOLL_WRITE;
-
-		switch (cmd) {
-		case GUP_FAST_BENCHMARK:
-			nr = get_user_pages_fast(addr, nr, gup->flags,
-						 pages + i);
-			break;
-		case GUP_BENCHMARK:
-			nr = get_user_pages(addr, nr, gup->flags, pages + i,
-					    NULL);
-			break;
-		case PIN_FAST_BENCHMARK:
-			nr = pin_user_pages_fast(addr, nr, gup->flags,
-						 pages + i);
-			break;
-		case PIN_BENCHMARK:
-			nr = pin_user_pages(addr, nr, gup->flags, pages + i,
-					    NULL);
-			break;
-		case PIN_LONGTERM_BENCHMARK:
-			nr = pin_user_pages(addr, nr,
-					    gup->flags | FOLL_LONGTERM,
-					    pages + i, NULL);
-			break;
-		default:
-			ret = -EINVAL;
-			goto unlock;
-		}
-
-		if (nr <= 0)
-			break;
-		i += nr;
-	}
-	end_time = ktime_get();
-
-	/* Shifting the meaning of nr_pages: now it is actual number pinned: */
-	nr_pages = i;
-
-	gup->get_delta_usec = ktime_us_delta(end_time, start_time);
-	gup->size = addr - gup->addr;
-
-	/*
-	 * Take an un-benchmark-timed moment to verify DMA pinned
-	 * state: print a warning if any non-dma-pinned pages are found:
-	 */
-	verify_dma_pinned(cmd, pages, nr_pages);
-
-	start_time = ktime_get();
-
-	put_back_pages(cmd, pages, nr_pages);
-
-	end_time = ktime_get();
-	gup->put_delta_usec = ktime_us_delta(end_time, start_time);
-
-unlock:
-	if (needs_mmap_lock)
-		mmap_read_unlock(current->mm);
-free_pages:
-	kvfree(pages);
-	return ret;
-}
-
-static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
-		unsigned long arg)
-{
-	struct gup_benchmark gup;
-	int ret;
-
-	switch (cmd) {
-	case GUP_FAST_BENCHMARK:
-	case GUP_BENCHMARK:
-	case PIN_FAST_BENCHMARK:
-	case PIN_BENCHMARK:
-	case PIN_LONGTERM_BENCHMARK:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (copy_from_user(&gup, (void __user *)arg, sizeof(gup)))
-		return -EFAULT;
-
-	ret = __gup_benchmark_ioctl(cmd, &gup);
-	if (ret)
-		return ret;
-
-	if (copy_to_user((void __user *)arg, &gup, sizeof(gup)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static const struct file_operations gup_benchmark_fops = {
-	.open = nonseekable_open,
-	.unlocked_ioctl = gup_benchmark_ioctl,
-};
-
-static int gup_benchmark_init(void)
-{
-	debugfs_create_file_unsafe("gup_benchmark", 0600, NULL, NULL,
-				   &gup_benchmark_fops);
-
-	return 0;
-}
-
-late_initcall(gup_benchmark_init);
diff --git a/mm/gup_test.c b/mm/gup_test.c
new file mode 100644
index 000000000000..59472ea6aa39
--- /dev/null
+++ b/mm/gup_test.c
@@ -0,0 +1,210 @@
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/ktime.h>
+#include <linux/debugfs.h>
+
+#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
+#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
+#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
+#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
+#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
+
+struct gup_test {
+	__u64 get_delta_usec;
+	__u64 put_delta_usec;
+	__u64 addr;
+	__u64 size;
+	__u32 nr_pages_per_call;
+	__u32 flags;
+	__u64 expansion[10];	/* For future use */
+};
+
+static void put_back_pages(unsigned int cmd, struct page **pages,
+			   unsigned long nr_pages)
+{
+	unsigned long i;
+
+	switch (cmd) {
+	case GUP_FAST_BENCHMARK:
+	case GUP_BENCHMARK:
+		for (i = 0; i < nr_pages; i++)
+			put_page(pages[i]);
+		break;
+
+	case PIN_FAST_BENCHMARK:
+	case PIN_BENCHMARK:
+	case PIN_LONGTERM_BENCHMARK:
+		unpin_user_pages(pages, nr_pages);
+		break;
+	}
+}
+
+static void verify_dma_pinned(unsigned int cmd, struct page **pages,
+			      unsigned long nr_pages)
+{
+	unsigned long i;
+	struct page *page;
+
+	switch (cmd) {
+	case PIN_FAST_BENCHMARK:
+	case PIN_BENCHMARK:
+	case PIN_LONGTERM_BENCHMARK:
+		for (i = 0; i < nr_pages; i++) {
+			page = pages[i];
+			if (WARN(!page_maybe_dma_pinned(page),
+				 "pages[%lu] is NOT dma-pinned\n", i)) {
+
+				dump_page(page, "gup_test failure");
+				break;
+			}
+		}
+		break;
+	}
+}
+
+static int __gup_test_ioctl(unsigned int cmd,
+		struct gup_test *gup)
+{
+	ktime_t start_time, end_time;
+	unsigned long i, nr_pages, addr, next;
+	int nr;
+	struct page **pages;
+	int ret = 0;
+	bool needs_mmap_lock =
+		cmd != GUP_FAST_BENCHMARK && cmd != PIN_FAST_BENCHMARK;
+
+	if (gup->size > ULONG_MAX)
+		return -EINVAL;
+
+	nr_pages = gup->size / PAGE_SIZE;
+	pages = kvcalloc(nr_pages, sizeof(void *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	if (needs_mmap_lock && mmap_read_lock_killable(current->mm)) {
+		ret = -EINTR;
+		goto free_pages;
+	}
+
+	i = 0;
+	nr = gup->nr_pages_per_call;
+	start_time = ktime_get();
+	for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) {
+		if (nr != gup->nr_pages_per_call)
+			break;
+
+		next = addr + nr * PAGE_SIZE;
+		if (next > gup->addr + gup->size) {
+			next = gup->addr + gup->size;
+			nr = (next - addr) / PAGE_SIZE;
+		}
+
+		/* Filter out most gup flags: only allow a tiny subset here: */
+		gup->flags &= FOLL_WRITE;
+
+		switch (cmd) {
+		case GUP_FAST_BENCHMARK:
+			nr = get_user_pages_fast(addr, nr, gup->flags,
+						 pages + i);
+			break;
+		case GUP_BENCHMARK:
+			nr = get_user_pages(addr, nr, gup->flags, pages + i,
+					    NULL);
+			break;
+		case PIN_FAST_BENCHMARK:
+			nr = pin_user_pages_fast(addr, nr, gup->flags,
+						 pages + i);
+			break;
+		case PIN_BENCHMARK:
+			nr = pin_user_pages(addr, nr, gup->flags, pages + i,
+					    NULL);
+			break;
+		case PIN_LONGTERM_BENCHMARK:
+			nr = pin_user_pages(addr, nr,
+					    gup->flags | FOLL_LONGTERM,
+					    pages + i, NULL);
+			break;
+		default:
+			ret = -EINVAL;
+			goto unlock;
+		}
+
+		if (nr <= 0)
+			break;
+		i += nr;
+	}
+	end_time = ktime_get();
+
+	/* Shifting the meaning of nr_pages: now it is actual number pinned: */
+	nr_pages = i;
+
+	gup->get_delta_usec = ktime_us_delta(end_time, start_time);
+	gup->size = addr - gup->addr;
+
+	/*
+	 * Take an un-benchmark-timed moment to verify DMA pinned
+	 * state: print a warning if any non-dma-pinned pages are found:
+	 */
+	verify_dma_pinned(cmd, pages, nr_pages);
+
+	start_time = ktime_get();
+
+	put_back_pages(cmd, pages, nr_pages);
+
+	end_time = ktime_get();
+	gup->put_delta_usec = ktime_us_delta(end_time, start_time);
+
+unlock:
+	if (needs_mmap_lock)
+		mmap_read_unlock(current->mm);
+free_pages:
+	kvfree(pages);
+	return ret;
+}
+
+static long gup_test_ioctl(struct file *filep, unsigned int cmd,
+		unsigned long arg)
+{
+	struct gup_test gup;
+	int ret;
+
+	switch (cmd) {
+	case GUP_FAST_BENCHMARK:
+	case GUP_BENCHMARK:
+	case PIN_FAST_BENCHMARK:
+	case PIN_BENCHMARK:
+	case PIN_LONGTERM_BENCHMARK:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&gup, (void __user *)arg, sizeof(gup)))
+		return -EFAULT;
+
+	ret = __gup_test_ioctl(cmd, &gup);
+	if (ret)
+		return ret;
+
+	if (copy_to_user((void __user *)arg, &gup, sizeof(gup)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static const struct file_operations gup_test_fops = {
+	.open = nonseekable_open,
+	.unlocked_ioctl = gup_test_ioctl,
+};
+
+static int gup_test_init(void)
+{
+	debugfs_create_file_unsafe("gup_test", 0600, NULL, NULL,
+				   &gup_test_fops);
+
+	return 0;
+}
+
+late_initcall(gup_test_init);
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 849e8226395a..2c8ddcf41c0e 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -15,7 +15,7 @@ userfaultfd
 mlock-intersect-test
 mlock-random-test
 virtual_address_range
-gup_benchmark
+gup_test
 va_128TBswitch
 map_fixed_noreplace
 write_to_hugetlbfs
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 691893afc15d..43723df2c6c4 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -23,7 +23,7 @@ MAKEFLAGS += --no-builtin-rules
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
-TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 69dd0d1aa30b..60e82da0de85 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -3,4 +3,4 @@ CONFIG_USERFAULTFD=y
 CONFIG_TEST_VMALLOC=m
 CONFIG_DEVICE_PRIVATE=y
 CONFIG_TEST_HMM=m
-CONFIG_GUP_BENCHMARK=y
+CONFIG_GUP_TEST=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
deleted file mode 100644
index 1d4359341e44..000000000000
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/prctl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include <linux/types.h>
-
-#define MB (1UL << 20)
-#define PAGE_SIZE sysconf(_SC_PAGESIZE)
-
-#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
-#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_benchmark)
-
-/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_benchmark)
-#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_benchmark)
-#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_benchmark)
-
-/* Just the flags we need, copied from mm.h: */
-#define FOLL_WRITE	0x01	/* check pte is writable */
-
-struct gup_benchmark {
-	__u64 get_delta_usec;
-	__u64 put_delta_usec;
-	__u64 addr;
-	__u64 size;
-	__u32 nr_pages_per_call;
-	__u32 flags;
-	__u64 expansion[10];	/* For future use */
-};
-
-int main(int argc, char **argv)
-{
-	struct gup_benchmark gup;
-	unsigned long size = 128 * MB;
-	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
-	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
-	char *file = "/dev/zero";
-	char *p;
-
-	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
-		switch (opt) {
-		case 'a':
-			cmd = PIN_FAST_BENCHMARK;
-			break;
-		case 'b':
-			cmd = PIN_BENCHMARK;
-			break;
-		case 'L':
-			cmd = PIN_LONGTERM_BENCHMARK;
-			break;
-		case 'm':
-			size = atoi(optarg) * MB;
-			break;
-		case 'r':
-			repeats = atoi(optarg);
-			break;
-		case 'n':
-			nr_pages = atoi(optarg);
-			break;
-		case 't':
-			thp = 1;
-			break;
-		case 'T':
-			thp = 0;
-			break;
-		case 'U':
-			cmd = GUP_BENCHMARK;
-			break;
-		case 'u':
-			cmd = GUP_FAST_BENCHMARK;
-			break;
-		case 'w':
-			write = 1;
-			break;
-		case 'f':
-			file = optarg;
-			break;
-		case 'S':
-			flags &= ~MAP_PRIVATE;
-			flags |= MAP_SHARED;
-			break;
-		case 'H':
-			flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
-			break;
-		default:
-			return -1;
-		}
-	}
-
-	filed = open(file, O_RDWR|O_CREAT);
-	if (filed < 0) {
-		perror("open");
-		exit(filed);
-	}
-
-	gup.nr_pages_per_call = nr_pages;
-	if (write)
-		gup.flags |= FOLL_WRITE;
-
-	fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
-	if (fd == -1) {
-		perror("open");
-		exit(1);
-	}
-
-	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
-	if (p == MAP_FAILED) {
-		perror("mmap");
-		exit(1);
-	}
-	gup.addr = (unsigned long)p;
-
-	if (thp == 1)
-		madvise(p, size, MADV_HUGEPAGE);
-	else if (thp == 0)
-		madvise(p, size, MADV_NOHUGEPAGE);
-
-	for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
-		p[0] = 0;
-
-	for (i = 0; i < repeats; i++) {
-		gup.size = size;
-		if (ioctl(fd, cmd, &gup)) {
-			perror("ioctl");
-			exit(1);
-		}
-
-		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
-			gup.put_delta_usec);
-		if (gup.size != size)
-			printf(", truncated (size: %lld)", gup.size);
-		printf("\n");
-	}
-
-	return 0;
-}
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
new file mode 100644
index 000000000000..00b4731f535e
--- /dev/null
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -0,0 +1,143 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+#define MB (1UL << 20)
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
+#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
+
+/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
+#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
+#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
+#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
+
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE	0x01	/* check pte is writable */
+
+struct gup_test {
+	__u64 get_delta_usec;
+	__u64 put_delta_usec;
+	__u64 addr;
+	__u64 size;
+	__u32 nr_pages_per_call;
+	__u32 flags;
+	__u64 expansion[10];	/* For future use */
+};
+
+int main(int argc, char **argv)
+{
+	struct gup_test gup;
+	unsigned long size = 128 * MB;
+	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
+	char *file = "/dev/zero";
+	char *p;
+
+	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
+		switch (opt) {
+		case 'a':
+			cmd = PIN_FAST_BENCHMARK;
+			break;
+		case 'b':
+			cmd = PIN_BENCHMARK;
+			break;
+		case 'L':
+			cmd = PIN_LONGTERM_BENCHMARK;
+			break;
+		case 'm':
+			size = atoi(optarg) * MB;
+			break;
+		case 'r':
+			repeats = atoi(optarg);
+			break;
+		case 'n':
+			nr_pages = atoi(optarg);
+			break;
+		case 't':
+			thp = 1;
+			break;
+		case 'T':
+			thp = 0;
+			break;
+		case 'U':
+			cmd = GUP_BENCHMARK;
+			break;
+		case 'u':
+			cmd = GUP_FAST_BENCHMARK;
+			break;
+		case 'w':
+			write = 1;
+			break;
+		case 'f':
+			file = optarg;
+			break;
+		case 'S':
+			flags &= ~MAP_PRIVATE;
+			flags |= MAP_SHARED;
+			break;
+		case 'H':
+			flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
+			break;
+		default:
+			return -1;
+		}
+	}
+
+	filed = open(file, O_RDWR|O_CREAT);
+	if (filed < 0) {
+		perror("open");
+		exit(filed);
+	}
+
+	gup.nr_pages_per_call = nr_pages;
+	if (write)
+		gup.flags |= FOLL_WRITE;
+
+	fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
+
+	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	gup.addr = (unsigned long)p;
+
+	if (thp == 1)
+		madvise(p, size, MADV_HUGEPAGE);
+	else if (thp == 0)
+		madvise(p, size, MADV_NOHUGEPAGE);
+
+	for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
+		p[0] = 0;
+
+	for (i = 0; i < repeats; i++) {
+		gup.size = size;
+		if (ioctl(fd, cmd, &gup)) {
+			perror("ioctl");
+			exit(1);
+		}
+
+		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
+			gup.put_delta_usec);
+		if (gup.size != size)
+			printf(", truncated (size: %lld)", gup.size);
+		printf("\n");
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index a3f4f30f0a2e..d1843d5f3c30 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -124,9 +124,9 @@ else
 fi
 
 echo "--------------------------------------------"
-echo "running 'gup_benchmark -U' (normal/slow gup)"
+echo "running 'gup_test -U' (normal/slow gup)"
 echo "--------------------------------------------"
-./gup_benchmark -U
+./gup_test -U
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
@@ -135,9 +135,9 @@ else
 fi
 
 echo "------------------------------------------"
-echo "running gup_benchmark -b (pin_user_pages)"
+echo "running gup_test -b (pin_user_pages)"
 echo "------------------------------------------"
-./gup_benchmark -b
+./gup_test -b
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
-- 
cgit v1.2.3-70-g09d2


From b9dcfdff8b4b223280015281b5050976c484c80a Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:08 -0800
Subject: selftests/vm: use a common gup_test.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid the need to copy-paste the gup_test ioctl commands and the struct
gup_test definition, between the kernel and the user space application, by
providing a new header file for these.  This allows easier and safer
adding of new ioctl calls, as well as reducing the overall line count.

Details: The header file has to be able to compile independently, because
of the arguably unfortunate way that the Makefile is written: the Makefile
tries to build all of its prerequisites, when really it should be only
building the .c files, and leaving the other prerequisites (LOCAL_HDRS) as
pure dependencies.

That Makefile limitation is probably not worth fixing, but it explains why
one of the includes had to be moved into the new header file.

Also: simplify the ioctl struct (struct gup_test), by deleting the unused
__expansion[10] field.  This sort of thing is what you might see in a
stable ABI, but this low-level, kernel-developer-oriented selftests/vm
system is very much not subject to ABI stability.  So "expansion" and
"reserved" fields are unnecessary here.

Link: https://lkml.kernel.org/r/20201026064021.3545418-3-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup_test.c                         | 17 +----------------
 mm/gup_test.h                         | 22 ++++++++++++++++++++++
 tools/testing/selftests/vm/Makefile   |  2 ++
 tools/testing/selftests/vm/gup_test.c | 22 +---------------------
 4 files changed, 26 insertions(+), 37 deletions(-)
 create mode 100644 mm/gup_test.h

(limited to 'tools')

diff --git a/mm/gup_test.c b/mm/gup_test.c
index 59472ea6aa39..4c2d70d88f24 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -4,22 +4,7 @@
 #include <linux/uaccess.h>
 #include <linux/ktime.h>
 #include <linux/debugfs.h>
-
-#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
-#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
-#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
-#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
-#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
-
-struct gup_test {
-	__u64 get_delta_usec;
-	__u64 put_delta_usec;
-	__u64 addr;
-	__u64 size;
-	__u32 nr_pages_per_call;
-	__u32 flags;
-	__u64 expansion[10];	/* For future use */
-};
+#include "gup_test.h"
 
 static void put_back_pages(unsigned int cmd, struct page **pages,
 			   unsigned long nr_pages)
diff --git a/mm/gup_test.h b/mm/gup_test.h
new file mode 100644
index 000000000000..931c2f3f477a
--- /dev/null
+++ b/mm/gup_test.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __GUP_TEST_H
+#define __GUP_TEST_H
+
+#include <linux/types.h>
+
+#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
+#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
+#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
+#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
+#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
+
+struct gup_test {
+	__u64 get_delta_usec;
+	__u64 put_delta_usec;
+	__u64 addr;
+	__u64 size;
+	__u32 nr_pages_per_call;
+	__u32 flags;
+};
+
+#endif	/* __GUP_TEST_H */
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 43723df2c6c4..101c0315bc92 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -134,3 +134,5 @@ endif
 $(OUTPUT)/userfaultfd: LDLIBS += -lpthread
 
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
+
+$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 00b4731f535e..03f7c4f1beaf 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -2,39 +2,19 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/prctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-
-#include <linux/types.h>
+#include "../../../../mm/gup_test.h"
 
 #define MB (1UL << 20)
 #define PAGE_SIZE sysconf(_SC_PAGESIZE)
 
-#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
-#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
-
-/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
-#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
-#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
-
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
 
-struct gup_test {
-	__u64 get_delta_usec;
-	__u64 put_delta_usec;
-	__u64 addr;
-	__u64 size;
-	__u32 nr_pages_per_call;
-	__u32 flags;
-	__u64 expansion[10];	/* For future use */
-};
-
 int main(int argc, char **argv)
 {
 	struct gup_test gup;
-- 
cgit v1.2.3-70-g09d2


From c2aa8afc36fa8669ac165ace1f4d7173f21f367f Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:11 -0800
Subject: selftests/vm: rename run_vmtests --> run_vmtests.sh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename to *.sh, in order to match the conventions of all of the other
items in selftest/vm.

The only reason not to use a .sh suffix a shell script like this, might be
to make it look more like a normal program, but that's not an issue here.

Link: https://lkml.kernel.org/r/20201026064021.3545418-4-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 101c0315bc92..74ac457c96bb 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -73,7 +73,7 @@ TEST_GEN_FILES += virtual_address_range
 TEST_GEN_FILES += write_to_hugetlbfs
 endif
 
-TEST_PROGS := run_vmtests
+TEST_PROGS := run_vmtests.sh
 
 TEST_FILES := test_vmalloc.sh
 
-- 
cgit v1.2.3-70-g09d2


From f545605cc08e1f1820b4c8748689e7c6d4365d99 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:14 -0800
Subject: selftests/vm: minor cleanup: Makefile and gup_test.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A few cleanups that don't deserve separate patches, but that also should
not clutter up other functional changes:

1. Remove an unnecessary #include <prctl.h>

2. Restore the sorted order of TEST_GEN_FILES.

3. Add -lpthread to the common LDLIBS, as it is harmless and several
   tests use it. This gets rid of one special rule already.

Link: https://lkml.kernel.org/r/20201026064021.3545418-5-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile   | 10 ++++------
 tools/testing/selftests/vm/gup_test.c |  1 -
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 74ac457c96bb..e640d8145cbd 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -21,14 +21,15 @@ MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
 MAKEFLAGS += --no-builtin-rules
 
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
-LDLIBS = -lrt
+LDLIBS = -lrt -lpthread
 TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
-TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += khugepaged
 TEST_GEN_FILES += map_fixed_noreplace
+TEST_GEN_FILES += map_hugetlb
 TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
@@ -37,7 +38,6 @@ TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
-TEST_GEN_FILES += khugepaged
 
 ifeq ($(ARCH),x86_64)
 CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
@@ -80,7 +80,7 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
+$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs
 
 ifeq ($(ARCH),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
@@ -131,8 +131,6 @@ warn_32bit_failure:
 endif
 endif
 
-$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
-
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
 
 $(OUTPUT)/gup_test: ../../../../mm/gup_test.h
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 03f7c4f1beaf..1a54771ad97e 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -4,7 +4,6 @@
 #include <unistd.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
-#include <sys/prctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include "../../../../mm/gup_test.h"
-- 
cgit v1.2.3-70-g09d2


From a9bed1e1c2a9bb36cdd29af0ef48044d1b9e8c2a Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:17 -0800
Subject: selftests/vm: only some gup_test items are really benchmarks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Therefore, some minor cleanup and improvements are in order:

1. Rename the other items appropriately.

2. Stop reporting timing information on the non-benchmark items. It's
   still being recorded and is available, but there's no point in
   cluttering up the report with data that no one reasonably needs to
   check.

3. Don't do iterations, for non-benchmark items.

4. Print out a shorter, more appropriate report for the non-benchmark
   tests.

5. Add the command that was run, to the report. This really helps, as
   there are quite a lot of options now.

6. Use a larger integer type for cmd, now that it's being compared
   Otherwise it doesn't work, because in this case cmd is about 3 billion,
   which is the perfect size for problems with signed vs unsigned int.

Link: https://lkml.kernel.org/r/20201026064021.3545418-6-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/pin_user_pages.rst |  2 +-
 mm/gup_test.c                             | 14 ++++-----
 mm/gup_test.h                             |  8 +++---
 tools/testing/selftests/vm/gup_test.c     | 47 +++++++++++++++++++++++++------
 4 files changed, 51 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/Documentation/core-api/pin_user_pages.rst b/Documentation/core-api/pin_user_pages.rst
index eae972b23224..fcf605be43d0 100644
--- a/Documentation/core-api/pin_user_pages.rst
+++ b/Documentation/core-api/pin_user_pages.rst
@@ -226,7 +226,7 @@ This file::
 has the following new calls to exercise the new pin*() wrapper functions:
 
 * PIN_FAST_BENCHMARK (./gup_test -a)
-* PIN_BENCHMARK (./gup_test -b)
+* PIN_BASIC_TEST (./gup_test -b)
 
 You can monitor how many total dma-pinned pages have been acquired and released
 since the system was booted, via two new /proc/vmstat entries: ::
diff --git a/mm/gup_test.c b/mm/gup_test.c
index 4c2d70d88f24..173bb38f3688 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -13,13 +13,13 @@ static void put_back_pages(unsigned int cmd, struct page **pages,
 
 	switch (cmd) {
 	case GUP_FAST_BENCHMARK:
-	case GUP_BENCHMARK:
+	case GUP_BASIC_TEST:
 		for (i = 0; i < nr_pages; i++)
 			put_page(pages[i]);
 		break;
 
 	case PIN_FAST_BENCHMARK:
-	case PIN_BENCHMARK:
+	case PIN_BASIC_TEST:
 	case PIN_LONGTERM_BENCHMARK:
 		unpin_user_pages(pages, nr_pages);
 		break;
@@ -34,7 +34,7 @@ static void verify_dma_pinned(unsigned int cmd, struct page **pages,
 
 	switch (cmd) {
 	case PIN_FAST_BENCHMARK:
-	case PIN_BENCHMARK:
+	case PIN_BASIC_TEST:
 	case PIN_LONGTERM_BENCHMARK:
 		for (i = 0; i < nr_pages; i++) {
 			page = pages[i];
@@ -94,7 +94,7 @@ static int __gup_test_ioctl(unsigned int cmd,
 			nr = get_user_pages_fast(addr, nr, gup->flags,
 						 pages + i);
 			break;
-		case GUP_BENCHMARK:
+		case GUP_BASIC_TEST:
 			nr = get_user_pages(addr, nr, gup->flags, pages + i,
 					    NULL);
 			break;
@@ -102,7 +102,7 @@ static int __gup_test_ioctl(unsigned int cmd,
 			nr = pin_user_pages_fast(addr, nr, gup->flags,
 						 pages + i);
 			break;
-		case PIN_BENCHMARK:
+		case PIN_BASIC_TEST:
 			nr = pin_user_pages(addr, nr, gup->flags, pages + i,
 					    NULL);
 			break;
@@ -157,10 +157,10 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd,
 
 	switch (cmd) {
 	case GUP_FAST_BENCHMARK:
-	case GUP_BENCHMARK:
 	case PIN_FAST_BENCHMARK:
-	case PIN_BENCHMARK:
 	case PIN_LONGTERM_BENCHMARK:
+	case GUP_BASIC_TEST:
+	case PIN_BASIC_TEST:
 		break;
 	default:
 		return -EINVAL;
diff --git a/mm/gup_test.h b/mm/gup_test.h
index 931c2f3f477a..921b4caad8ef 100644
--- a/mm/gup_test.h
+++ b/mm/gup_test.h
@@ -5,10 +5,10 @@
 #include <linux/types.h>
 
 #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_test)
-#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_test)
-#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_test)
-#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_test)
-#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_test)
+#define PIN_FAST_BENCHMARK	_IOWR('g', 2, struct gup_test)
+#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 3, struct gup_test)
+#define GUP_BASIC_TEST		_IOWR('g', 4, struct gup_test)
+#define PIN_BASIC_TEST		_IOWR('g', 5, struct gup_test)
 
 struct gup_test {
 	__u64 get_delta_usec;
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 1a54771ad97e..f9163e1bb57a 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -14,12 +14,30 @@
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
 
+static char *cmd_to_str(unsigned long cmd)
+{
+	switch (cmd) {
+	case GUP_FAST_BENCHMARK:
+		return "GUP_FAST_BENCHMARK";
+	case PIN_FAST_BENCHMARK:
+		return "PIN_FAST_BENCHMARK";
+	case PIN_LONGTERM_BENCHMARK:
+		return "PIN_LONGTERM_BENCHMARK";
+	case GUP_BASIC_TEST:
+		return "GUP_BASIC_TEST";
+	case PIN_BASIC_TEST:
+		return "PIN_BASIC_TEST";
+	}
+	return "Unknown command";
+}
+
 int main(int argc, char **argv)
 {
 	struct gup_test gup;
 	unsigned long size = 128 * MB;
 	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
-	int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
+	unsigned long cmd = GUP_FAST_BENCHMARK;
+	int flags = MAP_PRIVATE;
 	char *file = "/dev/zero";
 	char *p;
 
@@ -29,7 +47,7 @@ int main(int argc, char **argv)
 			cmd = PIN_FAST_BENCHMARK;
 			break;
 		case 'b':
-			cmd = PIN_BENCHMARK;
+			cmd = PIN_BASIC_TEST;
 			break;
 		case 'L':
 			cmd = PIN_LONGTERM_BENCHMARK;
@@ -50,7 +68,7 @@ int main(int argc, char **argv)
 			thp = 0;
 			break;
 		case 'U':
-			cmd = GUP_BENCHMARK;
+			cmd = GUP_BASIC_TEST;
 			break;
 		case 'u':
 			cmd = GUP_FAST_BENCHMARK;
@@ -104,18 +122,31 @@ int main(int argc, char **argv)
 	for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
 		p[0] = 0;
 
-	for (i = 0; i < repeats; i++) {
+	/* Only report timing information on the *_BENCHMARK commands: */
+	if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+	     (cmd == PIN_LONGTERM_BENCHMARK)) {
+		for (i = 0; i < repeats; i++) {
+			gup.size = size;
+			if (ioctl(fd, cmd, &gup))
+				perror("ioctl"), exit(1);
+
+			printf("%s: Time: get:%lld put:%lld us",
+			       cmd_to_str(cmd), gup.get_delta_usec,
+			       gup.put_delta_usec);
+			if (gup.size != size)
+				printf(", truncated (size: %lld)", gup.size);
+			printf("\n");
+		}
+	} else {
 		gup.size = size;
 		if (ioctl(fd, cmd, &gup)) {
 			perror("ioctl");
 			exit(1);
 		}
 
-		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
-			gup.put_delta_usec);
+		printf("%s: done\n", cmd_to_str(cmd));
 		if (gup.size != size)
-			printf(", truncated (size: %lld)", gup.size);
-		printf("\n");
+			printf("Truncated (size: %lld)\n", gup.size);
 	}
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From f4f9bda418ab8b4dbc5372e9e2a28162f7777154 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:21 -0800
Subject: selftests/vm: gup_test: introduce the dump_pages() sub-test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For quite a while, I was doing a quick hack to gup_test.c (previously,
gup_benchmark.c) whenever I wanted to try out my changes to dump_page().
This makes that hack unnecessary, and instead allows anyone to easily get
the same coverage from a user space program.  That saves a lot of time
because you don't have to change the kernel, in order to test different
pages and options.

The new sub-test takes advantage of the existing gup_test infrastructure,
which already provides a simple user space program, some allocated user
space pages, an ioctl call, pinning of those pages (via either
get_user_pages or pin_user_pages) and a corresponding kernel-side test
invocation.  There's not much more required, mainly just a couple of
inputs from the user.

In fact, the new test re-uses the existing command line options in order
to get various helpful combinations (THP or normal, _fast or slow gup, gup
vs.  pup, and more).

New command line options are: which pages to dump, and what type of
"get/pin" to use.

In order to figure out which pages to dump, the logic is:

* If the user doesn't specify anything, the page 0 (the first page in
  the address range that the program sets up for testing) is dumped.

* Or, the user can type up to 8 page indices anywhere on the command
  line.  If you type more than 8, then it uses the first 8 and ignores the
  remaining items.

For example:

    ./gup_test -ct -F 1 0 19 0x1000

Meaning:
    -c:          dump pages sub-test
    -t:          use THP pages
    -F 1:        use pin_user_pages() instead of get_user_pages()
    0 19 0x1000: dump pages 0, 19, and 4096

Link: https://lkml.kernel.org/r/20201026064021.3545418-7-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/Kconfig                            |  6 ++++
 mm/gup_test.c                         | 56 +++++++++++++++++++++++++++++++++--
 mm/gup_test.h                         | 10 +++++++
 tools/testing/selftests/vm/gup_test.c | 45 ++++++++++++++++++++++++++--
 4 files changed, 113 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/mm/Kconfig b/mm/Kconfig
index e25e5cb2989f..350f2e23a94a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -832,6 +832,12 @@ config GUP_TEST
 	  get_user_pages*() and pin_user_pages*(), as well as smoke tests of
 	  the non-_fast variants.
 
+	  There is also a sub-test that allows running dump_page() on any
+	  of up to eight pages (selected by command line args) within the
+	  range of user-space addresses. These pages are either pinned via
+	  pin_user_pages*(), or pinned via get_user_pages*(), as specified
+	  by other command line arguments.
+
 	  See tools/testing/selftests/vm/gup_test.c
 
 config GUP_GET_PTE_LOW_HIGH
diff --git a/mm/gup_test.c b/mm/gup_test.c
index 173bb38f3688..087ddaf30eb4 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -7,7 +7,7 @@
 #include "gup_test.h"
 
 static void put_back_pages(unsigned int cmd, struct page **pages,
-			   unsigned long nr_pages)
+			   unsigned long nr_pages, unsigned int gup_test_flags)
 {
 	unsigned long i;
 
@@ -23,6 +23,15 @@ static void put_back_pages(unsigned int cmd, struct page **pages,
 	case PIN_LONGTERM_BENCHMARK:
 		unpin_user_pages(pages, nr_pages);
 		break;
+	case DUMP_USER_PAGES_TEST:
+		if (gup_test_flags & GUP_TEST_FLAG_DUMP_PAGES_USE_PIN) {
+			unpin_user_pages(pages, nr_pages);
+		} else {
+			for (i = 0; i < nr_pages; i++)
+				put_page(pages[i]);
+
+		}
+		break;
 	}
 }
 
@@ -49,6 +58,37 @@ static void verify_dma_pinned(unsigned int cmd, struct page **pages,
 	}
 }
 
+static void dump_pages_test(struct gup_test *gup, struct page **pages,
+			    unsigned long nr_pages)
+{
+	unsigned int index_to_dump;
+	unsigned int i;
+
+	/*
+	 * Zero out any user-supplied page index that is out of range. Remember:
+	 * .which_pages[] contains a 1-based set of page indices.
+	 */
+	for (i = 0; i < GUP_TEST_MAX_PAGES_TO_DUMP; i++) {
+		if (gup->which_pages[i] > nr_pages) {
+			pr_warn("ZEROING due to out of range: .which_pages[%u]: %u\n",
+				i, gup->which_pages[i]);
+			gup->which_pages[i] = 0;
+		}
+	}
+
+	for (i = 0; i < GUP_TEST_MAX_PAGES_TO_DUMP; i++) {
+		index_to_dump = gup->which_pages[i];
+
+		if (index_to_dump) {
+			index_to_dump--; // Decode from 1-based, to 0-based
+			pr_info("---- page #%u, starting from user virt addr: 0x%llx\n",
+				index_to_dump, gup->addr);
+			dump_page(pages[index_to_dump],
+				  "gup_test: dump_pages() test");
+		}
+	}
+}
+
 static int __gup_test_ioctl(unsigned int cmd,
 		struct gup_test *gup)
 {
@@ -111,6 +151,14 @@ static int __gup_test_ioctl(unsigned int cmd,
 					    gup->flags | FOLL_LONGTERM,
 					    pages + i, NULL);
 			break;
+		case DUMP_USER_PAGES_TEST:
+			if (gup->flags & GUP_TEST_FLAG_DUMP_PAGES_USE_PIN)
+				nr = pin_user_pages(addr, nr, gup->flags,
+						    pages + i, NULL);
+			else
+				nr = get_user_pages(addr, nr, gup->flags,
+						    pages + i, NULL);
+			break;
 		default:
 			ret = -EINVAL;
 			goto unlock;
@@ -134,9 +182,12 @@ static int __gup_test_ioctl(unsigned int cmd,
 	 */
 	verify_dma_pinned(cmd, pages, nr_pages);
 
+	if (cmd == DUMP_USER_PAGES_TEST)
+		dump_pages_test(gup, pages, nr_pages);
+
 	start_time = ktime_get();
 
-	put_back_pages(cmd, pages, nr_pages);
+	put_back_pages(cmd, pages, nr_pages, gup->flags);
 
 	end_time = ktime_get();
 	gup->put_delta_usec = ktime_us_delta(end_time, start_time);
@@ -161,6 +212,7 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd,
 	case PIN_LONGTERM_BENCHMARK:
 	case GUP_BASIC_TEST:
 	case PIN_BASIC_TEST:
+	case DUMP_USER_PAGES_TEST:
 		break;
 	default:
 		return -EINVAL;
diff --git a/mm/gup_test.h b/mm/gup_test.h
index 921b4caad8ef..90a6713d50eb 100644
--- a/mm/gup_test.h
+++ b/mm/gup_test.h
@@ -9,6 +9,11 @@
 #define PIN_LONGTERM_BENCHMARK	_IOWR('g', 3, struct gup_test)
 #define GUP_BASIC_TEST		_IOWR('g', 4, struct gup_test)
 #define PIN_BASIC_TEST		_IOWR('g', 5, struct gup_test)
+#define DUMP_USER_PAGES_TEST	_IOWR('g', 6, struct gup_test)
+
+#define GUP_TEST_MAX_PAGES_TO_DUMP		8
+
+#define GUP_TEST_FLAG_DUMP_PAGES_USE_PIN	0x1
 
 struct gup_test {
 	__u64 get_delta_usec;
@@ -17,6 +22,11 @@ struct gup_test {
 	__u64 size;
 	__u32 nr_pages_per_call;
 	__u32 flags;
+	/*
+	 * Each non-zero entry is the number of the page (1-based: first page is
+	 * page 1, so that zero entries mean "do nothing") from the .addr base.
+	 */
+	__u32 which_pages[GUP_TEST_MAX_PAGES_TO_DUMP];
 };
 
 #endif	/* __GUP_TEST_H */
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index f9163e1bb57a..6c6336dd3b7f 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -27,13 +27,15 @@ static char *cmd_to_str(unsigned long cmd)
 		return "GUP_BASIC_TEST";
 	case PIN_BASIC_TEST:
 		return "PIN_BASIC_TEST";
+	case DUMP_USER_PAGES_TEST:
+		return "DUMP_USER_PAGES_TEST";
 	}
 	return "Unknown command";
 }
 
 int main(int argc, char **argv)
 {
-	struct gup_test gup;
+	struct gup_test gup = { 0 };
 	unsigned long size = 128 * MB;
 	int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
 	unsigned long cmd = GUP_FAST_BENCHMARK;
@@ -41,7 +43,7 @@ int main(int argc, char **argv)
 	char *file = "/dev/zero";
 	char *p;
 
-	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
+	while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwSH")) != -1) {
 		switch (opt) {
 		case 'a':
 			cmd = PIN_FAST_BENCHMARK;
@@ -52,6 +54,21 @@ int main(int argc, char **argv)
 		case 'L':
 			cmd = PIN_LONGTERM_BENCHMARK;
 			break;
+		case 'c':
+			cmd = DUMP_USER_PAGES_TEST;
+			/*
+			 * Dump page 0 (index 1). May be overridden later, by
+			 * user's non-option arguments.
+			 *
+			 * .which_pages is zero-based, so that zero can mean "do
+			 * nothing".
+			 */
+			gup.which_pages[0] = 1;
+			break;
+		case 'F':
+			/* strtol, so you can pass flags in hex form */
+			gup.flags = strtol(optarg, 0, 0);
+			break;
 		case 'm':
 			size = atoi(optarg) * MB;
 			break;
@@ -91,6 +108,30 @@ int main(int argc, char **argv)
 		}
 	}
 
+	if (optind < argc) {
+		int extra_arg_count = 0;
+		/*
+		 * For example:
+		 *
+		 *   ./gup_test -c 0 1 0x1001
+		 *
+		 * ...to dump pages 0, 1, and 4097
+		 */
+
+		while ((optind < argc) &&
+		       (extra_arg_count < GUP_TEST_MAX_PAGES_TO_DUMP)) {
+			/*
+			 * Do the 1-based indexing here, so that the user can
+			 * use normal 0-based indexing on the command line.
+			 */
+			long page_index = strtol(argv[optind], 0, 0) + 1;
+
+			gup.which_pages[extra_arg_count] = page_index;
+			extra_arg_count++;
+			optind++;
+		}
+	}
+
 	filed = open(file, O_RDWR|O_CREAT);
 	if (filed < 0) {
 		perror("open");
-- 
cgit v1.2.3-70-g09d2


From d943fe81e0bf49dda1369e87d49c5276a02698df Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:24 -0800
Subject: selftests/vm: run_vmtests.sh: update and clean up gup_test invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Run benchmarks on the _fast variants of gup and pup, as originally
intended.

Run the new gup_test sub-test: dump pages.  In addition to exercising the
dump_page() call, it also demonstrates the various options you can use to
specify which pages to dump, and how.

Link: https://lkml.kernel.org/r/20201026064021.3545418-8-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index d1843d5f3c30..4ac84b350d9f 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -123,10 +123,10 @@ else
 	echo "[PASS]"
 fi
 
-echo "--------------------------------------------"
-echo "running 'gup_test -U' (normal/slow gup)"
-echo "--------------------------------------------"
-./gup_test -U
+echo "------------------------------------------------------"
+echo "running: gup_test -u # get_user_pages_fast() benchmark"
+echo "------------------------------------------------------"
+./gup_test -u
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
@@ -134,10 +134,22 @@ else
 	echo "[PASS]"
 fi
 
-echo "------------------------------------------"
-echo "running gup_test -b (pin_user_pages)"
-echo "------------------------------------------"
-./gup_test -b
+echo "------------------------------------------------------"
+echo "running: gup_test -a # pin_user_pages_fast() benchmark"
+echo "------------------------------------------------------"
+./gup_test -a
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "------------------------------------------------------------"
+echo "# Dump pages 0, 19, and 4096, using pin_user_pages:"
+echo "running: gup_test -ct -F 0x1 0 19 0x1000 # dump_page() test"
+echo "------------------------------------------------------------"
+./gup_test -ct -F 0x1 0 19 0x1000
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
-- 
cgit v1.2.3-70-g09d2


From f3a45709d2bb1b6cbab2899a6c8e75dfb8e4aad7 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:28 -0800
Subject: selftests/vm: hmm-tests: remove the libhugetlbfs dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HMM selftests are incredibly useful, but they are only effective if people
actually build and run them.  All the other tests in selftests/vm can be
built with very standard, always-available libraries: libpthread, librt.
The hmm-tests.c program, on the other hand, requires something that is
(much) less readily available: libhugetlbfs.  And so the build will
typically fail for many developers.

A simple attempt to install libhugetlbfs will also run into complications
on some common distros these days: Fedora and Arch Linux (yes, Arch AUR
has it, but that's fragile, as always with AUR).  The library is not
maintained actively enough at the moment, for distros to deal with it.  I
had to build it from source, for Fedora, and that didn't go too smoothly
either.

It turns out that, out of 21 tests in hmm-tests.c, only 2 actually require
functionality from libhugetlbfs.  Therefore, if libhugetlbfs is missing,
simply ifdef those two tests out and allow the developer to at least have
the other 19 tests, if they don't want to pause to work through the above
issues.  Also issue a warning, so that it's clear that there is an
imperfection in the build.

In order to do that, a tiny shell script (check_config.sh) runs a quick
compile (not link, that's too prone to false failures with library paths),
and basically, if the compiler doesn't find hugetlbfs.h in its standard
locations, then the script concludes that libhugetlbfs is not available.
The output is in two files, one for inclusion in hmm-test.c
(local_config.h), and one for inclusion in the Makefile (local_config.mk).

Link: https://lkml.kernel.org/r/20201026064021.3545418-9-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore      |  1 +
 tools/testing/selftests/vm/Makefile        | 24 +++++++++++++++++++++--
 tools/testing/selftests/vm/check_config.sh | 31 ++++++++++++++++++++++++++++++
 tools/testing/selftests/vm/hmm-tests.c     | 10 +++++++++-
 4 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/vm/check_config.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 2c8ddcf41c0e..e90d28bcd518 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -20,3 +20,4 @@ va_128TBswitch
 map_fixed_noreplace
 write_to_hugetlbfs
 hmm-tests
+local_config.*
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index e640d8145cbd..bdd3fe7fd086 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for vm selftests
+
+include local_config.mk
+
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
 
@@ -80,8 +83,6 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs
-
 ifeq ($(ARCH),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
@@ -134,3 +135,22 @@ endif
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
 
 $(OUTPUT)/gup_test: ../../../../mm/gup_test.h
+
+$(OUTPUT)/hmm-tests: local_config.h
+
+# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
+$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
+
+local_config.mk local_config.h: check_config.sh
+	/bin/sh ./check_config.sh $(CC)
+
+EXTRA_CLEAN += local_config.mk local_config.h
+
+ifeq ($(HMM_EXTRA_LIBS),)
+all: warn_missing_hugelibs
+
+warn_missing_hugelibs:
+	@echo ; \
+	echo "Warning: missing libhugetlbfs support. Some HMM tests will be skipped." ; \
+	echo
+endif
diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh
new file mode 100644
index 000000000000..079c8a40b85d
--- /dev/null
+++ b/tools/testing/selftests/vm/check_config.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Probe for libraries and create header files to record the results. Both C
+# header files and Makefile include fragments are created.
+
+OUTPUT_H_FILE=local_config.h
+OUTPUT_MKFILE=local_config.mk
+
+# libhugetlbfs
+tmpname=$(mktemp)
+tmpfile_c=${tmpname}.c
+tmpfile_o=${tmpname}.o
+
+echo "#include <sys/types.h>"        > $tmpfile_c
+echo "#include <hugetlbfs.h>"       >> $tmpfile_c
+echo "int func(void) { return 0; }" >> $tmpfile_c
+
+CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
+$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
+
+if [ -f $tmpfile_o ]; then
+    echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE
+    echo "HMM_EXTRA_LIBS = -lhugetlbfs"             > $OUTPUT_MKFILE
+else
+    echo "// No libhugetlbfs support found"      > $OUTPUT_H_FILE
+    echo "# No libhugetlbfs support found, so:"  > $OUTPUT_MKFILE
+    echo "HMM_EXTRA_LIBS = "                    >> $OUTPUT_MKFILE
+fi
+
+rm ${tmpname}.*
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index c9404ef9698e..5d1ac691b9f4 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -21,12 +21,16 @@
 #include <strings.h>
 #include <time.h>
 #include <pthread.h>
-#include <hugetlbfs.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 
+#include "./local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
+#include <hugetlbfs.h>
+#endif
+
 /*
  * This is a private UAPI to the kernel test module so it isn't exported
  * in the usual include/uapi/... directory.
@@ -662,6 +666,7 @@ TEST_F(hmm, anon_write_huge)
 	hmm_buffer_free(buffer);
 }
 
+#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
 /*
  * Write huge TLBFS page.
  */
@@ -720,6 +725,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
 	buffer->ptr = NULL;
 	hmm_buffer_free(buffer);
 }
+#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
 
 /*
  * Read mmap'ed file memory.
@@ -1336,6 +1342,7 @@ TEST_F(hmm2, snapshot)
 	hmm_buffer_free(buffer);
 }
 
+#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
 /*
  * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
  * should be mapped by a large page table entry.
@@ -1411,6 +1418,7 @@ TEST_F(hmm, compound)
 	buffer->ptr = NULL;
 	hmm_buffer_free(buffer);
 }
+#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
 
 /*
  * Test two devices reading the same memory (double mapped).
-- 
cgit v1.2.3-70-g09d2


From a26c4c62990a3ad5061f72e68f2394a01480265d Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Mon, 14 Dec 2020 19:05:31 -0800
Subject: selftests/vm: 2x speedup for run_vmtests.sh

Each invocation of userfaultfd for "anon" and "shmem" was taking about
6.5 sec to run, contributing to an overall run time of about 22 sec for
run_vmtests.sh.

Reduce the size and bounce input values to the userfaultfd invocation
within run_vmtests.sh, enough to get each invocation down to about 1.0
sec. This should still provide a reasonable smoke test, while staying
within a nominal time budget of around 1 second or so per test. And this
brings the overall running time of run_vmtests.sh down to 11 second.

Link: https://lkml.kernel.org/r/20201026064021.3545418-10-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 4ac84b350d9f..9e8837768ee2 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -160,7 +160,7 @@ fi
 echo "-------------------"
 echo "running userfaultfd"
 echo "-------------------"
-./userfaultfd anon 128 32
+./userfaultfd anon 20 16
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
@@ -185,7 +185,7 @@ rm -f $mnt/ufd_test_file
 echo "-------------------------"
 echo "running userfaultfd_shmem"
 echo "-------------------------"
-./userfaultfd shmem 128 32
+./userfaultfd shmem 20 16
 if [ $? -ne 0 ]; then
 	echo "[FAIL]"
 	exitcode=1
-- 
cgit v1.2.3-70-g09d2


From 7df666253f2610284f653bce0e2e50b4923c84aa Mon Sep 17 00:00:00 2001
From: Kalesh Singh <kaleshsingh@google.com>
Date: Mon, 14 Dec 2020 19:07:25 -0800
Subject: kselftests: vm: add mremap tests

Patch series "Speed up mremap on large regions", v4.

mremap time can be optimized by moving entries at the PMD/PUD level if the
source and destination addresses are PMD/PUD-aligned and PMD/PUD-sized.
Enable moving at the PMD and PUD levels on arm64 and x86.  Other
architectures where this type of move is supported and known to be safe
can also opt-in to these optimizations by enabling HAVE_MOVE_PMD and
HAVE_MOVE_PUD.

Observed Performance Improvements for remapping a PUD-aligned 1GB-sized
region on x86 and arm64:

    - HAVE_MOVE_PMD is already enabled on x86 : N/A
    - Enabling HAVE_MOVE_PUD on x86   : ~13x speed up

    - Enabling HAVE_MOVE_PMD on arm64 : ~ 8x speed up
    - Enabling HAVE_MOVE_PUD on arm64 : ~19x speed up

          Altogether, HAVE_MOVE_PMD and HAVE_MOVE_PUD
          give a total of ~150x speed up on arm64.

This patch (of 4):

Test mremap on regions of various sizes and alignments and validate data
after remapping.  Also provide total time for remapping the region which
is useful for performance comparison of the mremap optimizations that move
pages at the PMD/PUD levels if HAVE_MOVE_PMD and/or HAVE_MOVE_PUD are
enabled.

Link: https://lkml.kernel.org/r/20201014005320.2233162-1-kaleshsingh@google.com
Link: https://lkml.kernel.org/r/20201014005320.2233162-2-kaleshsingh@google.com
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Minchan Kim <minchan@google.com>
Cc: Lokesh Gidra <lokeshgidra@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Hassan Naveed <hnaveed@wavecomp.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Gavin Shan <gshan@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Cc: Jia He <justin.he@arm.com>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: Sandipan Das <sandipan@linux.ibm.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: SeongJae Park <sjpark@amazon.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore    |   1 +
 tools/testing/selftests/vm/Makefile      |   1 +
 tools/testing/selftests/vm/mremap_test.c | 344 +++++++++++++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests   |  11 +
 4 files changed, 357 insertions(+)
 create mode 100644 tools/testing/selftests/vm/mremap_test.c

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index e90d28bcd518..9a35c3f6a557 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -8,6 +8,7 @@ thuge-gen
 compaction_test
 mlock2-tests
 mremap_dontunmap
+mremap_test
 on-fault-limit
 transhuge-stress
 protection_keys
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index bdd3fe7fd086..9a25307f6115 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -37,6 +37,7 @@ TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += mlock-random-test
 TEST_GEN_FILES += mlock2-tests
 TEST_GEN_FILES += mremap_dontunmap
+TEST_GEN_FILES += mremap_test
 TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
new file mode 100644
index 000000000000..9c391d016922
--- /dev/null
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020 Google LLC
+ */
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+
+#include "../kselftest.h"
+
+#define EXPECT_SUCCESS 0
+#define EXPECT_FAILURE 1
+#define NON_OVERLAPPING 0
+#define OVERLAPPING 1
+#define NS_PER_SEC 1000000000ULL
+#define VALIDATION_DEFAULT_THRESHOLD 4	/* 4MB */
+#define VALIDATION_NO_THRESHOLD 0	/* Verify the entire region */
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+
+struct config {
+	unsigned long long src_alignment;
+	unsigned long long dest_alignment;
+	unsigned long long region_size;
+	int overlapping;
+};
+
+struct test {
+	const char *name;
+	struct config config;
+	int expect_failure;
+};
+
+enum {
+	_1KB = 1ULL << 10,	/* 1KB -> not page aligned */
+	_4KB = 4ULL << 10,
+	_8KB = 8ULL << 10,
+	_1MB = 1ULL << 20,
+	_2MB = 2ULL << 20,
+	_4MB = 4ULL << 20,
+	_1GB = 1ULL << 30,
+	_2GB = 2ULL << 30,
+	PTE = _4KB,
+	PMD = _2MB,
+	PUD = _1GB,
+};
+
+#define MAKE_TEST(source_align, destination_align, size,	\
+		  overlaps, should_fail, test_name)		\
+{								\
+	.name = test_name,					\
+	.config = {						\
+		.src_alignment = source_align,			\
+		.dest_alignment = destination_align,		\
+		.region_size = size,				\
+		.overlapping = overlaps,			\
+	},							\
+	.expect_failure = should_fail				\
+}
+
+/*
+ * Returns the start address of the mapping on success, else returns
+ * NULL on failure.
+ */
+static void *get_source_mapping(struct config c)
+{
+	unsigned long long addr = 0ULL;
+	void *src_addr = NULL;
+retry:
+	addr += c.src_alignment;
+	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
+			MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (src_addr == MAP_FAILED) {
+		if (errno == EPERM)
+			goto retry;
+		goto error;
+	}
+	/*
+	 * Check that the address is aligned to the specified alignment.
+	 * Addresses which have alignments that are multiples of that
+	 * specified are not considered valid. For instance, 1GB address is
+	 * 2MB-aligned, however it will not be considered valid for a
+	 * requested alignment of 2MB. This is done to reduce coincidental
+	 * alignment in the tests.
+	 */
+	if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
+			!((unsigned long long) src_addr & c.src_alignment))
+		goto retry;
+
+	if (!src_addr)
+		goto error;
+
+	return src_addr;
+error:
+	ksft_print_msg("Failed to map source region: %s\n",
+			strerror(errno));
+	return NULL;
+}
+
+/* Returns the time taken for the remap on success else returns -1. */
+static long long remap_region(struct config c, unsigned int threshold_mb,
+			      char pattern_seed)
+{
+	void *addr, *src_addr, *dest_addr;
+	unsigned long long i;
+	struct timespec t_start = {0, 0}, t_end = {0, 0};
+	long long  start_ns, end_ns, align_mask, ret, offset;
+	unsigned long long threshold;
+
+	if (threshold_mb == VALIDATION_NO_THRESHOLD)
+		threshold = c.region_size;
+	else
+		threshold = MIN(threshold_mb * _1MB, c.region_size);
+
+	src_addr = get_source_mapping(c);
+	if (!src_addr) {
+		ret = -1;
+		goto out;
+	}
+
+	/* Set byte pattern */
+	srand(pattern_seed);
+	for (i = 0; i < threshold; i++)
+		memset((char *) src_addr + i, (char) rand(), 1);
+
+	/* Mask to zero out lower bits of address for alignment */
+	align_mask = ~(c.dest_alignment - 1);
+	/* Offset of destination address from the end of the source region */
+	offset = (c.overlapping) ? -c.dest_alignment : c.dest_alignment;
+	addr = (void *) (((unsigned long long) src_addr + c.region_size
+			  + offset) & align_mask);
+
+	/* See comment in get_source_mapping() */
+	if (!((unsigned long long) addr & c.dest_alignment))
+		addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+
+	clock_gettime(CLOCK_MONOTONIC, &t_start);
+	dest_addr = mremap(src_addr, c.region_size, c.region_size,
+			MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+	clock_gettime(CLOCK_MONOTONIC, &t_end);
+
+	if (dest_addr == MAP_FAILED) {
+		ksft_print_msg("mremap failed: %s\n", strerror(errno));
+		ret = -1;
+		goto clean_up_src;
+	}
+
+	/* Verify byte pattern after remapping */
+	srand(pattern_seed);
+	for (i = 0; i < threshold; i++) {
+		char c = (char) rand();
+
+		if (((char *) dest_addr)[i] != c) {
+			ksft_print_msg("Data after remap doesn't match at offset %d\n",
+				       i);
+			ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+					((char *) dest_addr)[i] & 0xff);
+			ret = -1;
+			goto clean_up_dest;
+		}
+	}
+
+	start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
+	end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
+	ret = end_ns - start_ns;
+
+/*
+ * Since the destination address is specified using MREMAP_FIXED, subsequent
+ * mremap will unmap any previous mapping at the address range specified by
+ * dest_addr and region_size. This significantly affects the remap time of
+ * subsequent tests. So we clean up mappings after each test.
+ */
+clean_up_dest:
+	munmap(dest_addr, c.region_size);
+clean_up_src:
+	munmap(src_addr, c.region_size);
+out:
+	return ret;
+}
+
+static void run_mremap_test_case(struct test test_case, int *failures,
+				 unsigned int threshold_mb,
+				 unsigned int pattern_seed)
+{
+	long long remap_time = remap_region(test_case.config, threshold_mb,
+					    pattern_seed);
+
+	if (remap_time < 0) {
+		if (test_case.expect_failure)
+			ksft_test_result_pass("%s\n\tExpected mremap failure\n",
+					      test_case.name);
+		else {
+			ksft_test_result_fail("%s\n", test_case.name);
+			*failures += 1;
+		}
+	} else {
+		/*
+		 * Comparing mremap time is only applicable if entire region
+		 * was faulted in.
+		 */
+		if (threshold_mb == VALIDATION_NO_THRESHOLD ||
+		    test_case.config.region_size <= threshold_mb * _1MB)
+			ksft_test_result_pass("%s\n\tmremap time: %12lldns\n",
+					      test_case.name, remap_time);
+		else
+			ksft_test_result_pass("%s\n", test_case.name);
+	}
+}
+
+static void usage(const char *cmd)
+{
+	fprintf(stderr,
+		"Usage: %s [[-t <threshold_mb>] [-p <pattern_seed>]]\n"
+		"-t\t only validate threshold_mb of the remapped region\n"
+		"  \t if 0 is supplied no threshold is used; all tests\n"
+		"  \t are run and remapped regions validated fully.\n"
+		"  \t The default threshold used is 4MB.\n"
+		"-p\t provide a seed to generate the random pattern for\n"
+		"  \t validating the remapped region.\n", cmd);
+}
+
+static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
+		      unsigned int *pattern_seed)
+{
+	const char *optstr = "t:p:";
+	int opt;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 't':
+			*threshold_mb = atoi(optarg);
+			break;
+		case 'p':
+			*pattern_seed = atoi(optarg);
+			break;
+		default:
+			usage(argv[0]);
+			return -1;
+		}
+	}
+
+	if (optind < argc) {
+		usage(argv[0]);
+		return -1;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int failures = 0;
+	int i, run_perf_tests;
+	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
+	unsigned int pattern_seed;
+	time_t t;
+
+	pattern_seed = (unsigned int) time(&t);
+
+	if (parse_args(argc, argv, &threshold_mb, &pattern_seed) < 0)
+		exit(EXIT_FAILURE);
+
+	ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
+		       threshold_mb, pattern_seed);
+
+	struct test test_cases[] = {
+		/* Expected mremap failures */
+		MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE,
+		  "mremap - Source and Destination Regions Overlapping"),
+		MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
+		  "mremap - Destination Address Misaligned (1KB-aligned)"),
+		MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
+		  "mremap - Source Address Misaligned (1KB-aligned)"),
+
+		/* Src addr PTE aligned */
+		MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "8KB mremap - Source PTE-aligned, Destination PTE-aligned"),
+
+		/* Src addr 1MB aligned */
+		MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"),
+		MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"),
+
+		/* Src addr PMD aligned */
+		MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "4MB mremap - Source PMD-aligned, Destination PTE-aligned"),
+		MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"),
+		MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "4MB mremap - Source PMD-aligned, Destination PMD-aligned"),
+
+		/* Src addr PUD aligned */
+		MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination PTE-aligned"),
+		MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"),
+		MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination PMD-aligned"),
+		MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "2GB mremap - Source PUD-aligned, Destination PUD-aligned"),
+	};
+
+	struct test perf_test_cases[] = {
+		/*
+		 * mremap 1GB region - Page table level aligned time
+		 * comparison.
+		 */
+		MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "1GB mremap - Source PTE-aligned, Destination PTE-aligned"),
+		MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "1GB mremap - Source PMD-aligned, Destination PMD-aligned"),
+		MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+		  "1GB mremap - Source PUD-aligned, Destination PUD-aligned"),
+	};
+
+	run_perf_tests =  (threshold_mb == VALIDATION_NO_THRESHOLD) ||
+				(threshold_mb * _1MB >= _1GB);
+
+	ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
+		      ARRAY_SIZE(perf_test_cases) : 0));
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+		run_mremap_test_case(test_cases[i], &failures, threshold_mb,
+				     pattern_seed);
+
+	if (run_perf_tests) {
+		ksft_print_msg("\n%s\n",
+		 "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
+		for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
+			run_mremap_test_case(perf_test_cases[i], &failures,
+					     threshold_mb, pattern_seed);
+	}
+
+	if (failures > 0)
+		ksft_exit_fail();
+	else
+		ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 9e8837768ee2..e953f3cd9664 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -253,6 +253,17 @@ else
 	echo "[PASS]"
 fi
 
+echo "-------------------"
+echo "running mremap_test"
+echo "-------------------"
+./mremap_test
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 echo "-----------------"
 echo "running thuge-gen"
 echo "-----------------"
-- 
cgit v1.2.3-70-g09d2


From f289041ed4cf9a3f6e8a32068fef9ffb2acc5662 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 14 Dec 2020 19:13:45 -0800
Subject: mm, page_poison: remove CONFIG_PAGE_POISONING_ZERO

CONFIG_PAGE_POISONING_ZERO uses the zero pattern instead of 0xAA.  It was
introduced by commit 1414c7f4f7d7 ("mm/page_poisoning.c: allow for zero
poisoning"), noting that using zeroes retains the benefit of sanitizing
content of freed pages, with the benefit of not having to zero them again
on alloc, and the downside of making some forms of corruption (stray
writes of NULLs) harder to detect than with the 0xAA pattern.  Together
with CONFIG_PAGE_POISONING_NO_SANITY it made possible to sanitize the
contents on free without checking it back on alloc.

These days we have the init_on_free() option to achieve sanitization with
zeroes and to save clearing on alloc (and without checking on alloc).
Arguably if someone does choose to check the poison for corruption on
alloc, the savings of not clearing the page are secondary, and it makes
sense to always use the 0xAA poison pattern.  Thus, remove the
CONFIG_PAGE_POISONING_ZERO option for being redundant.

Link: https://lkml.kernel.org/r/20201113104033.22907-6-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Laura Abbott <labbott@kernel.org>
Cc: Mateusz Nosek <mateusznosek0@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/poison.h       |  4 ----
 mm/Kconfig.debug             | 12 ------------
 mm/page_alloc.c              |  8 +-------
 tools/include/linux/poison.h |  6 +-----
 4 files changed, 2 insertions(+), 28 deletions(-)

(limited to 'tools')

diff --git a/include/linux/poison.h b/include/linux/poison.h
index dc8ae5d8db03..aff1c9250c82 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -27,11 +27,7 @@
 #define TIMER_ENTRY_STATIC	((void *) 0x300 + POISON_POINTER_DELTA)
 
 /********** mm/page_poison.c **********/
-#ifdef CONFIG_PAGE_POISONING_ZERO
-#define PAGE_POISON 0x00
-#else
 #define PAGE_POISON 0xaa
-#endif
 
 /********** mm/page_alloc.c ************/
 
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 14e29fe5bfa6..1e73717802f8 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -80,18 +80,6 @@ config PAGE_POISONING
 
 	  If unsure, say N
 
-config PAGE_POISONING_ZERO
-	bool "Use zero for poisoning instead of debugging value"
-	depends on PAGE_POISONING
-	help
-	   Instead of using the existing poison value, fill the pages with
-	   zeros. This makes it harder to detect when errors are occurring
-	   due to sanitization but the zeroing at free means that it is
-	   no longer necessary to write zeros when GFP_ZERO is used on
-	   allocation.
-
-	   If unsure, say N
-
 config DEBUG_PAGE_REF
 	bool "Enable tracepoint to track down page reference manipulation"
 	depends on DEBUG_KERNEL
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index efcd1baa35e4..918647ff6eef 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2228,12 +2228,6 @@ static inline int check_new_page(struct page *page)
 	return 1;
 }
 
-static inline bool free_pages_prezeroed(void)
-{
-	return (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
-		page_poisoning_enabled_static()) || want_init_on_free();
-}
-
 #ifdef CONFIG_DEBUG_VM
 /*
  * With DEBUG_VM enabled, order-0 pages are checked for expected state when
@@ -2296,7 +2290,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	kernel_unpoison_pages(page, 1 << order);
 	set_page_owner(page, order, gfp_flags);
 
-	if (!free_pages_prezeroed() && want_init_on_alloc(gfp_flags))
+	if (!want_init_on_free() && want_init_on_alloc(gfp_flags))
 		kernel_init_free_pages(page, 1 << order);
 }
 
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index d29725769107..2e6338ac5eed 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -35,12 +35,8 @@
  */
 #define TIMER_ENTRY_STATIC	((void *) 0x300 + POISON_POINTER_DELTA)
 
-/********** mm/debug-pagealloc.c **********/
-#ifdef CONFIG_PAGE_POISONING_ZERO
-#define PAGE_POISON 0x00
-#else
+/********** mm/page_poison.c **********/
 #define PAGE_POISON 0xaa
-#endif
 
 /********** mm/page_alloc.c ************/
 
-- 
cgit v1.2.3-70-g09d2


From 77f962e7ae24e5fa7b257b8242c62e716119a312 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Mon, 14 Dec 2020 19:13:58 -0800
Subject: userfaultfd: selftests: make __{s,u}64 format specifiers portable

On certain platforms (powerpcle is the one on which I ran into this),
"%Ld" and "%Lu" are unsuitable for printing __s64 and __u64, respectively,
resulting in build warnings.  Cast to {u,}int64_t, and use the PRI{d,u}64
macros defined in inttypes.h to print them.  This ought to be portable to
all platforms.

Splitting this off into a separate macro lets us remove some lines, and
get rid of some (I would argue) stylistically odd cases where we joined
printf() and exit() into a single statement with a ,.

Finally, this also fixes a "missing braces around initializer" warning
when we initialize prms in wp_range().

[axelrasmussen@google.com: v2]
  Link: https://lkml.kernel.org/r/20201203180244.1811601-1-axelrasmussen@google.com

Link: https://lkml.kernel.org/r/20201202211542.1121189-1-axelrasmussen@google.com
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Joe Perches <joe@perches.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Alan Gilbert <dgilbert@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 81 ++++++++++++++------------------
 1 file changed, 35 insertions(+), 46 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index c4425597769a..a7059a6bc215 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -55,6 +55,8 @@
 #include <setjmp.h>
 #include <stdbool.h>
 #include <assert.h>
+#include <inttypes.h>
+#include <stdint.h>
 
 #include "../kselftest.h"
 
@@ -135,6 +137,13 @@ static void usage(void)
 	exit(1);
 }
 
+#define uffd_error(code, fmt, ...)                                             \
+	do {                                                                   \
+		fprintf(stderr, fmt, ##__VA_ARGS__);                           \
+		fprintf(stderr, ": %" PRId64 "\n", (int64_t)(code));           \
+		exit(1);                                                       \
+	} while (0)
+
 static void uffd_stats_reset(struct uffd_stats *uffd_stats,
 			     unsigned long n_cpus)
 {
@@ -338,7 +347,7 @@ static int my_bcmp(char *str1, char *str2, size_t n)
 
 static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
 {
-	struct uffdio_writeprotect prms = { 0 };
+	struct uffdio_writeprotect prms;
 
 	/* Write protection page faults */
 	prms.range.start = start;
@@ -347,7 +356,8 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
 	prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
 
 	if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
-		fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
+		fprintf(stderr, "clear WP failed for address 0x%" PRIx64 "\n",
+			(uint64_t)start);
 		exit(1);
 	}
 }
@@ -481,14 +491,11 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
 	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
 		if (uffdio_copy->copy != -EEXIST) {
-			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
-				uffdio_copy->copy);
-			exit(1);
+			uffd_error(uffdio_copy->copy,
+				   "UFFDIO_COPY retry error");
 		}
-	} else {
-		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
-			uffdio_copy->copy); exit(1);
-	}
+	} else
+		uffd_error(uffdio_copy->copy, "UFFDIO_COPY retry unexpected");
 }
 
 static int __copy_page(int ufd, unsigned long offset, bool retry)
@@ -509,14 +516,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
 	uffdio_copy.copy = 0;
 	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
-		if (uffdio_copy.copy != -EEXIST) {
-			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
-				uffdio_copy.copy);
-			exit(1);
-		}
+		if (uffdio_copy.copy != -EEXIST)
+			uffd_error(uffdio_copy.copy, "UFFDIO_COPY error");
 	} else if (uffdio_copy.copy != page_size) {
-		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
-			uffdio_copy.copy); exit(1);
+		uffd_error(uffdio_copy.copy, "UFFDIO_COPY unexpected copy");
 	} else {
 		if (test_uffdio_copy_eexist && retry) {
 			test_uffdio_copy_eexist = false;
@@ -795,7 +798,8 @@ static int userfaultfd_open(int features)
 		return 1;
 	}
 	if (uffdio_api.api != UFFD_API) {
-		fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
+		fprintf(stderr, "UFFDIO_API error: %" PRIu64 "\n",
+			(uint64_t)uffdio_api.api);
 		return 1;
 	}
 
@@ -957,13 +961,12 @@ static void retry_uffdio_zeropage(int ufd,
 				     offset);
 	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
 		if (uffdio_zeropage->zeropage != -EEXIST) {
-			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
-				uffdio_zeropage->zeropage);
-			exit(1);
+			uffd_error(uffdio_zeropage->zeropage,
+				   "UFFDIO_ZEROPAGE retry error");
 		}
 	} else {
-		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
-			uffdio_zeropage->zeropage); exit(1);
+		uffd_error(uffdio_zeropage->zeropage,
+			   "UFFDIO_ZEROPAGE retry unexpected");
 	}
 }
 
@@ -972,6 +975,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	struct uffdio_zeropage uffdio_zeropage;
 	int ret;
 	unsigned long has_zeropage;
+	__s64 res;
 
 	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
 
@@ -983,29 +987,17 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	uffdio_zeropage.range.len = page_size;
 	uffdio_zeropage.mode = 0;
 	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+	res = uffdio_zeropage.zeropage;
 	if (ret) {
 		/* real retval in ufdio_zeropage.zeropage */
 		if (has_zeropage) {
-			if (uffdio_zeropage.zeropage == -EEXIST) {
-				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
-				exit(1);
-			} else {
-				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
-					uffdio_zeropage.zeropage);
-				exit(1);
-			}
-		} else {
-			if (uffdio_zeropage.zeropage != -EINVAL) {
-				fprintf(stderr,
-					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
-					uffdio_zeropage.zeropage);
-				exit(1);
-			}
-		}
+			uffd_error(res, "UFFDIO_ZEROPAGE %s",
+				   res == -EEXIST ? "-EEXIST" : "error");
+		} else if (res != -EINVAL)
+			uffd_error(res, "UFFDIO_ZEROPAGE not -EINVAL");
 	} else if (has_zeropage) {
-		if (uffdio_zeropage.zeropage != page_size) {
-			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
-				uffdio_zeropage.zeropage); exit(1);
+		if (res != page_size) {
+			uffd_error(res, "UFFDIO_ZEROPAGE unexpected");
 		} else {
 			if (test_uffdio_zeropage_eexist && retry) {
 				test_uffdio_zeropage_eexist = false;
@@ -1014,11 +1006,8 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 			}
 			return 1;
 		}
-	} else {
-		fprintf(stderr,
-			"UFFDIO_ZEROPAGE succeeded %Ld\n",
-			uffdio_zeropage.zeropage); exit(1);
-	}
+	} else
+		uffd_error(res, "UFFDIO_ZEROPAGE succeeded");
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 164c50be2878f4caf6d7973e8e0e438f182f4ded Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 14 Dec 2020 19:14:02 -0800
Subject: userfaultfd/selftests: always dump something in modes

Patch series "userfaultfd: selftests: Small fixes".

Some very trivial fixes that I kept locally to userfaultfd selftest
program.

This patch (of 3):

BOUNCE_POLL is a special bit that if cleared it means "READ" instead.
Dump that too otherwise we'll see tests with empty modes.

Link: https://lkml.kernel.org/r/20201208024709.7701-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20201208024709.7701-2-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index a7059a6bc215..7fc1eaf4ceaa 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -1291,6 +1291,8 @@ static int userfaultfd_stress(void)
 			printf(" ver");
 		if (bounces & BOUNCE_POLL)
 			printf(" poll");
+		else
+			printf(" read");
 		printf(", ");
 		fflush(stdout);
 
-- 
cgit v1.2.3-70-g09d2


From 1e17a24edf9bef891bbdd02617eaab4fa6efcd7f Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 14 Dec 2020 19:14:05 -0800
Subject: userfaultfd/selftests: fix retval check for userfaultfd_open()

userfaultfd_open() returns 1 for errors rather than negatives.  Fix it on
all the callers so when UFFDIO_API failed the test will bail out.

Link: https://lkml.kernel.org/r/20201208024709.7701-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 7fc1eaf4ceaa..6e482e2f198c 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -1029,7 +1029,7 @@ static int userfaultfd_zeropage_test(void)
 	if (uffd_test_ops->release_pages(area_dst))
 		return 1;
 
-	if (userfaultfd_open(0) < 0)
+	if (userfaultfd_open(0))
 		return 1;
 	uffdio_register.range.start = (unsigned long) area_dst;
 	uffdio_register.range.len = nr_pages * page_size;
@@ -1079,7 +1079,7 @@ static int userfaultfd_events_test(void)
 
 	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
 		UFFD_FEATURE_EVENT_REMOVE;
-	if (userfaultfd_open(features) < 0)
+	if (userfaultfd_open(features))
 		return 1;
 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
 
@@ -1151,7 +1151,7 @@ static int userfaultfd_sig_test(void)
 		return 1;
 
 	features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
-	if (userfaultfd_open(features) < 0)
+	if (userfaultfd_open(features))
 		return 1;
 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
 
@@ -1231,7 +1231,7 @@ static int userfaultfd_stress(void)
 	if (!area_dst)
 		return 1;
 
-	if (userfaultfd_open(0) < 0)
+	if (userfaultfd_open(0))
 		return 1;
 
 	count_verify = malloc(nr_pages * sizeof(unsigned long long));
-- 
cgit v1.2.3-70-g09d2


From d9f411bacfa0c3d0d97580a66f88e70f92bcf58e Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 14 Dec 2020 19:14:08 -0800
Subject: userfaultfd/selftests: hint the test runner on required privilege

Now userfaultfd test program requires either root or ptrace privilege due
to the signal/event tests.  When UFFDIO_API failed, hint the test runner
about this fact verbosely.

Link: https://lkml.kernel.org/r/20201208024709.7701-4-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 6e482e2f198c..9d8650d4ba5a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -794,7 +794,8 @@ static int userfaultfd_open(int features)
 	uffdio_api.api = UFFD_API;
 	uffdio_api.features = features;
 	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
-		fprintf(stderr, "UFFDIO_API\n");
+		fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to "
+			"run with either root or ptrace capability.\n");
 		return 1;
 	}
 	if (uffdio_api.api != UFFD_API) {
-- 
cgit v1.2.3-70-g09d2


From febebaf366868a4204deb3955ef5dda17f676fc1 Mon Sep 17 00:00:00 2001
From: Francis Laniel <laniel_francis@privacyrequired.com>
Date: Tue, 15 Dec 2020 20:43:54 -0800
Subject: drivers/misc/lkdtm: add new file in LKDTM to test fortified strscpy

This new test ensures that fortified strscpy has the same behavior than
vanilla strscpy (e.g.  returning -E2BIG when src content is truncated).
Finally, it generates a crash at runtime because there is a write overflow
in destination string.

Link: https://lkml.kernel.org/r/20201122162451.27551-5-laniel_francis@privacyrequired.com
Signed-off-by: Francis Laniel <laniel_francis@privacyrequired.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Daniel Axtens <dja@axtens.net>
Cc: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/misc/lkdtm/Makefile             |  1 +
 drivers/misc/lkdtm/core.c               |  1 +
 drivers/misc/lkdtm/fortify.c            | 82 +++++++++++++++++++++++++++++++++
 drivers/misc/lkdtm/lkdtm.h              |  3 ++
 tools/testing/selftests/lkdtm/tests.txt |  1 +
 5 files changed, 88 insertions(+)
 create mode 100644 drivers/misc/lkdtm/fortify.c

(limited to 'tools')

diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index 1ef7888a12b5..7727bfd32be9 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -10,6 +10,7 @@ lkdtm-$(CONFIG_LKDTM)		+= rodata_objcopy.o
 lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
 lkdtm-$(CONFIG_LKDTM)		+= stackleak.o
 lkdtm-$(CONFIG_LKDTM)		+= cfi.o
+lkdtm-$(CONFIG_LKDTM)		+= fortify.o
 
 KASAN_SANITIZE_rodata.o		:= n
 KASAN_SANITIZE_stackleak.o	:= n
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index b8c51a633fcc..3c0a67f072c0 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -175,6 +175,7 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(USERCOPY_KERNEL),
 	CRASHTYPE(STACKLEAK_ERASING),
 	CRASHTYPE(CFI_FORWARD_PROTO),
+	CRASHTYPE(FORTIFIED_STRSCPY),
 #ifdef CONFIG_X86_32
 	CRASHTYPE(DOUBLE_FAULT),
 #endif
diff --git a/drivers/misc/lkdtm/fortify.c b/drivers/misc/lkdtm/fortify.c
new file mode 100644
index 000000000000..faf29cf04baa
--- /dev/null
+++ b/drivers/misc/lkdtm/fortify.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Francis Laniel <laniel_francis@privacyrequired.com>
+ *
+ * Add tests related to fortified functions in this file.
+ */
+#include "lkdtm.h"
+#include <linux/string.h>
+#include <linux/slab.h>
+
+
+/*
+ * Calls fortified strscpy to test that it returns the same result as vanilla
+ * strscpy and generate a panic because there is a write overflow (i.e. src
+ * length is greater than dst length).
+ */
+void lkdtm_FORTIFIED_STRSCPY(void)
+{
+	char *src;
+	char dst[5];
+
+	struct {
+		union {
+			char big[10];
+			char src[5];
+		};
+	} weird = { .big = "hello!" };
+	char weird_dst[sizeof(weird.src) + 1];
+
+	src = kstrdup("foobar", GFP_KERNEL);
+
+	if (src == NULL)
+		return;
+
+	/* Vanilla strscpy returns -E2BIG if size is 0. */
+	if (strscpy(dst, src, 0) != -E2BIG)
+		pr_warn("FAIL: strscpy() of 0 length did not return -E2BIG\n");
+
+	/* Vanilla strscpy returns -E2BIG if src is truncated. */
+	if (strscpy(dst, src, sizeof(dst)) != -E2BIG)
+		pr_warn("FAIL: strscpy() did not return -E2BIG while src is truncated\n");
+
+	/* After above call, dst must contain "foob" because src was truncated. */
+	if (strncmp(dst, "foob", sizeof(dst)) != 0)
+		pr_warn("FAIL: after strscpy() dst does not contain \"foob\" but \"%s\"\n",
+			dst);
+
+	/* Shrink src so the strscpy() below succeeds. */
+	src[3] = '\0';
+
+	/*
+	 * Vanilla strscpy returns number of character copied if everything goes
+	 * well.
+	 */
+	if (strscpy(dst, src, sizeof(dst)) != 3)
+		pr_warn("FAIL: strscpy() did not return 3 while src was copied entirely truncated\n");
+
+	/* After above call, dst must contain "foo" because src was copied. */
+	if (strncmp(dst, "foo", sizeof(dst)) != 0)
+		pr_warn("FAIL: after strscpy() dst does not contain \"foo\" but \"%s\"\n",
+			dst);
+
+	/* Test when src is embedded inside a union. */
+	strscpy(weird_dst, weird.src, sizeof(weird_dst));
+
+	if (strcmp(weird_dst, "hello") != 0)
+		pr_warn("FAIL: after strscpy() weird_dst does not contain \"hello\" but \"%s\"\n",
+			weird_dst);
+
+	/* Restore src to its initial value. */
+	src[3] = 'b';
+
+	/*
+	 * Use strlen here so size cannot be known at compile time and there is
+	 * a runtime write overflow.
+	 */
+	strscpy(dst, src, strlen(src));
+
+	pr_warn("FAIL: No overflow in above strscpy()\n");
+
+	kfree(src);
+}
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 49e6b945feb7..138f06254b61 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -104,4 +104,7 @@ void lkdtm_STACKLEAK_ERASING(void);
 /* cfi.c */
 void lkdtm_CFI_FORWARD_PROTO(void);
 
+/* fortify.c */
+void lkdtm_FORTIFIED_STRSCPY(void);
+
 #endif
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 74a8d329a72c..92ba4cc41314 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -68,3 +68,4 @@ USERCOPY_STACK_BEYOND
 USERCOPY_KERNEL
 STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
+FORTIFIED_STRSCPY
-- 
cgit v1.2.3-70-g09d2


From 44f6a7c0755d8dd453c70557e11687bb080a6f21 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 14 Dec 2020 16:04:20 -0600
Subject: objtool: Fix seg fault with Clang non-section symbols

The Clang assembler likes to strip section symbols, which means objtool
can't reference some text code by its section.  This confuses objtool
greatly, causing it to seg fault.

The fix is similar to what was done before, for ORC reloc generation:

  e81e07244325 ("objtool: Support Clang non-section symbols in ORC generation")

Factor out that code into a common helper and use it for static call
reloc generation as well.

Reported-by: Arnd Bergmann <arnd@kernel.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Link: https://github.com/ClangBuiltLinux/linux/issues/1207
Link: https://lkml.kernel.org/r/ba6b6c0f0dd5acbba66e403955a967d9fdd1726a.1607983452.git.jpoimboe@redhat.com
---
 tools/objtool/check.c   | 11 +++++++++--
 tools/objtool/elf.c     | 26 ++++++++++++++++++++++++++
 tools/objtool/elf.h     |  2 ++
 tools/objtool/orc_gen.c | 29 +++++------------------------
 4 files changed, 42 insertions(+), 26 deletions(-)

(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c6ab44543c92..5f8d3eed78a1 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -467,13 +467,20 @@ static int create_static_call_sections(struct objtool_file *file)
 
 		/* populate reloc for 'addr' */
 		reloc = malloc(sizeof(*reloc));
+
 		if (!reloc) {
 			perror("malloc");
 			return -1;
 		}
 		memset(reloc, 0, sizeof(*reloc));
-		reloc->sym = insn->sec->sym;
-		reloc->addend = insn->offset;
+
+		insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc);
+		if (!reloc->sym) {
+			WARN_FUNC("static call tramp: missing containing symbol",
+				  insn->sec, insn->offset);
+			return -1;
+		}
+
 		reloc->type = R_X86_64_PC32;
 		reloc->offset = idx * sizeof(struct static_call_site);
 		reloc->sec = reloc_sec;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 4e1d7460574b..be89c741ba9a 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -262,6 +262,32 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns
 	return find_reloc_by_dest_range(elf, sec, offset, 1);
 }
 
+void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+			      struct reloc *reloc)
+{
+	if (sec->sym) {
+		reloc->sym = sec->sym;
+		reloc->addend = offset;
+		return;
+	}
+
+	/*
+	 * The Clang assembler strips section symbols, so we have to reference
+	 * the function symbol instead:
+	 */
+	reloc->sym = find_symbol_containing(sec, offset);
+	if (!reloc->sym) {
+		/*
+		 * Hack alert.  This happens when we need to reference the NOP
+		 * pad insn immediately after the function.
+		 */
+		reloc->sym = find_symbol_containing(sec, offset - 1);
+	}
+
+	if (reloc->sym)
+		reloc->addend = offset - reloc->sym->offset;
+}
+
 static int read_sections(struct elf *elf)
 {
 	Elf_Scn *s = NULL;
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 807f8c670097..e6890cc70a25 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -140,6 +140,8 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns
 struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
 				     unsigned long offset, unsigned int len);
 struct symbol *find_func_containing(struct section *sec, unsigned long offset);
+void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
+			      struct reloc *reloc);
 int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
 
 #define for_each_sec(file, sec)						\
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 235663b96adc..9ce68b385a1b 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -105,30 +105,11 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti
 	}
 	memset(reloc, 0, sizeof(*reloc));
 
-	if (insn_sec->sym) {
-		reloc->sym = insn_sec->sym;
-		reloc->addend = insn_off;
-	} else {
-		/*
-		 * The Clang assembler doesn't produce section symbols, so we
-		 * have to reference the function symbol instead:
-		 */
-		reloc->sym = find_symbol_containing(insn_sec, insn_off);
-		if (!reloc->sym) {
-			/*
-			 * Hack alert.  This happens when we need to reference
-			 * the NOP pad insn immediately after the function.
-			 */
-			reloc->sym = find_symbol_containing(insn_sec,
-							   insn_off - 1);
-		}
-		if (!reloc->sym) {
-			WARN("missing symbol for insn at offset 0x%lx\n",
-			     insn_off);
-			return -1;
-		}
-
-		reloc->addend = insn_off - reloc->sym->offset;
+	insn_to_reloc_sym_addend(insn_sec, insn_off, reloc);
+	if (!reloc->sym) {
+		WARN("missing symbol for insn at offset 0x%lx",
+		     insn_off);
+		return -1;
 	}
 
 	reloc->type = R_X86_64_PC32;
-- 
cgit v1.2.3-70-g09d2


From 09d59c2f3465fb01e65a0c96698697b026ea8e79 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 4 Dec 2020 00:08:36 +0100
Subject: tools build: Add missing libcap to test-all.bin target

We're missing -lcap in test-all.bin target, so in case it's the only
library missing (if more are missing test-all.bin fails anyway), we will
falsely claim that we detected it and fail build, like:

  $ make
  ...
  Auto-detecting system features:
  ...                         dwarf: [ on  ]
  ...            dwarf_getlocations: [ on  ]
  ...                         glibc: [ on  ]
  ...                        libbfd: [ on  ]
  ...                libbfd-buildid: [ on  ]
  ...                        libcap: [ on  ]
  ...                        libelf: [ on  ]
  ...                       libnuma: [ on  ]
  ...        numa_num_possible_cpus: [ on  ]
  ...                       libperl: [ on  ]
  ...                     libpython: [ on  ]
  ...                     libcrypto: [ on  ]
  ...                     libunwind: [ on  ]
  ...            libdw-dwarf-unwind: [ on  ]
  ...                          zlib: [ on  ]
  ...                          lzma: [ on  ]
  ...                     get_cpuid: [ on  ]
  ...                           bpf: [ on  ]
  ...                        libaio: [ on  ]
  ...                       libzstd: [ on  ]
  ...        disassembler-four-args: [ on  ]

  ...

    CC       builtin-ftrace.o

  In file included from builtin-ftrace.c:29:
  util/cap.h:11:10: fatal error: sys/capability.h: No such file or directory
     11 | #include <sys/capability.h>
        |          ^~~~~~~~~~~~~~~~~~
  compilation terminated.

Fixes: 74d5f3d06f707eb5 ("tools build: Add capability-related feature detection")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Igor Lubashev <ilubashe@akamai.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201203230836.3751981-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/feature/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index cdde783f3018..89ba522e377d 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -90,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
 ###############################
 
 $(OUTPUT)test-all.bin:
-	$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
+	$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
 
 $(OUTPUT)test-hello.bin:
 	$(BUILD)
-- 
cgit v1.2.3-70-g09d2


From 2eb5dd418034ecea2f7031e3d33f2991a878b148 Mon Sep 17 00:00:00 2001
From: Zheng Zengkai <zhengzengkai@huawei.com>
Date: Fri, 3 Jul 2020 17:33:44 +0800
Subject: perf record: Fix memory leak when using '--user-regs=?' to list
 registers

When using 'perf record's option '-I' or '--user-regs=' along with
argument '?' to list available register names, memory of variable 'os'
allocated by strdup() needs to be released before __parse_regs()
returns, otherwise memory leak will occur.

Fixes: bcc84ec65ad1 ("perf record: Add ability to name registers to record")
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20200703093344.189450-1-zhengzengkai@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-regs-options.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index e687497b3aac..a4a100425b3a 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -54,7 +54,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
 #endif
 				fputc('\n', stderr);
 				/* just printing available regs */
-				return -1;
+				goto error;
 			}
 #ifdef HAVE_PERF_REGS_SUPPORT
 			for (r = sample_reg_masks; r->name; r++) {
-- 
cgit v1.2.3-70-g09d2


From bf53fc6b5f415cddc7118091cb8fd6a211b2320d Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Fri, 4 Dec 2020 09:17:02 -0300
Subject: perf unwind: Fix separate debug info files when using elfutils'
 libdw's unwinder

elfutils needs to be provided main binary and separate debug info file
respectively. Providing separate debug info file instead of the main
binary is not sufficient.

One needs to try both supplied filename and its possible cache by its
build-id depending on the use case.

Signed-off-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/unwind-libdw.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 7a3dbc259cec..0ada907c60d4 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -20,10 +20,24 @@
 
 static char *debuginfo_path;
 
+static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata,
+			    const char *modname __maybe_unused, Dwarf_Addr base __maybe_unused,
+			    const char *file_name, const char *debuglink_file __maybe_unused,
+			    GElf_Word debuglink_crc __maybe_unused, char **debuginfo_file_name)
+{
+	const struct dso *dso = *userdata;
+
+	assert(dso);
+	if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename))
+		*debuginfo_file_name = strdup(dso->symsrc_filename);
+	return -1;
+}
+
 static const Dwfl_Callbacks offline_callbacks = {
-	.find_debuginfo		= dwfl_standard_find_debuginfo,
+	.find_debuginfo		= __find_debuginfo,
 	.debuginfo_path		= &debuginfo_path,
 	.section_address	= dwfl_offline_section_address,
+	// .find_elf is not set as we use dwfl_report_elf() instead.
 };
 
 static int __report_module(struct addr_location *al, u64 ip,
@@ -46,16 +60,24 @@ static int __report_module(struct addr_location *al, u64 ip,
 	mod = dwfl_addrmodule(ui->dwfl, ip);
 	if (mod) {
 		Dwarf_Addr s;
+		void **userdatap;
 
-		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+		dwfl_module_info(mod, &userdatap, &s, NULL, NULL, NULL, NULL, NULL);
+		*userdatap = dso;
 		if (s != al->map->start - al->map->pgoff)
 			mod = 0;
 	}
 
 	if (!mod)
-		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
-				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
-				      false);
+		mod = dwfl_report_elf(ui->dwfl, dso->short_name, dso->long_name, -1,
+				      al->map->start - al->map->pgoff, false);
+	if (!mod) {
+		char filename[PATH_MAX];
+
+		if (dso__build_id_filename(dso, filename, sizeof(filename), false))
+			mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
+					      al->map->start - al->map->pgoff, false);
+	}
 
 	return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
 }
-- 
cgit v1.2.3-70-g09d2


From 47d982202f8cfaac6f208c9109fa15cb6a0181f7 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Mon, 30 Nov 2020 09:27:52 -0800
Subject: tools headers UAPI: Update tools's copy of linux/perf_event.h

To get the changes in:

   commit 8d97e71811aa ("perf/core: Add PERF_SAMPLE_DATA_PAGE_SIZE")
   commit 995f088efebe ("perf/core: Add support for PERF_SAMPLE_CODE_PAGE_SIZE")

This silences this perf tools build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h'
differs from latest version at 'include/uapi/linux/perf_event.h'
  diff -u tools/include/uapi/linux/perf_event.h
include/uapi/linux/perf_event.h

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Stephane Eranian <eranian@google.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lore.kernel.org/lkml/20201130172803.2676-2-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/perf_event.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b95d3c485d27..b15e3447cd9f 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -143,8 +143,10 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 	PERF_SAMPLE_AUX				= 1U << 20,
 	PERF_SAMPLE_CGROUP			= 1U << 21,
+	PERF_SAMPLE_DATA_PAGE_SIZE		= 1U << 22,
+	PERF_SAMPLE_CODE_PAGE_SIZE		= 1U << 23,
 
-	PERF_SAMPLE_MAX = 1U << 22,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 24,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
@@ -896,6 +898,8 @@ enum perf_event_type {
 	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 *	{ u64			size;
 	 *	  char			data[size]; } && PERF_SAMPLE_AUX
+	 *	{ u64			data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
+	 *	{ u64			code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
-- 
cgit v1.2.3-70-g09d2


From 542b88fd12769bf5be307b11ca0f94a6140bba82 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Mon, 30 Nov 2020 09:27:53 -0800
Subject: perf record: Support new sample type for data page size

Support new sample type PERF_SAMPLE_DATA_PAGE_SIZE for page size.

Add new option --data-page-size to record sample data page size.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Stephane Eranian <eranian@google.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lore.kernel.org/lkml/20201130172803.2676-3-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt  | 3 +++
 tools/perf/builtin-record.c               | 2 ++
 tools/perf/util/event.h                   | 1 +
 tools/perf/util/evsel.c                   | 9 +++++++++
 tools/perf/util/perf_event_attr_fprintf.c | 2 +-
 tools/perf/util/record.h                  | 1 +
 tools/perf/util/synthetic-events.c        | 8 ++++++++
 7 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 768888b9326a..2d30e525a600 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -293,6 +293,9 @@ OPTIONS
 --phys-data::
 	Record the sample physical addresses.
 
+--data-page-size::
+	Record the sampled data address data page size.
+
 -T::
 --timestamp::
 	Record the sample timestamps. Use it with 'perf report -D' to see the
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d832c108a1ca..fd3911650612 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2472,6 +2472,8 @@ static struct option __record_options[] = {
 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
 	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
 		    "Record the sample physical addresses"),
+	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
+		    "Record the sampled data address data page size"),
 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
 			&record.opts.sample_time_set,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b828b99176f4..448ac30c2fc4 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -135,6 +135,7 @@ struct perf_sample {
 	u32 raw_size;
 	u64 data_src;
 	u64 phys_addr;
+	u64 data_page_size;
 	u64 cgroup;
 	u32 flags;
 	u16 insn_len;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 3dd0eae9810d..5e6085c3fc76 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1188,6 +1188,9 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 		evsel__set_sample_bit(evsel, CGROUP);
 	}
 
+	if (opts->sample_data_page_size)
+		evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
+
 	if (opts->record_switch_events)
 		attr->context_switch = track;
 
@@ -2355,6 +2358,12 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 		array++;
 	}
 
+	data->data_page_size = 0;
+	if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+		data->data_page_size = *array;
+		array++;
+	}
+
 	if (type & PERF_SAMPLE_AUX) {
 		OVERFLOW_CHECK_u64(array);
 		sz = *array++;
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index e67a227c0ce7..fb0bb6684438 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -35,7 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
 		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
 		bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
 		bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
-		bit_name(CGROUP),
+		bit_name(CGROUP), bit_name(DATA_PAGE_SIZE),
 		{ .name = NULL, }
 	};
 #undef bit_name
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 266760ac9143..694b351dcd27 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -22,6 +22,7 @@ struct record_opts {
 	bool	      raw_samples;
 	bool	      sample_address;
 	bool	      sample_phys_addr;
+	bool	      sample_data_page_size;
 	bool	      sample_weight;
 	bool	      sample_time;
 	bool	      sample_time_set;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 297987c6960b..2947e3f3c6d9 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1409,6 +1409,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
 	if (type & PERF_SAMPLE_CGROUP)
 		result += sizeof(u64);
 
+	if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		result += sizeof(u64);
+
 	if (type & PERF_SAMPLE_AUX) {
 		result += sizeof(u64);
 		result += sample->aux_sample.size;
@@ -1588,6 +1591,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
 		array++;
 	}
 
+	if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
+		*array = sample->data_page_size;
+		array++;
+	}
+
 	if (type & PERF_SAMPLE_AUX) {
 		sz = sample->aux_sample.size;
 		*array++ = sz;
-- 
cgit v1.2.3-70-g09d2


From 456ef4c11c06f0b8c53acaf796d77d2033f079f2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 7 Dec 2020 14:04:05 -0300
Subject: perf evsel: Emit warning about kernel not supporting the data page
 size sample_type bit

Before we had this unhelpful message:

  $ perf record --data-page-size sleep 1
  Error:
  The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles:u).
  /bin/dmesg | grep -i perf may provide additional information.
  $

Add support to the perf_missing_features variable to remember what
caused evsel__open() to fail and then use that information in
evsel__open_strerror().

  $ perf record --data-page-size sleep 1
  Error:
  Asking for the data page size isn't supported by this kernel.
  $

Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Stephane Eranian <eranian@google.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lore.kernel.org/lkml/20201207170759.GB129853@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 9 ++++++++-
 tools/perf/util/evsel.h | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5e6085c3fc76..c26ea82220bd 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1873,7 +1873,12 @@ try_fallback:
 	 * Must probe features in the order they were added to the
 	 * perf_event_attr interface.
 	 */
-        if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
+        if (!perf_missing_features.data_page_size &&
+	    (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
+		perf_missing_features.data_page_size = true;
+		pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
+		goto out_close;
+	} else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
 		perf_missing_features.cgroup = true;
 		pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
 		goto out_close;
@@ -2673,6 +2678,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 	"We found oprofile daemon running, please stop it and try again.");
 		break;
 	case EINVAL:
+		if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
+			return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
 		if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
 			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
 		if (perf_missing_features.clockid)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 79a860d8e3ee..cd1d8dd43199 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -144,6 +144,7 @@ struct perf_missing_features {
 	bool aux_output;
 	bool branch_hw_idx;
 	bool cgroup;
+	bool data_page_size;
 };
 
 extern struct perf_missing_features perf_missing_features;
-- 
cgit v1.2.3-70-g09d2


From 4853f1caa43ea41a544c50a7cefc42e147aafeda Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:07 +0800
Subject: perf jevents: Add support for an extra directory level

Currently only upto a level 2 directory is supported, in form
vendor/platform.

Add support for a further level, to support vendor/platform
sub-directories in future, which will be vendor/platform/cpu and
vendor/platform/sys.

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-2-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/jevents.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 72cfa3b5046d..9022216b1253 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -978,15 +978,20 @@ static int process_one_file(const char *fpath, const struct stat *sb,
 	int level   = ftwbuf->level;
 	int err = 0;
 
-	if (level == 2 && is_dir) {
+	if (level >= 2 && is_dir) {
+		int count = 0;
 		/*
 		 * For level 2 directory, bname will include parent name,
 		 * like vendor/platform. So search back from platform dir
 		 * to find this.
+		 * Something similar for level 3 directory, but we're a PMU
+		 * category folder, like vendor/platform/cpu.
 		 */
 		bname = (char *) fpath + ftwbuf->base - 2;
 		for (;;) {
 			if (*bname == '/')
+				count++;
+			if (count == level - 1)
 				break;
 			bname--;
 		}
@@ -999,13 +1004,13 @@ static int process_one_file(const char *fpath, const struct stat *sb,
 		 level, sb->st_size, bname, fpath);
 
 	/* base dir or too deep */
-	if (level == 0 || level > 3)
+	if (level == 0 || level > 4)
 		return 0;
 
 
 	/* model directory, reset topic */
 	if ((level == 1 && is_dir && is_leaf_dir(fpath)) ||
-	    (level == 2 && is_dir)) {
+	    (level >= 2 && is_dir && is_leaf_dir(fpath))) {
 		if (close_table)
 			print_events_table_suffix(eventsfp);
 
-- 
cgit v1.2.3-70-g09d2


From 4689f56796f87abee190d8a959dd318e006c5b5a Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:08 +0800
Subject: perf jevents: Add support for system events tables

Process the JSONs to find support for "system" events, which are not
tied to a specific CPUID.

A "COMPAT" property is now used to match against the namespace ID from
the kernel PMU driver.

The generated pmu-events.c will now have 2 tables:

a. CPU events, as before.
b. New pmu_sys_event_tables[] table, which will have events matched to
   specific SoCs.

It will look like this:

struct pmu_event pme_hisilicon_hip09_sys[] = {
{
	.name = "cycles",
	.compat = "0x00030736",
	.event = "event=0",
	.desc = "Clock cycles",
	.topic = "smmu v3 pmcg",
	.long_desc = "Clock cycles",
},
{
	.name = "smmuv3_pmcg.l1_tlb",
	.compat = "0x00030736",
	.event = "event=0x8a",
	.desc = "SMMUv3 PMCG l1_tlb. Unit: smmuv3_pmcg ",
	.topic = "smmu v3 pmcg",
	.long_desc = "SMMUv3 PMCG l1_tlb",
	.pmu = "smmuv3_pmcg",
},
...
};

struct pmu_event pme_arm_cortex_a53[] = {
{
	.name = "ext_mem_req",
	.event = "event=0xc0",
	.desc = "External memory request",
	.topic = "memory",
},
{
	.name = "ext_mem_req_nc",
	.event = "event=0xc1",
	.desc = "Non-cacheable external memory request",
	.topic = "memory",
},
...
};

struct pmu_event pme_hisilicon_hip09_cpu[] = {
{
	.name = "l2d_cache_refill_wr",
	.event = "event=0x53",
	.desc = "L2D cache refill, write",
	.topic = "core imp def",
	.long_desc = "Attributable Level 2 data cache refill, write",
},
...
};

struct pmu_events_map pmu_events_map[] = {
{
	.cpuid = "0x00000000410fd030",
	.version = "v1",
	.type = "core",
	.table = pme_arm_cortex_a53
},
{
	.cpuid = "0x00000000480fd010",
	.version = "v1",
	.type = "core",
	.table = pme_hisilicon_hip09_cpu
},
	{
		.table = 0
	},
};

struct pmu_event pme_hisilicon_hip09_cpu[] = {
{
	.name = "uncore_hisi_l3c.rd_cpipe",
	.event = "event=0",
	.desc = "Total read accesses. Unit: hisi_sccl,l3c ",
	.topic = "uncore l3c",
	.long_desc = "Total read accesses",
	.pmu = "hisi_sccl,l3c",
},
{
	.name = "uncore_hisi_l3c.wr_cpipe",
	.event = "event=0x1",
	.desc = "Total write accesses. Unit: hisi_sccl,l3c ",
	.topic = "uncore l3c",
	.long_desc = "Total write accesses",
	.pmu = "hisi_sccl,l3c",
},
...
};

struct pmu_sys_events pmu_sys_event_tables[] = {
{
	.table = pme_hisilicon_hip09_sys,
},
...
};

Committer notes:

Added the fix for architectures without PMU events, provided by John
after I reported the build failing in such systems.

Link: https://lore.kernel.org/lkml/650baaf2-36b6-a9e2-ff49-963ef864c1f3@huawei.com/

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-3-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/jevents.c    | 86 +++++++++++++++++++++++++++++++++++++-
 tools/perf/pmu-events/pmu-events.h |  6 +++
 2 files changed, 91 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 9022216b1253..214975c819ff 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -55,6 +55,7 @@ char *prog;
 
 struct json_event {
 	char *name;
+	char *compat;
 	char *event;
 	char *desc;
 	char *long_desc;
@@ -82,6 +83,23 @@ enum aggr_mode_class convert(const char *aggr_mode)
 
 typedef int (*func)(void *data, struct json_event *je);
 
+static LIST_HEAD(sys_event_tables);
+
+struct sys_event_table {
+	struct list_head list;
+	char *soc_id;
+};
+
+static void free_sys_event_tables(void)
+{
+	struct sys_event_table *et, *next;
+
+	list_for_each_entry_safe(et, next, &sys_event_tables, list) {
+		free(et->soc_id);
+		free(et);
+	}
+}
+
 int eprintf(int level, int var, const char *fmt, ...)
 {
 
@@ -360,6 +378,8 @@ static int print_events_table_entry(void *data, struct json_event *je)
 	if (je->event)
 		fprintf(outfp, "\t.event = \"%s\",\n", je->event);
 	fprintf(outfp, "\t.desc = \"%s\",\n", je->desc);
+	if (je->compat)
+		fprintf(outfp, "\t.compat = \"%s\",\n", je->compat);
 	fprintf(outfp, "\t.topic = \"%s\",\n", topic);
 	if (je->long_desc && je->long_desc[0])
 		fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc);
@@ -390,6 +410,7 @@ struct event_struct {
 	struct list_head list;
 	char *name;
 	char *event;
+	char *compat;
 	char *desc;
 	char *long_desc;
 	char *pmu;
@@ -583,6 +604,8 @@ static int json_events(const char *fn,
 				free(code);
 			} else if (json_streq(map, field, "EventName")) {
 				addfield(map, &je.name, "", "", val);
+			} else if (json_streq(map, field, "Compat")) {
+				addfield(map, &je.compat, "", "", val);
 			} else if (json_streq(map, field, "BriefDescription")) {
 				addfield(map, &je.desc, "", "", val);
 				fixdesc(je.desc);
@@ -683,6 +706,7 @@ free_strings:
 		free(event);
 		free(je.desc);
 		free(je.name);
+		free(je.compat);
 		free(je.long_desc);
 		free(extra_desc);
 		free(je.pmu);
@@ -747,6 +771,15 @@ static char *file_name_to_table_name(char *fname)
 	return tblname;
 }
 
+static bool is_sys_dir(char *fname)
+{
+	size_t len = strlen(fname), len2 = strlen("/sys");
+
+	if (len2 > len)
+		return false;
+	return !strcmp(fname+len-len2, "/sys");
+}
+
 static void print_mapping_table_prefix(FILE *outfp)
 {
 	fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n");
@@ -781,6 +814,33 @@ static void print_mapping_test_table(FILE *outfp)
 	fprintf(outfp, "},\n");
 }
 
+static void print_system_event_mapping_table_prefix(FILE *outfp)
+{
+	fprintf(outfp, "\nstruct pmu_sys_events pmu_sys_event_tables[] = {");
+}
+
+static void print_system_event_mapping_table_suffix(FILE *outfp)
+{
+	fprintf(outfp, "\n\t{\n\t\t.table = 0\n\t},");
+	fprintf(outfp, "\n};\n");
+}
+
+static int process_system_event_tables(FILE *outfp)
+{
+	struct sys_event_table *sys_event_table;
+
+	print_system_event_mapping_table_prefix(outfp);
+
+	list_for_each_entry(sys_event_table, &sys_event_tables, list) {
+		fprintf(outfp, "\n\t{\n\t\t.table = %s,\n\t},",
+			sys_event_table->soc_id);
+	}
+
+	print_system_event_mapping_table_suffix(outfp);
+
+	return 0;
+}
+
 static int process_mapfile(FILE *outfp, char *fpath)
 {
 	int n = 16384;
@@ -886,6 +946,8 @@ static void create_empty_mapping(const char *output_file)
 	fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n");
 	print_mapping_table_prefix(outfp);
 	print_mapping_table_suffix(outfp);
+	print_system_event_mapping_table_prefix(outfp);
+	print_system_event_mapping_table_suffix(outfp);
 	fclose(outfp);
 }
 
@@ -1026,6 +1088,22 @@ static int process_one_file(const char *fpath, const struct stat *sb,
 			return -1;
 		}
 
+		if (is_sys_dir(bname)) {
+			struct sys_event_table *sys_event_table;
+
+			sys_event_table = malloc(sizeof(*sys_event_table));
+			if (!sys_event_table)
+				return -1;
+
+			sys_event_table->soc_id = strdup(tblname);
+			if (!sys_event_table->soc_id) {
+				free(sys_event_table);
+				return -1;
+			}
+			list_add_tail(&sys_event_table->list,
+				      &sys_event_tables);
+		}
+
 		print_events_table_prefix(eventsfp, tblname);
 		return 0;
 	}
@@ -1185,10 +1263,16 @@ int main(int argc, char *argv[])
 	}
 
 	rc = process_mapfile(eventsfp, mapfile);
-	fclose(eventsfp);
 	if (rc) {
 		pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
 		/* Make build fail */
+		ret = 1;
+		goto err_close_eventsfp;
+	}
+
+	rc = process_system_event_tables(eventsfp);
+	fclose(eventsfp);
+	if (rc) {
 		ret = 1;
 		goto err_out;
 	}
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index 7da1a3743b77..d1172f6aebf1 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -12,6 +12,7 @@ enum aggr_mode_class {
  */
 struct pmu_event {
 	const char *name;
+	const char *compat;
 	const char *event;
 	const char *desc;
 	const char *topic;
@@ -43,10 +44,15 @@ struct pmu_events_map {
 	struct pmu_event *table;
 };
 
+struct pmu_sys_events {
+	struct pmu_event *table;
+};
+
 /*
  * Global table mapping each known CPU for the architecture to its
  * table of PMU events.
  */
 extern struct pmu_events_map pmu_events_map[];
+extern struct pmu_sys_events pmu_sys_event_tables[];
 
 #endif
-- 
cgit v1.2.3-70-g09d2


From 51d548471510843e56d9f427aa6473ca0981c4a4 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:09 +0800
Subject: perf pmu: Add pmu_id()

Add a function to read the PMU id sysfs entry. This is only done for uncore
PMUs where this would possibly be relevant.

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-4-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 18 ++++++++++++++++++
 tools/perf/util/pmu.h |  1 +
 2 files changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index d41caeb35cf6..cbeda45ce578 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -597,6 +597,7 @@ static struct perf_cpu_map *__pmu_cpumask(const char *path)
  * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
  * may have a "cpus" file.
  */
+#define SYS_TEMPLATE_ID	"./bus/event_source/devices/%s/identifier"
 #define CPUS_TEMPLATE_UNCORE	"%s/bus/event_source/devices/%s/cpumask"
 #define CPUS_TEMPLATE_CPU	"%s/bus/event_source/devices/%s/cpus"
 
@@ -635,6 +636,21 @@ static bool pmu_is_uncore(const char *name)
 	return file_available(path);
 }
 
+static char *pmu_id(const char *name)
+{
+	char path[PATH_MAX], *str;
+	size_t len;
+
+	snprintf(path, PATH_MAX, SYS_TEMPLATE_ID, name);
+
+	if (sysfs__read_str(path, &str, &len) < 0)
+		return NULL;
+
+	str[len - 1] = 0; /* remove line feed */
+
+	return str;
+}
+
 /*
  *  PMU CORE devices have different name other than cpu in sysfs on some
  *  platforms.
@@ -847,6 +863,8 @@ static struct perf_pmu *pmu_lookup(const char *name)
 	pmu->name = strdup(name);
 	pmu->type = type;
 	pmu->is_uncore = pmu_is_uncore(name);
+	if (pmu->is_uncore)
+		pmu->id = pmu_id(name);
 	pmu->max_precise = pmu_max_precise(name);
 	pmu_add_cpu_aliases(&aliases, pmu);
 
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index a64e9c9ce731..d4366e8e79df 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -30,6 +30,7 @@ struct perf_pmu_caps {
 
 struct perf_pmu {
 	char *name;
+	char *id;
 	__u32 type;
 	bool selectable;
 	bool is_uncore;
-- 
cgit v1.2.3-70-g09d2


From 4513c719c6f1ccf0c362c8dcef1f9b476f8f5c9c Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:10 +0800
Subject: perf pmu: Add pmu_add_sys_aliases()

Add pmu_add_sys_aliases() to add system PMU events aliases.

For adding system PMU events, iterate through all the events for all SoC
event tables in pmu_sys_event_tables[].

Matches must satisfy both:
- PMU identifier matches event "compat" value
- event "Unit" member must match, same as uncore event aliases matched by
  CPUID

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-5-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/pmu.h |  2 ++
 2 files changed, 80 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index cbeda45ce578..44ef28302fc7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -812,6 +812,83 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
 	pmu_add_cpu_aliases_map(head, pmu, map);
 }
 
+void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data)
+{
+	int i = 0;
+
+	while (1) {
+		struct pmu_sys_events *event_table;
+		int j = 0;
+
+		event_table = &pmu_sys_event_tables[i++];
+
+		if (!event_table->table)
+			break;
+
+		while (1) {
+			struct pmu_event *pe = &event_table->table[j++];
+			int ret;
+
+			if (!pe->name && !pe->metric_group && !pe->metric_name)
+				break;
+
+			ret = fn(pe, data);
+			if (ret)
+				break;
+		}
+	}
+}
+
+struct pmu_sys_event_iter_data {
+	struct list_head *head;
+	struct perf_pmu *pmu;
+};
+
+static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data)
+{
+	struct pmu_sys_event_iter_data *idata = data;
+	struct perf_pmu *pmu = idata->pmu;
+
+	if (!pe->name) {
+		if (pe->metric_group || pe->metric_name)
+			return 0;
+		return -EINVAL;
+	}
+
+	if (!pe->compat || !pe->pmu)
+		return 0;
+
+	if (!strcmp(pmu->id, pe->compat) &&
+	    pmu_uncore_alias_match(pe->pmu, pmu->name)) {
+		__perf_pmu__new_alias(idata->head, NULL,
+				      (char *)pe->name,
+				      (char *)pe->desc,
+				      (char *)pe->event,
+				      (char *)pe->long_desc,
+				      (char *)pe->topic,
+				      (char *)pe->unit,
+				      (char *)pe->perpkg,
+				      (char *)pe->metric_expr,
+				      (char *)pe->metric_name,
+				      (char *)pe->deprecated);
+	}
+
+	return 0;
+}
+
+static void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+	struct pmu_sys_event_iter_data idata = {
+		.head = head,
+		.pmu = pmu,
+	};
+
+	if (!pmu->id)
+		return;
+
+	pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, &idata);
+}
+
 struct perf_event_attr * __weak
 perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
 {
@@ -867,6 +944,7 @@ static struct perf_pmu *pmu_lookup(const char *name)
 		pmu->id = pmu_id(name);
 	pmu->max_precise = pmu_max_precise(name);
 	pmu_add_cpu_aliases(&aliases, pmu);
+	pmu_add_sys_aliases(&aliases, pmu);
 
 	INIT_LIST_HEAD(&pmu->format);
 	INIT_LIST_HEAD(&pmu->aliases);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index d4366e8e79df..8164388478c6 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -117,6 +117,8 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
 bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
 void perf_pmu_free_alias(struct perf_pmu_alias *alias);
 
+typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data);
+void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data);
 int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
 
 int perf_pmu__caps_parse(struct perf_pmu *pmu);
-- 
cgit v1.2.3-70-g09d2


From 6d2783fe365fa5f571cf1416b5f5b1e352447a0e Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:11 +0800
Subject: perf evlist: Change evlist__splice_list_tail() ordering

Function find_evsel_group() expects events to be ordered such that they
are grouped after their leader.

Modify evlist__splice_list_tail() to guarantee this (ordering).

[Should prob also change the function name]

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-6-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 493819173a8e..89286f148012 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -179,11 +179,22 @@ void evlist__remove(struct evlist *evlist, struct evsel *evsel)
 
 void evlist__splice_list_tail(struct evlist *evlist, struct list_head *list)
 {
-	struct evsel *evsel, *temp;
+	while (!list_empty(list)) {
+		struct evsel *evsel, *temp, *leader = NULL;
 
-	__evlist__for_each_entry_safe(list, temp, evsel) {
-		list_del_init(&evsel->core.node);
-		evlist__add(evlist, evsel);
+		__evlist__for_each_entry_safe(list, temp, evsel) {
+			list_del_init(&evsel->core.node);
+			evlist__add(evlist, evsel);
+			leader = evsel;
+			break;
+		}
+
+		__evlist__for_each_entry_safe(list, temp, evsel) {
+			if (evsel->leader == leader) {
+				list_del_init(&evsel->core.node);
+				evlist__add(evlist, evsel);
+			}
+		}
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From c2337d67199a1ea1c75083da5d376aced1ab2c40 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:12 +0800
Subject: perf metricgroup: Fix metrics using aliases covering multiple PMUs

Support for metric expressions using aliases which cover multiple PMUs
is broken. Consider the following test metric expression:

  "MetricExpr": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE * UNC_CBO_XSNP_RESPONSE.MISS_EVICTION"

When used on my broadwell, "perf stat" gives:

  unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_1/umask=0x81,event=0x22/
  unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_0/umask=0x81,event=0x22/
  unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_1/umask=0x41,event=0x22/
  unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_0/umask=0x41,event=0x22/
  Control descriptor is not initialized
  unc_cbo_xsnp_response.miss_eviction: 3645925 1000850523 1000850523
  unc_cbo_xsnp_response.miss_xcore: 106850 1000850523 1000850523

   Performance counter stats for 'system wide':

           3,645,925      unc_cbo_xsnp_response.miss_eviction # 389567086250.00 test_metric_inc
             106,850      unc_cbo_xsnp_response.miss_xcore

         1.000883096 seconds time elapsed

Notice that only the results from one PMU are included. Fix the logic of
find_evsel_group() to enable events which apply to multiple PMUs, by
checking if the event pmu_name matches that of the metric event.

With that, "perf stat" now gives:

  unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_1/umask=0x81,event=0x22/
  unc_cbo_xsnp_response.miss_eviction -> uncore_cbox_0/umask=0x81,event=0x22/
  unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_1/umask=0x41,event=0x22/
  unc_cbo_xsnp_response.miss_xcore -> uncore_cbox_0/umask=0x41,event=0x22/
  Control descriptor is not initialized
  unc_cbo_xsnp_response.miss_eviction: 4237983 1000904100 1000904100
  unc_cbo_xsnp_response.miss_xcore: 218643 1000904100 1000904100
  unc_cbo_xsnp_response.miss_eviction: 4254148 1000902629 1000902629
  unc_cbo_xsnp_response.miss_xcore: 213352 1000902629 1000902629

   Performance counter stats for 'system wide':

           4,237,983      unc_cbo_xsnp_response.miss_eviction # 3668558131345.00 test_metric_inc
             218,643      unc_cbo_xsnp_response.miss_xcore
           4,254,148      unc_cbo_xsnp_response.miss_eviction
             213,352      unc_cbo_xsnp_response.miss_xcore

         1.000938151 seconds time elapsed

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-7-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/metricgroup.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 81d201c8b833..b89160718c04 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -279,7 +279,9 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
 			 * when then group is left.
 			 */
 			if (!has_constraint &&
-			    ev->leader != metric_events[i]->leader)
+			    ev->leader != metric_events[i]->leader &&
+			    !strcmp(ev->leader->pmu_name,
+				    metric_events[i]->leader->pmu_name))
 				break;
 			if (!strcmp(metric_events[i]->name, ev->name)) {
 				set_bit(ev->idx, evlist_used);
-- 
cgit v1.2.3-70-g09d2


From f6fe1e48ae185d028dfcabecb7d79036e2d89d27 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:13 +0800
Subject: perf metricgroup: Split up metricgroup__print()

To aid supporting system event metric groups, break up the function
metricgroup__print() into a part which iterates metrics and a part which
actually "prints" the metric.

No functional change intended.

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-8-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/metricgroup.c | 124 ++++++++++++++++++++++++------------------
 1 file changed, 70 insertions(+), 54 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index b89160718c04..4c6a686b08eb 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -493,6 +493,72 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
 		putchar('\n');
 }
 
+static int metricgroup__print_pmu_event(struct pmu_event *pe,
+					bool metricgroups, char *filter,
+					bool raw, bool details,
+					struct rblist *groups,
+					struct strlist *metriclist)
+{
+	const char *g;
+	char *omg, *mg;
+
+	g = pe->metric_group;
+	if (!g && pe->metric_name) {
+		if (pe->name)
+			return 0;
+		g = "No_group";
+	}
+
+	if (!g)
+		return 0;
+
+	mg = strdup(g);
+
+	if (!mg)
+		return -ENOMEM;
+	omg = mg;
+	while ((g = strsep(&mg, ";")) != NULL) {
+		struct mep *me;
+		char *s;
+
+		g = skip_spaces(g);
+		if (*g == 0)
+			g = "No_group";
+		if (filter && !strstr(g, filter))
+			continue;
+		if (raw)
+			s = (char *)pe->metric_name;
+		else {
+			if (asprintf(&s, "%s\n%*s%s]",
+				     pe->metric_name, 8, "[", pe->desc) < 0)
+				return -1;
+			if (details) {
+				if (asprintf(&s, "%s\n%*s%s]",
+					     s, 8, "[", pe->metric_expr) < 0)
+					return -1;
+			}
+		}
+
+		if (!s)
+			continue;
+
+		if (!metricgroups) {
+			strlist__add(metriclist, s);
+		} else {
+			me = mep_lookup(groups, g);
+			if (!me)
+				continue;
+			strlist__add(me->metrics, s);
+		}
+
+		if (!raw)
+			free(s);
+	}
+	free(omg);
+
+	return 0;
+}
+
 void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 			bool raw, bool details)
 {
@@ -517,66 +583,16 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 	groups.node_cmp = mep_cmp;
 	groups.node_delete = mep_delete;
 	for (i = 0; ; i++) {
-		const char *g;
 		pe = &map->table[i];
 
 		if (!pe->name && !pe->metric_group && !pe->metric_name)
 			break;
 		if (!pe->metric_expr)
 			continue;
-		g = pe->metric_group;
-		if (!g && pe->metric_name) {
-			if (pe->name)
-				continue;
-			g = "No_group";
-		}
-		if (g) {
-			char *omg;
-			char *mg = strdup(g);
-
-			if (!mg)
-				return;
-			omg = mg;
-			while ((g = strsep(&mg, ";")) != NULL) {
-				struct mep *me;
-				char *s;
-
-				g = skip_spaces(g);
-				if (*g == 0)
-					g = "No_group";
-				if (filter && !strstr(g, filter))
-					continue;
-				if (raw)
-					s = (char *)pe->metric_name;
-				else {
-					if (asprintf(&s, "%s\n%*s%s]",
-						     pe->metric_name, 8, "[", pe->desc) < 0)
-						return;
-
-					if (details) {
-						if (asprintf(&s, "%s\n%*s%s]",
-							     s, 8, "[", pe->metric_expr) < 0)
-							return;
-					}
-				}
-
-				if (!s)
-					continue;
-
-				if (!metricgroups) {
-					strlist__add(metriclist, s);
-				} else {
-					me = mep_lookup(&groups, g);
-					if (!me)
-						continue;
-					strlist__add(me->metrics, s);
-				}
-
-				if (!raw)
-					free(s);
-			}
-			free(omg);
-		}
+		if (metricgroup__print_pmu_event(pe, metricgroups, filter,
+						 raw, details, &groups,
+						 metriclist) < 0)
+			return;
 	}
 
 	if (!filter || !rblist__empty(&groups)) {
-- 
cgit v1.2.3-70-g09d2


From a36fadb17c27b4b5360db69acc80f5f4ad8dde7e Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:14 +0800
Subject: perf metricgroup: Support printing metric groups for system PMUs

Currently printing metricgroups for core- or uncore-based events matched
by CPUID is supported.

Extend this for system events.

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-9-git-send-email-john.garry@huawei.com
[ Reorder 'struct metricgroup_print_sys_idata' field to avoid alignment holes ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/metricgroup.c | 64 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 4c6a686b08eb..6344e9627ff3 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -559,6 +559,49 @@ static int metricgroup__print_pmu_event(struct pmu_event *pe,
 	return 0;
 }
 
+struct metricgroup_print_sys_idata {
+	struct strlist *metriclist;
+	char *filter;
+	struct rblist *groups;
+	bool metricgroups;
+	bool raw;
+	bool details;
+};
+
+typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *);
+
+struct metricgroup_iter_data {
+	metricgroup_sys_event_iter_fn fn;
+	void *data;
+};
+
+static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data)
+{
+	struct metricgroup_iter_data *d = data;
+	struct perf_pmu *pmu = NULL;
+
+	if (!pe->metric_expr || !pe->compat)
+		return 0;
+
+	while ((pmu = perf_pmu__scan(pmu))) {
+
+		if (!pmu->id || strcmp(pmu->id, pe->compat))
+			continue;
+
+		return d->fn(pe, d->data);
+	}
+
+	return 0;
+}
+
+static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data)
+{
+	struct metricgroup_print_sys_idata *d = data;
+
+	return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw,
+				     d->details, d->groups, d->metriclist);
+}
+
 void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 			bool raw, bool details)
 {
@@ -569,9 +612,6 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 	struct rb_node *node, *next;
 	struct strlist *metriclist = NULL;
 
-	if (!map)
-		return;
-
 	if (!metricgroups) {
 		metriclist = strlist__new(NULL, NULL);
 		if (!metriclist)
@@ -582,7 +622,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 	groups.node_new = mep_new;
 	groups.node_cmp = mep_cmp;
 	groups.node_delete = mep_delete;
-	for (i = 0; ; i++) {
+	for (i = 0; map; i++) {
 		pe = &map->table[i];
 
 		if (!pe->name && !pe->metric_group && !pe->metric_name)
@@ -595,6 +635,22 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 			return;
 	}
 
+	{
+		struct metricgroup_iter_data data = {
+			.fn = metricgroup__print_sys_event_iter,
+			.data = (void *) &(struct metricgroup_print_sys_idata){
+				.metriclist = metriclist,
+				.metricgroups = metricgroups,
+				.filter = filter,
+				.raw = raw,
+				.details = details,
+				.groups = &groups,
+			},
+		};
+
+		pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
+	}
+
 	if (!filter || !rblist__empty(&groups)) {
 		if (metricgroups && !raw)
 			printf("\nMetric Groups:\n\n");
-- 
cgit v1.2.3-70-g09d2


From be335ec28efa89d6bff8f4c6ce8daba88acf2b1a Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 4 Dec 2020 19:10:15 +0800
Subject: perf metricgroup: Support adding metrics for system PMUs

Currently adding metrics for core- or uncore-based events matched by CPUID
is supported.

Extend this for system events.

Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Link: http://lore.kernel.org/lkml/1607080216-36968-10-git-send-email-john.garry@huawei.com
[ Reorder 'struct metricgroup_add_iter_data' field to avoid alignment holes ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/metricgroup.c | 66 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 60 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 6344e9627ff3..ee94d3e8dd65 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -415,6 +415,12 @@ static bool match_metric(const char *n, const char *list)
 	return false;
 }
 
+static bool match_pe_metric(struct pmu_event *pe, const char *metric)
+{
+	return match_metric(pe->metric_group, metric) ||
+	       match_metric(pe->metric_name, metric);
+}
+
 struct mep {
 	struct rb_node nd;
 	const char *name;
@@ -757,6 +763,16 @@ int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused)
 	return 1;
 }
 
+struct metricgroup_add_iter_data {
+	struct list_head *metric_list;
+	const char *metric;
+	struct metric **m;
+	struct expr_ids *ids;
+	int *ret;
+	bool *has_match;
+	bool metric_no_group;
+};
+
 static int __add_metric(struct list_head *metric_list,
 			struct pmu_event *pe,
 			bool metric_no_group,
@@ -866,10 +882,11 @@ static int __add_metric(struct list_head *metric_list,
 	return 0;
 }
 
-#define map_for_each_event(__pe, __idx, __map)				\
-	for (__idx = 0, __pe = &__map->table[__idx];			\
-	     __pe->name || __pe->metric_group || __pe->metric_name;	\
-	     __pe = &__map->table[++__idx])
+#define map_for_each_event(__pe, __idx, __map)					\
+	if (__map)								\
+		for (__idx = 0, __pe = &__map->table[__idx];			\
+		     __pe->name || __pe->metric_group || __pe->metric_name;	\
+		     __pe = &__map->table[++__idx])
 
 #define map_for_each_metric(__pe, __idx, __map, __metric)		\
 	map_for_each_event(__pe, __idx, __map)				\
@@ -1037,6 +1054,29 @@ static int add_metric(struct list_head *metric_list,
 	return ret;
 }
 
+static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe,
+						  void *data)
+{
+	struct metricgroup_add_iter_data *d = data;
+	int ret;
+
+	if (!match_pe_metric(pe, d->metric))
+		return 0;
+
+	ret = add_metric(d->metric_list, pe, d->metric_no_group, d->m, NULL, d->ids);
+	if (ret)
+		return ret;
+
+	ret = resolve_metric(d->metric_no_group,
+				     d->metric_list, NULL, d->ids);
+	if (ret)
+		return ret;
+
+	*(d->has_match) = true;
+
+	return *d->ret;
+}
+
 static int metricgroup__add_metric(const char *metric, bool metric_no_group,
 				   struct strbuf *events,
 				   struct list_head *metric_list,
@@ -1067,6 +1107,22 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group,
 			goto out;
 	}
 
+	{
+		struct metricgroup_iter_data data = {
+			.fn = metricgroup__add_metric_sys_event_iter,
+			.data = (void *) &(struct metricgroup_add_iter_data) {
+				.metric_list = &list,
+				.metric = metric,
+				.metric_no_group = metric_no_group,
+				.m = &m,
+				.ids = &ids,
+				.has_match = &has_match,
+				.ret = &ret,
+			},
+		};
+
+		pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
+	}
 	/* End of pmu events. */
 	if (!has_match) {
 		ret = -EINVAL;
@@ -1193,8 +1249,6 @@ int metricgroup__parse_groups(const struct option *opt,
 	struct evlist *perf_evlist = *(struct evlist **)opt->value;
 	struct pmu_events_map *map = perf_pmu__find_map(NULL);
 
-	if (!map)
-		return 0;
 
 	return parse_groups(perf_evlist, str, metric_no_group,
 			    metric_no_merge, NULL, metric_events, map);
-- 
cgit v1.2.3-70-g09d2


From e15a536521ed7f48fac268152a78e6e2f99102d2 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Fri, 4 Dec 2020 19:10:16 +0800
Subject: perf vendor events: Add JSON metrics for imx8mm DDR Perf

Add JSON metrics for imx8mm DDR Perf.

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Acked-by: Kajol Jain <kjain@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxarm@huawei.com
Signed-off-by: John Garry <john.garry@huawei.com>
Link: http://lore.kernel.org/lkml/1607080216-36968-11-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/arm64/freescale/imx8mm/sys/ddrc.json      | 39 ++++++++++++++++++++++
 .../arch/arm64/freescale/imx8mm/sys/metrics.json   | 18 ++++++++++
 tools/perf/pmu-events/jevents.c                    |  2 ++
 3 files changed, 59 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json
 create mode 100644 tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json

(limited to 'tools')

diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json
new file mode 100644
index 000000000000..3b1cd708f568
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/ddrc.json
@@ -0,0 +1,39 @@
+[
+   {
+           "BriefDescription": "ddr cycles event",
+           "EventCode": "0x00",
+           "EventName": "imx8mm_ddr.cycles",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr read-cycles event",
+           "EventCode": "0x2a",
+           "EventName": "imx8mm_ddr.read_cycles",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr write-cycles event",
+           "EventCode": "0x2b",
+           "EventName": "imx8mm_ddr.write_cycles",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr read event",
+           "EventCode": "0x35",
+           "EventName": "imx8mm_ddr.read",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   },
+   {
+           "BriefDescription": "ddr write event",
+           "EventCode": "0x38",
+           "EventName": "imx8mm_ddr.write",
+           "Unit": "imx8_ddr",
+           "Compat": "i.MX8MM"
+   }
+]
+
+
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json
new file mode 100644
index 000000000000..8e553b67cae6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx8mm/sys/metrics.json
@@ -0,0 +1,18 @@
+[
+   {
+	    "BriefDescription": "bytes all masters read from ddr based on read-cycles event",
+	    "MetricName": "imx8mm_ddr_read.all",
+	    "MetricExpr": "imx8mm_ddr.read_cycles * 4 * 4",
+	    "ScaleUnit": "9.765625e-4KB",
+	    "Unit": "imx8_ddr",
+	    "Compat": "i.MX8MM"
+    },
+   {
+	    "BriefDescription": "bytes all masters write to ddr based on write-cycles event",
+	    "MetricName": "imx8mm_ddr_write.all",
+	    "MetricExpr": "imx8mm_ddr.write_cycles * 4 * 4",
+	    "ScaleUnit": "9.765625e-4KB",
+	    "Unit": "imx8_ddr",
+	    "Compat": "i.MX8MM"
+    }
+]
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 214975c819ff..e1f3f5c8c550 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -281,6 +281,8 @@ static struct map {
 	{ "hisi_sccl,ddrc", "hisi_sccl,ddrc" },
 	{ "hisi_sccl,hha", "hisi_sccl,hha" },
 	{ "hisi_sccl,l3c", "hisi_sccl,l3c" },
+	/* it's not realistic to keep adding these, we need something more scalable ... */
+	{ "imx8_ddr", "imx8_ddr" },
 	{ "L3PMC", "amd_l3" },
 	{ "DFPMC", "amd_df" },
 	{}
-- 
cgit v1.2.3-70-g09d2


From 03de8656c7778c5434cc2ca8e6b4699c1176c090 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 10 Dec 2020 15:13:01 +0900
Subject: perf report: Support --header-only for pipe mode

The --header-only checks file header and prints the feature data.  But
as pipe mode doesn't have it in the header it prints almost nothing.
Change it to process first few records until it founds HEADER_FEATURE.

Before:
  $ perf record -o- true | perf report -i- --header-only
  # ========
  # captured on    : Thu Dec 10 14:34:59 2020
  # header version : 1
  # data offset    : 0
  # data size      : 0
  # feat offset    : 0
  # ========
  #

After:
  $ perf record -o- true | perf report -i- --header-only
  # ========
  # captured on    : Thu Dec 10 14:49:11 2020
  # header version : 1
  # data offset    : 0
  # data size      : 0
  # feat offset    : 0
  # ========
  #
  # hostname : balhae
  # os release : 5.7.17-1xxx
  # perf version : 5.10.rc6.gdb0ea13cc741
  # arch : x86_64
  # nrcpus online : 8
  # nrcpus avail : 8
  # cpudesc : Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz
  # cpuid : GenuineIntel,6,142,12
  # total memory : 16158916 kB
  # cmdline : perf record -o- true
  # event : name = cycles, , id = { 81, 82, 83, 84, 85, 86, 87, 88 }, size = 120, ...
  # CPU_TOPOLOGY info available, use -I to display
  # NUMA_TOPOLOGY info available, use -I to display
  # pmu mappings: intel_pt = 9, intel_bts = 8, software = 1, power = 20, uprobe = 7, ...
  # time of first sample : 0.000000
  # time of last sample : 0.000000
  # sample duration :      0.000 ms
  # MEM_TOPOLOGY info available, use -I to display
  # cpu pmu capabilities: branches=32, max_precise=3, pmu_name=skylake

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20201210061302.88213-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5efbd0602f17..2a845d6cac09 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -226,6 +226,8 @@ static int process_feature_event(struct perf_session *session,
 		pr_err("failed: wrong feature ID: %" PRI_lu64 "\n",
 		       event->feat.feat_id);
 		return -1;
+	} else if (rep->header_only) {
+		session_done = 1;
 	}
 
 	/*
@@ -1512,6 +1514,13 @@ repeat:
 		perf_session__fprintf_info(session, stdout,
 					   report.show_full_info);
 		if (report.header_only) {
+			if (data.is_pipe) {
+				/*
+				 * we need to process first few records
+				 * which contains PERF_RECORD_HEADER_FEATURE.
+				 */
+				perf_session__process_events(session);
+			}
 			ret = 0;
 			goto error;
 		}
-- 
cgit v1.2.3-70-g09d2


From 96aea4daa6cb893d339d80ce14727e6421991d8b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 10 Dec 2020 15:13:02 +0900
Subject: perf evlist: Support pipe mode display

Likewise, perf evlist command should print event attributes by reading
PERF_RECORD_HEADER_ATTR records.

Before:
  $ perf record -o- true | ./perf evlist -i-
  (prints nothing)

After:
  $ perf record -o- true | ./perf evlist -i-
  cycles:pppH

Committer testing:

Verbose mode also works as expected:

  $ perf record -o- true | perf evlist -i-
  cycles:uhH
  $ perf record -o- true | perf evlist -vi-
  cycles:uhH: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ID|PERIOD, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1
  $

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20201210061302.88213-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-evlist.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 98e992801251..4617b32c9c97 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -17,6 +17,14 @@
 #include "util/data.h"
 #include "util/debug.h"
 #include <linux/err.h>
+#include "util/tool.h"
+
+static int process_header_feature(struct perf_session *session __maybe_unused,
+				  union perf_event *event __maybe_unused)
+{
+	session_done = 1;
+	return 0;
+}
 
 static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
 {
@@ -27,12 +35,20 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
 		.mode      = PERF_DATA_MODE_READ,
 		.force     = details->force,
 	};
+	struct perf_tool tool = {
+		/* only needed for pipe mode */
+		.attr = perf_event__process_attr,
+		.feature = process_header_feature,
+	};
 	bool has_tracepoint = false;
 
-	session = perf_session__new(&data, 0, NULL);
+	session = perf_session__new(&data, 0, &tool);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
+	if (data.is_pipe)
+		perf_session__process_events(session);
+
 	evlist__for_each_entry(session->evlist, pos) {
 		evsel__fprintf(pos, details, stdout);
 
-- 
cgit v1.2.3-70-g09d2


From 7cfcd1e016cce5a72b4b86a3882eb80565430f82 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 10 Dec 2020 21:43:28 +0100
Subject: perf tools: Add evlist__disable_evsel/evlist__enable_evsel

Adding interface to enable/disable single event in the evlist based on
its name. It will be used later in new control enable/disable interface.

Keeping the evlist::enabled true when one or more events are enabled so
the toggle can work properly and toggle evlist to disabled state.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Alexei Budankov <abudankov@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20201210204330.233864-2-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 69 +++++++++++++++++++++++++++++++++++++++++++++---
 tools/perf/util/evlist.h |  2 ++
 2 files changed, 68 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 89286f148012..05363a7247c4 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -381,7 +381,30 @@ bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
 	return true;
 }
 
-void evlist__disable(struct evlist *evlist)
+static int evsel__strcmp(struct evsel *pos, char *evsel_name)
+{
+	if (!evsel_name)
+		return 0;
+	if (evsel__is_dummy_event(pos))
+		return 1;
+	return strcmp(pos->name, evsel_name);
+}
+
+static int evlist__is_enabled(struct evlist *evlist)
+{
+	struct evsel *pos;
+
+	evlist__for_each_entry(evlist, pos) {
+		if (!evsel__is_group_leader(pos) || !pos->core.fd)
+			continue;
+		/* If at least one event is enabled, evlist is enabled. */
+		if (!pos->disabled)
+			return true;
+	}
+	return false;
+}
+
+static void __evlist__disable(struct evlist *evlist, char *evsel_name)
 {
 	struct evsel *pos;
 	struct affinity affinity;
@@ -397,6 +420,8 @@ void evlist__disable(struct evlist *evlist)
 			affinity__set(&affinity, cpu);
 
 			evlist__for_each_entry(evlist, pos) {
+				if (evsel__strcmp(pos, evsel_name))
+					continue;
 				if (evsel__cpu_iter_skip(pos, cpu))
 					continue;
 				if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
@@ -414,15 +439,34 @@ void evlist__disable(struct evlist *evlist)
 
 	affinity__cleanup(&affinity);
 	evlist__for_each_entry(evlist, pos) {
+		if (evsel__strcmp(pos, evsel_name))
+			continue;
 		if (!evsel__is_group_leader(pos) || !pos->core.fd)
 			continue;
 		pos->disabled = true;
 	}
 
-	evlist->enabled = false;
+	/*
+	 * If we disabled only single event, we need to check
+	 * the enabled state of the evlist manually.
+	 */
+	if (evsel_name)
+		evlist->enabled = evlist__is_enabled(evlist);
+	else
+		evlist->enabled = false;
+}
+
+void evlist__disable(struct evlist *evlist)
+{
+	__evlist__disable(evlist, NULL);
+}
+
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
+{
+	__evlist__disable(evlist, evsel_name);
 }
 
-void evlist__enable(struct evlist *evlist)
+static void __evlist__enable(struct evlist *evlist, char *evsel_name)
 {
 	struct evsel *pos;
 	struct affinity affinity;
@@ -435,6 +479,8 @@ void evlist__enable(struct evlist *evlist)
 		affinity__set(&affinity, cpu);
 
 		evlist__for_each_entry(evlist, pos) {
+			if (evsel__strcmp(pos, evsel_name))
+				continue;
 			if (evsel__cpu_iter_skip(pos, cpu))
 				continue;
 			if (!evsel__is_group_leader(pos) || !pos->core.fd)
@@ -444,14 +490,31 @@ void evlist__enable(struct evlist *evlist)
 	}
 	affinity__cleanup(&affinity);
 	evlist__for_each_entry(evlist, pos) {
+		if (evsel__strcmp(pos, evsel_name))
+			continue;
 		if (!evsel__is_group_leader(pos) || !pos->core.fd)
 			continue;
 		pos->disabled = false;
 	}
 
+	/*
+	 * Even single event sets the 'enabled' for evlist,
+	 * so the toggle can work properly and toggle to
+	 * 'disabled' state.
+	 */
 	evlist->enabled = true;
 }
 
+void evlist__enable(struct evlist *evlist)
+{
+	__evlist__enable(evlist, NULL);
+}
+
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name)
+{
+	__evlist__enable(evlist, evsel_name);
+}
+
 void evlist__toggle_enable(struct evlist *evlist)
 {
 	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 9b0c795736bb..1aae75895dea 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -186,6 +186,8 @@ size_t evlist__mmap_size(unsigned long pages);
 void evlist__disable(struct evlist *evlist);
 void evlist__enable(struct evlist *evlist);
 void evlist__toggle_enable(struct evlist *evlist);
+void evlist__disable_evsel(struct evlist *evlist, char *evsel_name);
+void evlist__enable_evsel(struct evlist *evlist, char *evsel_name);
 
 int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx);
 
-- 
cgit v1.2.3-70-g09d2


From 8abceacff87d2fbb8e50e841d410e4808725151b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 12 Dec 2020 11:43:51 +0100
Subject: perf debug: Add debug_set_file function

Allow to set debug output file via new debug_set_file function.

It's called during perf startup in perf_debug_setup to set stderr file
as default and any perf command can set it later to different file.

It will be used in perf daemon command to get verbose output into log
file.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Budankov <abudankov@huawei.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201212104358.412065-2-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/debug.c | 9 ++++++++-
 tools/perf/util/debug.h | 2 ++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 5cda5565777a..50fd6a4be4e0 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -30,6 +30,12 @@ bool dump_trace = false, quiet = false;
 int debug_ordered_events;
 static int redirect_to_stderr;
 int debug_data_convert;
+static FILE *debug_file;
+
+void debug_set_file(FILE *file)
+{
+	debug_file = file;
+}
 
 int veprintf(int level, int var, const char *fmt, va_list args)
 {
@@ -39,7 +45,7 @@ int veprintf(int level, int var, const char *fmt, va_list args)
 		if (use_browser >= 1 && !redirect_to_stderr)
 			ui_helpline__vshow(fmt, args);
 		else
-			ret = vfprintf(stderr, fmt, args);
+			ret = vfprintf(debug_file, fmt, args);
 	}
 
 	return ret;
@@ -227,6 +233,7 @@ DEBUG_WRAPPER(debug, 1);
 
 void perf_debug_setup(void)
 {
+	debug_set_file(stderr);
 	libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
 }
 
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index f1734abd98dd..43f712295645 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -5,6 +5,7 @@
 
 #include <stdarg.h>
 #include <stdbool.h>
+#include <stdio.h>
 #include <linux/compiler.h>
 
 extern int verbose;
@@ -62,6 +63,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5)
 int veprintf(int level, int var, const char *fmt, va_list args);
 
 int perf_debug_option(const char *str);
+void debug_set_file(FILE *file);
 void perf_debug_setup(void);
 int perf_quiet_option(void);
 
-- 
cgit v1.2.3-70-g09d2


From 47dce51acc330eefef5ea876f7707585b402282a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 14 Dec 2020 11:54:48 +0100
Subject: perf tools: Add support to read build id from compressed elf

Adding support to decompress file before reading build id.

Adding filename__read_build_id and change its current versions to
read_build_id.

Shutting down stderr output of perf list in the shell test:
  82: Check open filename arg using perf trace + vfs_getname          : Ok

because with decompression code in the place we the
filename__read_build_id function is more verbose in case
of error and the test did not account for that.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Budankov <abudankov@huawei.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201214105457.543111-7-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/trace+probe_vfs_getname.sh |  2 +-
 tools/perf/util/symbol-elf.c                      | 37 +++++++++++++++++++++--
 2 files changed, 36 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 11cc2af13f2b..3d31c1d560d6 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -20,7 +20,7 @@ skip_if_no_perf_trace || exit 2
 file=$(mktemp /tmp/temporary_file.XXXXX)
 
 trace_open_vfs_getname() {
-	evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+	evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
 	perf trace -e $evts touch $file 2>&1 | \
 	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 44dd86a4f25f..f3577f7d72fe 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -534,7 +534,7 @@ out:
 
 #ifdef HAVE_LIBBFD_BUILDID_SUPPORT
 
-int filename__read_build_id(const char *filename, struct build_id *bid)
+static int read_build_id(const char *filename, struct build_id *bid)
 {
 	size_t size = sizeof(bid->data);
 	int err = -1;
@@ -563,7 +563,7 @@ out_close:
 
 #else // HAVE_LIBBFD_BUILDID_SUPPORT
 
-int filename__read_build_id(const char *filename, struct build_id *bid)
+static int read_build_id(const char *filename, struct build_id *bid)
 {
 	size_t size = sizeof(bid->data);
 	int fd, err = -1;
@@ -595,6 +595,39 @@ out:
 
 #endif // HAVE_LIBBFD_BUILDID_SUPPORT
 
+int filename__read_build_id(const char *filename, struct build_id *bid)
+{
+	struct kmod_path m = { .name = NULL, };
+	char path[PATH_MAX];
+	int err;
+
+	if (!filename)
+		return -EFAULT;
+
+	err = kmod_path__parse(&m, filename);
+	if (err)
+		return -1;
+
+	if (m.comp) {
+		int error = 0, fd;
+
+		fd = filename__decompress(filename, path, sizeof(path), m.comp, &error);
+		if (fd < 0) {
+			pr_debug("Failed to decompress (error %d) %s\n",
+				 error, filename);
+			return -1;
+		}
+		close(fd);
+		filename = path;
+	}
+
+	err = read_build_id(filename, bid);
+
+	if (m.comp)
+		unlink(filename);
+	return err;
+}
+
 int sysfs__read_build_id(const char *filename, struct build_id *bid)
 {
 	size_t size = sizeof(bid->data);
-- 
cgit v1.2.3-70-g09d2


From dc67d1920417140052976f3377fd216b87a50aad Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 16 Dec 2020 12:45:10 -0300
Subject: perf test: Make sample-parsing test aware of
 PERF_SAMPLE_{CODE,DATA}_PAGE_SIZE

To fix this:

  $ perf test -v 27
  27: Sample parsing                                                  :
  --- start ---
  test child forked, pid 586013
  sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating
  test child finished with -1
  ---- end ----
  Sample parsing: FAILED!
  $

This patchset is still not completely merged, so when adding the
PERF_SAMPLE_CODE_PAGE_SIZE to 'struct perf_sample' we need to add the
bits added in this patch for 'perf_sample.data_page_size'.

Fixes: 251cc77b8176de37 ("tools headers UAPI: Update tools's copy of linux/perf_event.h")
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/sample-parsing.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index a0bdaf390ac8..2393916f6128 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -154,6 +154,9 @@ static bool samples_same(const struct perf_sample *s1,
 	if (type & PERF_SAMPLE_CGROUP)
 		COMP(cgroup);
 
+	if (type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		COMP(data_page_size);
+
 	if (type & PERF_SAMPLE_AUX) {
 		COMP(aux_sample.size);
 		if (memcmp(s1->aux_sample.data, s2->aux_sample.data,
@@ -234,6 +237,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 		},
 		.phys_addr	= 113,
 		.cgroup		= 114,
+		.data_page_size = 115,
 		.aux_sample	= {
 			.size	= sizeof(aux_data),
 			.data	= (void *)aux_data,
@@ -340,7 +344,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
 	 * were added.  Please actually update the test rather than just change
 	 * the condition below.
 	 */
-	if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) {
+	if (PERF_SAMPLE_MAX > PERF_SAMPLE_CODE_PAGE_SIZE << 1) {
 		pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
 		return -1;
 	}
-- 
cgit v1.2.3-70-g09d2


From 526671bfc47df175eb87f96067d51b389a8af50d Mon Sep 17 00:00:00 2001
From: Nick Thompson <nathompson7@protonmail.com>
Date: Wed, 16 Dec 2020 12:13:17 -0500
Subject: perf config: Fix example command in manpage to conform to syntax
 specified in the SYNOPSIS section.

Committer testing:

With the previously documented example:

  $ perf config --user report sort-order=srcline
  The config variable does not contain a section name: report
  $

With the fixed example line:

  $ perf config --user report.sort-order=srcline
  $ perf config --user report.sort-order
  report.sort-order=srcline
  $

Signed-off-by: Nick Thompson <nathompson7@protonmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/linux-perf-users/20201217142619.GA14524@redhat.com/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-config.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 31069d8a5304..5c379adf8304 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -138,7 +138,7 @@ If you want to add or modify several config items, you can do like
 
 To modify the sort order of report functionality in user config file(i.e. `~/.perfconfig`), do
 
-	% perf config --user report sort-order=srcline
+	% perf config --user report.sort-order=srcline
 
 To change colors of selected line to other foreground and background colors
 in system config file (i.e. `$(sysconf)/perfconfig`), do
-- 
cgit v1.2.3-70-g09d2


From feca8a8342d3f53e394c9fc7d985b98ec0250ce1 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 16 Dec 2020 09:39:11 +0100
Subject: perf tools: Reformat record's control fd man text

Adding available control commands in separate paragraph, so it's more
readable and easier to add new commands.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Budankov <abudankov@huawei.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20201216083914.47215-2-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 2d30e525a600..34cf651ee237 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -637,11 +637,17 @@ endif::HAVE_LIBPFM[]
 --control=fifo:ctl-fifo[,ack-fifo]::
 --control=fd:ctl-fd[,ack-fd]::
 ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows.
-Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
-'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be
-started with events disabled using --delay=-1 option. Optionally send control command
-completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process.
-Example of bash shell script to enable and disable events during measurements:
+Listen on ctl-fd descriptor for command to control measurement.
+
+Available commands:
+  'enable'  : enable events
+  'disable' : disable events
+  'snapshot': AUX area tracing snapshot).
+
+Measurements can be started with events disabled using --delay=-1 option. Optionally
+send control command completion ('ack\n') to ack-fd descriptor to synchronize with the
+controlling process.  Example of bash shell script to enable and disable events during
+measurements:
 
  #!/bin/bash
 
-- 
cgit v1.2.3-70-g09d2


From 4262f8c3efa1e79bd5950437a3eea58eeb4c1c70 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 14 Dec 2020 09:59:00 -0300
Subject: tools headers: Syncronize linux/build_bug.h with the kernel sources

To pick up the changes in:

  14dc3983b5dff513 ("kbuild: avoid static_assert for genksyms")

And silence this perf build warning:

  Warning: Kernel ABI header at 'tools/include/linux/build_bug.h' differs from latest version at 'include/linux/build_bug.h'
  diff -u tools/include/linux/build_bug.h include/linux/build_bug.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/build_bug.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
index cc7070c7439b..ce365d212768 100644
--- a/tools/include/linux/build_bug.h
+++ b/tools/include/linux/build_bug.h
@@ -79,4 +79,9 @@
 #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
 #endif // static_assert
 
+#ifdef __GENKSYMS__
+/* genksyms gets confused by _Static_assert */
+#define _Static_assert(expr, ...)
+#endif
+
 #endif	/* _LINUX_BUILD_BUG_H */
-- 
cgit v1.2.3-70-g09d2


From 1c28a05d1a972594164efc7fcffda416c5d6ab02 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 4 Dec 2020 09:13:16 -0300
Subject: tools headers UAPI: Sync linux/stat.h with the kernel sources

To pick the changes in:

  72d1249e2ffdbc34 ("uapi: fix statx attribute value overlap for DAX & MOUNT_ROOT")

That don't cause any change in tooling, just addresses this perf build
warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/stat.h' differs from latest version at 'include/uapi/linux/stat.h'
  diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Eric Sandeen <sandeen@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/stat.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 82cc58fe9368..1500a0f58041 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -171,9 +171,12 @@ struct statx {
  * be of use to ordinary userspace programs such as GUIs or ls rather than
  * specialised tools.
  *
- * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * Note that the flags marked [I] correspond to the FS_IOC_SETFLAGS flags
  * semantically.  Where possible, the numerical value is picked to correspond
- * also.
+ * also.  Note that the DAX attribute indicates that the file is in the CPU
+ * direct access state.  It does not correspond to the per-inode flag that
+ * some filesystems support.
+ *
  */
 #define STATX_ATTR_COMPRESSED		0x00000004 /* [I] File is compressed by the fs */
 #define STATX_ATTR_IMMUTABLE		0x00000010 /* [I] File is marked immutable */
@@ -183,7 +186,7 @@ struct statx {
 #define STATX_ATTR_AUTOMOUNT		0x00001000 /* Dir: Automount trigger */
 #define STATX_ATTR_MOUNT_ROOT		0x00002000 /* Root of a mount */
 #define STATX_ATTR_VERITY		0x00100000 /* [I] Verity protected file */
-#define STATX_ATTR_DAX			0x00002000 /* [I] File is DAX */
+#define STATX_ATTR_DAX			0x00200000 /* File is currently in DAX state */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
-- 
cgit v1.2.3-70-g09d2


From 219d04992b689e0498ece02d2a451f2b6e2563a9 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 16 Dec 2020 12:48:34 +0100
Subject: mptcp: push pending frames when subflow has free space

When multiple subflows are active, we can receive a
window update on subflow with no write space available.
MPTCP will try to push frames on such subflow and will
fail. Pending frames will be pushed only after receiving
a window update on a subflow with some wspace available.

Overall the above could lead to suboptimal aggregate
bandwidth usage.

Instead, we should try to push pending frames as soon as
the subflow reaches both conditions mentioned above.

We can finally enable self-tests with asymmetric links,
as the above makes them finally pass.

Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge")
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/options.c                               | 13 ++++++++-----
 net/mptcp/protocol.c                              |  2 +-
 net/mptcp/protocol.h                              |  2 +-
 tools/testing/selftests/net/mptcp/simult_flows.sh |  6 +++---
 4 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e8a1adf299d8..e0d21c0607e5 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -875,10 +875,13 @@ static void ack_update_msk(struct mptcp_sock *msk,
 
 	new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
 
-	if (after64(new_wnd_end, msk->wnd_end)) {
+	if (after64(new_wnd_end, msk->wnd_end))
 		msk->wnd_end = new_wnd_end;
-		__mptcp_wnd_updated(sk, ssk);
-	}
+
+	/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
+	if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
+	    sk_stream_memory_free(ssk))
+		__mptcp_check_push(sk, ssk);
 
 	if (after64(new_snd_una, old_snd_una)) {
 		msk->snd_una = new_snd_una;
@@ -944,8 +947,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 		 * helpers are cheap.
 		 */
 		mptcp_data_lock(subflow->conn);
-		if (mptcp_send_head(subflow->conn))
-			__mptcp_wnd_updated(subflow->conn, sk);
+		if (sk_stream_memory_free(sk))
+			__mptcp_check_push(subflow->conn, sk);
 		__mptcp_data_acked(subflow->conn);
 		mptcp_data_unlock(subflow->conn);
 		return;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 64c0c54c80e8..b53a91801a6c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2915,7 +2915,7 @@ void __mptcp_data_acked(struct sock *sk)
 		mptcp_schedule_work(sk);
 }
 
-void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk)
+void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 {
 	if (!mptcp_send_head(sk))
 		return;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7cf9d110b85f..d67de793d363 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -503,7 +503,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
 void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 bool mptcp_finish_join(struct sock *sk);
 bool mptcp_schedule_work(struct sock *sk);
-void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk);
+void __mptcp_check_push(struct sock *sk, struct sock *ssk);
 void __mptcp_data_acked(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 2f649b431456..f039ee57eb3c 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -287,7 +287,7 @@ run_test 10 10 0 0 "balanced bwidth"
 run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
 
 # we still need some additional infrastructure to pass the following test-cases
-# run_test 30 10 0 0 "unbalanced bwidth"
-# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
-# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+run_test 30 10 0 0 "unbalanced bwidth"
+run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
 exit $ret
-- 
cgit v1.2.3-70-g09d2


From 4bba4c4bb09ad4a2b70836725e08439c86d8f9e4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 18 Dec 2020 09:59:05 -0300
Subject: tools headers: Get tools's linux/compiler.h closer to the kernel's

We're cherry picking stuff from the kernel to allow for the other
headers that we keep in sync via tools/perf/check-headers.sh to work,
so introduce linux/compiler_types.h and from there get the compiler
specific stuff.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/compiler.h       |  4 +---
 tools/include/linux/compiler_types.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)
 create mode 100644 tools/include/linux/compiler_types.h

(limited to 'tools')

diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index d22a974372c0..ff872dc2637c 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -2,9 +2,7 @@
 #ifndef _TOOLS_LINUX_COMPILER_H_
 #define _TOOLS_LINUX_COMPILER_H_
 
-#ifdef __GNUC__
-#include <linux/compiler-gcc.h>
-#endif
+#include <linux/compiler_types.h>
 
 #ifndef __compiletime_error
 # define __compiletime_error(message)
diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
new file mode 100644
index 000000000000..31fc2caa758a
--- /dev/null
+++ b/tools/include/linux/compiler_types.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COMPILER_TYPES_H
+#define __LINUX_COMPILER_TYPES_H
+
+/* Compiler specific macros. */
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
+#endif
+
+#endif /* __LINUX_COMPILER_TYPES_H */
-- 
cgit v1.2.3-70-g09d2


From ffb9beb13e8daf3fcb6bab470d07962b05d619b7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 18 Dec 2020 10:16:08 -0300
Subject: tools headers: Add conditional __has_builtin()

As it'll be used by the ctype.h sync with its kernel source original.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/compiler_types.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'tools')

diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h
index 31fc2caa758a..feea09029f61 100644
--- a/tools/include/linux/compiler_types.h
+++ b/tools/include/linux/compiler_types.h
@@ -2,6 +2,17 @@
 #ifndef __LINUX_COMPILER_TYPES_H
 #define __LINUX_COMPILER_TYPES_H
 
+/* Builtins */
+
+/*
+ * __has_builtin is supported on gcc >= 10, clang >= 3 and icc >= 21.
+ * In the meantime, to support gcc < 10, we implement __has_builtin
+ * by hand.
+ */
+#ifndef __has_builtin
+#define __has_builtin(x) (0)
+#endif
+
 /* Compiler specific macros. */
 #ifdef __GNUC__
 #include <linux/compiler-gcc.h>
-- 
cgit v1.2.3-70-g09d2


From 23cd9543a52b96ac75d666eee3576b47f1901248 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 14:41:21 -0300
Subject: tools headers: Update linux/ctype.h with the kernel sources

To pick up the changes in:

  caabdd0f59a9771e ("ctype.h: remove duplicate isdigit() helper")

Addressing this perf build warning:

  Warning: Kernel ABI header at 'tools/include/linux/ctype.h' differs from latest version at 'include/linux/ctype.h'
  diff -u tools/include/linux/ctype.h include/linux/ctype.h

And we need to continue using the combination of:

  inline __isdigit()
  #define isdigit() __isdigit

When the __has_builtin() thing isn't available, as it is a builtin in
older systems with it as a builtin but with compilers not hacinv
__has_builtin(), rendering the __has_builtin() check useless otherwise.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/ctype.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/include/linux/ctype.h b/tools/include/linux/ctype.h
index 310090b4c474..29ed3fe94404 100644
--- a/tools/include/linux/ctype.h
+++ b/tools/include/linux/ctype.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_CTYPE_H
 #define _LINUX_CTYPE_H
 
+#include <linux/compiler.h>
+
 /*
  * NOTE! This ctype does not handle EOF like the standard C
  * library is required to.
@@ -23,11 +25,6 @@ extern const unsigned char _ctype[];
 #define isalnum(c)	((__ismask(c)&(_U|_L|_D)) != 0)
 #define isalpha(c)	((__ismask(c)&(_U|_L)) != 0)
 #define iscntrl(c)	((__ismask(c)&(_C)) != 0)
-static inline int __isdigit(int c)
-{
-	return '0' <= c && c <= '9';
-}
-#define isdigit(c)	__isdigit(c)
 #define isgraph(c)	((__ismask(c)&(_P|_U|_L|_D)) != 0)
 #define islower(c)	((__ismask(c)&(_L)) != 0)
 #define isprint(c)	((__ismask(c)&(_P|_U|_L|_D|_SP)) != 0)
@@ -40,6 +37,16 @@ static inline int __isdigit(int c)
 #define isascii(c) (((unsigned char)(c))<=0x7f)
 #define toascii(c) (((unsigned char)(c))&0x7f)
 
+#if __has_builtin(__builtin_isdigit)
+#define  isdigit(c) __builtin_isdigit(c)
+#else
+static inline int __isdigit(int c)
+{
+	return '0' <= c && c <= '9';
+}
+#define  isdigit(c) __isdigit(c)
+#endif
+
 static inline unsigned char __tolower(unsigned char c)
 {
 	if (isupper(c))
-- 
cgit v1.2.3-70-g09d2


From eb2842da77e1f7a3c46033f930524ab76dffe67a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 14:42:48 -0300
Subject: perf trace beauty: Update copy of linux/socket.h with the kernel
 sources

This just triggers the rebuilding of the syscall beautifiers that
extract patterns from this file due to this cset:

  b713c195d5933227 ("net: provide __sys_shutdown_sock() that takes a socket")

After updating it:

    CC       /tmp/build/perf/trace/beauty/sockaddr.o

Addressing this perf build warning:

  Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h'
  diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/include/linux/socket.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index e9cb30d8cbfb..385894b4a8bb 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
 			     int __user *usockaddr_len);
 extern int __sys_socketpair(int family, int type, int protocol,
 			    int __user *usockvec);
+extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
 
 extern struct ns_common *get_net_ns(struct ns_common *ns);
-- 
cgit v1.2.3-70-g09d2


From e9bde94f1eb53c5721ba8e477dee837632fedebe Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 14:46:24 -0300
Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources

To pick up the changes in:

  d205e0f1426e0f99 ("x86/{cpufeatures,msr}: Add Intel SGX Launch Control hardware bits")
  e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits")
  43756a298928c9a4 ("powercap: Add AMD Fam17h RAPL support")
  298ed2b31f552806 ("x86/msr-index: sort AMD RAPL MSRs by address")
  68299a42f8428853 ("x86/mce: Enable additional error logging on certain Intel CPUs")

That cause these changes in tooling:

  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before
  $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h
  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after
  $ diff -u before after
  --- before	2020-12-17 14:45:49.036994450 -0300
  +++ after	2020-12-17 14:46:01.654256639 -0300
  @@ -22,6 +22,10 @@
   	[0x00000060] = "LBR_CORE_TO",
   	[0x00000079] = "IA32_UCODE_WRITE",
   	[0x0000008b] = "IA32_UCODE_REV",
  +	[0x0000008C] = "IA32_SGXLEPUBKEYHASH0",
  +	[0x0000008D] = "IA32_SGXLEPUBKEYHASH1",
  +	[0x0000008E] = "IA32_SGXLEPUBKEYHASH2",
  +	[0x0000008F] = "IA32_SGXLEPUBKEYHASH3",
   	[0x0000009b] = "IA32_SMM_MONITOR_CTL",
   	[0x0000009e] = "IA32_SMBASE",
   	[0x000000c1] = "IA32_PERFCTR0",
  @@ -59,6 +63,7 @@
   	[0x00000179] = "IA32_MCG_CAP",
   	[0x0000017a] = "IA32_MCG_STATUS",
   	[0x0000017b] = "IA32_MCG_CTL",
  +	[0x0000017f] = "ERROR_CONTROL",
   	[0x00000180] = "IA32_MCG_EAX",
   	[0x00000181] = "IA32_MCG_EBX",
   	[0x00000182] = "IA32_MCG_ECX",
  @@ -294,6 +299,7 @@
   	[0xc0010241 - x86_AMD_V_KVM_MSRs_offset] = "F15H_NB_PERF_CTR",
   	[0xc0010280 - x86_AMD_V_KVM_MSRs_offset] = "F15H_PTSC",
   	[0xc0010299 - x86_AMD_V_KVM_MSRs_offset] = "AMD_RAPL_POWER_UNIT",
  +	[0xc001029a - x86_AMD_V_KVM_MSRs_offset] = "AMD_CORE_ENERGY_STATUS",
   	[0xc001029b - x86_AMD_V_KVM_MSRs_offset] = "AMD_PKG_ENERGY_STATUS",
   	[0xc00102f0 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN_CTL",
   	[0xc00102f1 - x86_AMD_V_KVM_MSRs_offset] = "AMD_PPIN",
  $

Which causes these parts of tools/perf/ to be rebuilt:

  CC       /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o
  LD       /tmp/build/perf/trace/beauty/tracepoints/perf-in.o
  LD       /tmp/build/perf/trace/beauty/perf-in.o
  LD       /tmp/build/perf/perf-in.o
  LINK     /tmp/build/perf/perf

At some point these should just be tables read by perf on demand.

This allows 'perf trace' users to use those strings to translate from
the msr ids provided by the msr: tracepoints.

This addresses this perf tools build warning:

  diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Victor Ding <victording@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/msr-index.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 972a34d93505..2b5fc9accec4 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -139,6 +139,7 @@
 #define MSR_IA32_MCG_CAP		0x00000179
 #define MSR_IA32_MCG_STATUS		0x0000017a
 #define MSR_IA32_MCG_CTL		0x0000017b
+#define MSR_ERROR_CONTROL		0x0000017f
 #define MSR_IA32_MCG_EXT_CTL		0x000004d0
 
 #define MSR_OFFCORE_RSP_0		0x000001a6
@@ -326,8 +327,9 @@
 #define MSR_PP1_ENERGY_STATUS		0x00000641
 #define MSR_PP1_POLICY			0x00000642
 
-#define MSR_AMD_PKG_ENERGY_STATUS	0xc001029b
 #define MSR_AMD_RAPL_POWER_UNIT		0xc0010299
+#define MSR_AMD_CORE_ENERGY_STATUS		0xc001029a
+#define MSR_AMD_PKG_ENERGY_STATUS	0xc001029b
 
 /* Config TDP MSRs */
 #define MSR_CONFIG_TDP_NOMINAL		0x00000648
@@ -609,6 +611,8 @@
 #define FEAT_CTL_LOCKED				BIT(0)
 #define FEAT_CTL_VMX_ENABLED_INSIDE_SMX		BIT(1)
 #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX	BIT(2)
+#define FEAT_CTL_SGX_LC_ENABLED			BIT(17)
+#define FEAT_CTL_SGX_ENABLED			BIT(18)
 #define FEAT_CTL_LMCE_ENABLED			BIT(20)
 
 #define MSR_IA32_TSC_ADJUST             0x0000003b
@@ -628,6 +632,12 @@
 #define MSR_IA32_UCODE_WRITE		0x00000079
 #define MSR_IA32_UCODE_REV		0x0000008b
 
+/* Intel SGX Launch Enclave Public Key Hash MSRs */
+#define MSR_IA32_SGXLEPUBKEYHASH0	0x0000008C
+#define MSR_IA32_SGXLEPUBKEYHASH1	0x0000008D
+#define MSR_IA32_SGXLEPUBKEYHASH2	0x0000008E
+#define MSR_IA32_SGXLEPUBKEYHASH3	0x0000008F
+
 #define MSR_IA32_SMM_MONITOR_CTL	0x0000009b
 #define MSR_IA32_SMBASE			0x0000009e
 
-- 
cgit v1.2.3-70-g09d2


From 7ddcdea5b54492f54700f427f58690cf1e187e5e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 14:55:01 -0300
Subject: tools headers UAPI: Sync linux/const.h with the kernel headers

To pick up the changes in:

  a85cbe6159ffc973 ("uapi: move constants from <linux/kernel.h> to <linux/const.h>")

That causes no changes in tooling, just addresses this perf build
warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/const.h' differs from latest version at 'include/uapi/linux/const.h'
  diff -u tools/include/uapi/linux/const.h include/uapi/linux/const.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Petr Vorel <petr.vorel@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/const.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index 5ed721ad5b19..af2a44c08683 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -28,4 +28,9 @@
 #define _BITUL(x)	(_UL(1) << (x))
 #define _BITULL(x)	(_ULL(1) << (x))
 
+#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
+
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
 #endif /* _UAPI_LINUX_CONST_H */
-- 
cgit v1.2.3-70-g09d2


From 4a443a51776ca9847942523cf987a330894d3a31 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 14:58:51 -0300
Subject: tools headers UAPI: Sync linux/fscrypt.h with the kernel sources

To pick the changes from:

  3ceb6543e9cf6ed8 ("fscrypt: remove kernel-internal constants from UAPI header")

That don't result in any changes in tooling, just addressing this perf
build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/fscrypt.h' differs from latest version at 'include/uapi/linux/fscrypt.h'
  diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/fscrypt.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index e5de60336938..9f4428be3e36 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -20,7 +20,6 @@
 #define FSCRYPT_POLICY_FLAG_DIRECT_KEY		0x04
 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64	0x08
 #define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32	0x10
-#define FSCRYPT_POLICY_FLAGS_VALID		0x1F
 
 /* Encryption algorithms */
 #define FSCRYPT_MODE_AES_256_XTS		1
@@ -28,7 +27,7 @@
 #define FSCRYPT_MODE_AES_128_CBC		5
 #define FSCRYPT_MODE_AES_128_CTS		6
 #define FSCRYPT_MODE_ADIANTUM			9
-#define __FSCRYPT_MODE_MAX			9
+/* If adding a mode number > 9, update FSCRYPT_MODE_MAX in fscrypt_private.h */
 
 /*
  * Legacy policy version; ad-hoc KDF and no key verification.
@@ -177,7 +176,7 @@ struct fscrypt_get_key_status_arg {
 #define FS_POLICY_FLAGS_PAD_32		FSCRYPT_POLICY_FLAGS_PAD_32
 #define FS_POLICY_FLAGS_PAD_MASK	FSCRYPT_POLICY_FLAGS_PAD_MASK
 #define FS_POLICY_FLAG_DIRECT_KEY	FSCRYPT_POLICY_FLAG_DIRECT_KEY
-#define FS_POLICY_FLAGS_VALID		FSCRYPT_POLICY_FLAGS_VALID
+#define FS_POLICY_FLAGS_VALID		0x07	/* contains old flags only */
 #define FS_ENCRYPTION_MODE_INVALID	0	/* never used */
 #define FS_ENCRYPTION_MODE_AES_256_XTS	FSCRYPT_MODE_AES_256_XTS
 #define FS_ENCRYPTION_MODE_AES_256_GCM	2	/* never used */
-- 
cgit v1.2.3-70-g09d2


From d6dbfceec5dd41becbe8c47c402240925d31036a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 15:01:08 -0300
Subject: tools headers UAPI: Sync linux/prctl.h with the kernel sources

To pick a new prctl introduced in:

  1446e1df9eb183fd ("kernel: Implement selective syscall userspace redirection")

That results in:

  $ tools/perf/trace/beauty/prctl_option.sh > before
  $ cp include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h
  $ tools/perf/trace/beauty/prctl_option.sh > after
  $ diff -u before after
  --- before	2020-12-17 15:00:42.012537367 -0300
  +++ after	2020-12-17 15:00:49.832699463 -0300
  @@ -53,6 +53,7 @@
   	[56] = "GET_TAGGED_ADDR_CTRL",
   	[57] = "SET_IO_FLUSHER",
   	[58] = "GET_IO_FLUSHER",
  +	[59] = "SET_SYSCALL_USER_DISPATCH",
   };
   static const char *prctl_set_mm_options[] = {
   	[1] = "START_CODE",
  $

Now users can do:

  # perf trace -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH"
^C#
  # trace -v -e syscalls:sys_enter_prctl --filter "option==SET_SYSCALL_USER_DISPATCH"
  New filter for syscalls:sys_enter_prctl: (option==0x3b) && (common_pid != 5519 && common_pid != 3404)
^C#

And also when prctl appears in a session, its options will be
translated to the string.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Gabriel Krisman Bertazi <krisman@collabora.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/prctl.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 7f0827705c9a..90deb41c8a34 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -247,4 +247,9 @@ struct prctl_mm_map {
 #define PR_SET_IO_FLUSHER		57
 #define PR_GET_IO_FLUSHER		58
 
+/* Dispatch syscalls to a userspace handler */
+#define PR_SET_SYSCALL_USER_DISPATCH	59
+# define PR_SYS_DISPATCH_OFF		0
+# define PR_SYS_DISPATCH_ON		1
+
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3-70-g09d2


From f93c789a3e245707e3eddcaab5c2b7c62615692d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 15:44:29 -0300
Subject: tools headers cpufeatures: Sync with the kernel sources

To pick the changes in:

  e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits")

That causes only these 'perf bench' objects to rebuild:

  CC       /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o
  CC       /tmp/build/perf/bench/mem-memset-x86-64-asm.o

And addresses these perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h'
  diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h'
  diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h       | 2 ++
 tools/arch/x86/include/asm/disabled-features.h | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index dad350d42ecf..f5ef2d5b9231 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -241,6 +241,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_TSC_ADJUST		( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX			( 9*32+ 2) /* Software Guard Extensions */
 #define X86_FEATURE_BMI1		( 9*32+ 3) /* 1st group bit manipulation extensions */
 #define X86_FEATURE_HLE			( 9*32+ 4) /* Hardware Lock Elision */
 #define X86_FEATURE_AVX2		( 9*32+ 5) /* AVX2 instructions */
@@ -356,6 +357,7 @@
 #define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
 #define X86_FEATURE_ENQCMD		(16*32+29) /* ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC		(16*32+30) /* Software Guard Extensions Launch Control */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 5861d34f9771..7947cb1782da 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -62,6 +62,12 @@
 # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
 #endif
 
+#ifdef CONFIG_X86_SGX
+# define DISABLE_SGX	0
+#else
+# define DISABLE_SGX	(1 << (X86_FEATURE_SGX & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -74,7 +80,7 @@
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
-#define DISABLED_MASK9	(DISABLE_SMAP)
+#define DISABLED_MASK9	(DISABLE_SMAP|DISABLE_SGX)
 #define DISABLED_MASK10	0
 #define DISABLED_MASK11	0
 #define DISABLED_MASK12	0
-- 
cgit v1.2.3-70-g09d2


From b53d4872d2cfbce117abedee2a29a93e624e4e32 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 17 Dec 2020 15:48:06 -0300
Subject: tools headers UAPI: Update asm-generic/unistd.h

Just a comment change, trivial.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 2056318988f7..fc48c64700eb 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -517,7 +517,7 @@ __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
 __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
 #endif
 
-/* kernel/timer.c */
+/* kernel/sys.c */
 #define __NR_getpid 172
 __SYSCALL(__NR_getpid, sys_getpid)
 #define __NR_getppid 173
-- 
cgit v1.2.3-70-g09d2


From 697d1549140cdcdc4cfcd0bf94e62643008972b7 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Wed, 9 Dec 2020 16:42:03 +0800
Subject: tools/virtio: include asm/bug.h

WARN_ON is used in drivers/vhost/vringh.c, to avoid build failure,
need include asm/bug.h

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20201209084205.24062-2-peng.fan@oss.nxp.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/linux/bug.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h
index b14c2c3b6b85..813baf13f62a 100644
--- a/tools/virtio/linux/bug.h
+++ b/tools/virtio/linux/bug.h
@@ -2,6 +2,8 @@
 #ifndef BUG_H
 #define BUG_H
 
+#include <asm/bug.h>
+
 #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
 
 #define BUILD_BUG_ON(x)
-- 
cgit v1.2.3-70-g09d2


From b9ca93bcd186ec4144df91c619f6084cdad500ec Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Wed, 9 Dec 2020 16:42:04 +0800
Subject: tools/virtio: add krealloc_array

krealloc_array is used in drivers/vhost/vringh.c, add it to avoid build
failure.

Drop WARN_ON_ONCE, because duplicated with the one in bug.h

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20201209084205.24062-3-peng.fan@oss.nxp.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/linux/kernel.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 315e85cabeda..0b493542e61a 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -11,6 +11,7 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/overflow.h>
 #include <linux/list.h>
 #include <linux/printk.h>
 #include <linux/bug.h>
@@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr)
 #  define unlikely(x)	(__builtin_expect(!!(x), 0))
 # endif
 
+static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp)
+{
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
+		return NULL;
+
+	return krealloc(p, bytes, gfp);
+}
+
 #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #ifdef DEBUG
 #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
@@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr)
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
-#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0)
-
 #define min(x, y) ({				\
 	typeof(x) _min1 = (x);			\
 	typeof(y) _min2 = (y);			\
-- 
cgit v1.2.3-70-g09d2


From 1a5514cbb09aaf694d26ef26fd6da5c5d495cc22 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Wed, 9 Dec 2020 16:42:05 +0800
Subject: tools/virtio: add barrier for aarch64

Add barrier for aarch64 for cross compiling, and  most are from Linux Kernel.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Link: https://lore.kernel.org/r/20201209084205.24062-4-peng.fan@oss.nxp.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/asm/barrier.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 04d563fc9b95..468435ed64e6 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -16,6 +16,16 @@
 # define mb() abort()
 # define dma_rmb() abort()
 # define dma_wmb() abort()
+#elif defined(__aarch64__)
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() dmb(ishld)
+#define virt_wmb() dmb(ishst)
+#define virt_store_mb(var, value)  do { WRITE_ONCE(var, value); dmb(ish); } while (0)
+/* Weak barriers should be used. If not - it's a bug */
+# define mb() abort()
+# define dma_rmb() abort()
+# define dma_wmb() abort()
 #else
 #error Please fill in barrier macros
 #endif
-- 
cgit v1.2.3-70-g09d2


From ca202504ea6f04b2e724741100ab63f8f018a8af Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 18 Dec 2020 15:54:12 +0100
Subject: selftests/core: fix close_range_test build after XFAIL removal

XFAIL was removed in commit 9847d24af95c ("selftests/harness: Refactor
XFAIL into SKIP") and its use in close_range_test was already replaced
by commit 1d44d0dd61b6 ("selftests: core: use SKIP instead of XFAIL in
close_range_test.c"). However, commit 23afeaeff3d9 ("selftests: core:
add tests for CLOSE_RANGE_CLOEXEC") introduced usage of XFAIL in
TEST(close_range_cloexec). Use SKIP there as well.

Fixes: 23afeaeff3d9 ("selftests: core: add tests for CLOSE_RANGE_CLOEXEC")
Cc: Giuseppe Scrivano <gscrivan@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/20201218112428.13662-1-tklauser@distanz.ch
Link: https://lore.kernel.org/r/20201218145415.801063-1-christian.brauner@ubuntu.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/core/close_range_test.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 87e16d65d9d7..c97dd1a7abd6 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -102,7 +102,7 @@ TEST(close_range_unshare)
 	int i, ret, status;
 	pid_t pid;
 	int open_fds[101];
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_FILES,
 		.exit_signal = SIGCHLD,
 	};
@@ -191,7 +191,7 @@ TEST(close_range_unshare_capped)
 	int i, ret, status;
 	pid_t pid;
 	int open_fds[101];
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_FILES,
 		.exit_signal = SIGCHLD,
 	};
@@ -241,7 +241,7 @@ TEST(close_range_cloexec)
 		fd = open("/dev/null", O_RDONLY);
 		ASSERT_GE(fd, 0) {
 			if (errno == ENOENT)
-				XFAIL(return, "Skipping test since /dev/null does not exist");
+				SKIP(return, "Skipping test since /dev/null does not exist");
 		}
 
 		open_fds[i] = fd;
@@ -250,9 +250,9 @@ TEST(close_range_cloexec)
 	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
 	if (ret < 0) {
 		if (errno == ENOSYS)
-			XFAIL(return, "close_range() syscall not supported");
+			SKIP(return, "close_range() syscall not supported");
 		if (errno == EINVAL)
-			XFAIL(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
 	}
 
 	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
-- 
cgit v1.2.3-70-g09d2


From ae78ba8d3bb66dfe8c0f7b7ec5ffe3f6a13feb86 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 18 Dec 2020 15:54:13 +0100
Subject: selftests/core: handle missing syscall number for close_range

This improves the syscall number handling in the close_range()
selftests. This should handle any architecture.

Cc: Giuseppe Scrivano <gscrivan@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20201218145415.801063-2-christian.brauner@ubuntu.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/core/close_range_test.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index c97dd1a7abd6..bc592a1372bb 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -17,7 +17,23 @@
 #include "../clone3/clone3_selftests.h"
 
 #ifndef __NR_close_range
-#define __NR_close_range -1
+	#if defined __alpha__
+		#define __NR_close_range 546
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_close_range (436 + 4000)
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_close_range (436 + 6000)
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_close_range (436 + 5000)
+		#endif
+	#elif defined __ia64__
+		#define __NR_close_range (436 + 1024)
+	#else
+		#define __NR_close_range 436
+	#endif
 #endif
 
 #ifndef CLOSE_RANGE_UNSHARE
-- 
cgit v1.2.3-70-g09d2


From fe325c3ff3188d551668c5847bac58463b9f3437 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 18 Dec 2020 15:54:14 +0100
Subject: selftests/core: add test for CLOSE_RANGE_UNSHARE |
 CLOSE_RANGE_CLOEXEC

Add a test to verify that CLOSE_RANGE_UNSHARE works correctly when combined
with CLOSE_RANGE_CLOEXEC for the single-threaded case.

Cc: Giuseppe Scrivano <gscrivan@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20201218145415.801063-3-christian.brauner@ubuntu.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/core/close_range_test.c | 70 +++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index bc592a1372bb..862444f1c244 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -313,5 +313,75 @@ TEST(close_range_cloexec)
 	}
 }
 
+TEST(close_range_cloexec_unshare)
+{
+	int i, ret;
+	int open_fds[101];
+	struct rlimit rlimit;
+
+	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+		int fd;
+
+		fd = open("/dev/null", O_RDONLY);
+		ASSERT_GE(fd, 0) {
+			if (errno == ENOENT)
+				SKIP(return, "Skipping test since /dev/null does not exist");
+		}
+
+		open_fds[i] = fd;
+	}
+
+	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "close_range() syscall not supported");
+		if (errno == EINVAL)
+			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+	}
+
+	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
+	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+	rlimit.rlim_cur = 25;
+	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
+
+	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
+	ret = sys_close_range(open_fds[0], open_fds[50],
+			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+	ASSERT_EQ(0, ret);
+	ret = sys_close_range(open_fds[75], open_fds[100],
+			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+	ASSERT_EQ(0, ret);
+
+	for (i = 0; i <= 50; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+
+	for (i = 51; i <= 74; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, 0);
+	}
+
+	for (i = 75; i <= 100; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+
+	/* Test a common pattern.  */
+	ret = sys_close_range(3, UINT_MAX,
+			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+	for (i = 0; i <= 100; i++) {
+		int flags = fcntl(open_fds[i], F_GETFD);
+
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+	}
+}
 
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3-70-g09d2


From 6abc20f8f879d891930f37186b19c9dc3ecc34dd Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Fri, 18 Dec 2020 15:54:15 +0100
Subject: selftests/core: add regression test for CLOSE_RANGE_UNSHARE |
 CLOSE_RANGE_CLOEXEC

This test is a minimalized version of the reproducer given by syzbot
(cf. [1]).

After introducing CLOSE_RANGE_CLOEXEC syzbot reported a crash when
CLOSE_RANGE_CLOEXEC is specified in conjunction with
CLOSE_RANGE_UNSHARE. When CLOSE_RANGE_UNSHARE is specified the caller
will receive a private file descriptor table in case their file
descriptor table is currently shared.
For the case where the caller has requested all file descriptors to be
actually closed via e.g. close_range(3, ~0U, 0) the kernel knows that
the caller does not need any of the file descriptors anymore and will
optimize the close operation by only copying all files in the range from
0 to 3 and no others.

However, if the caller requested CLOSE_RANGE_CLOEXEC together with
CLOSE_RANGE_UNSHARE the caller wants to still make use of the file
descriptors so the kernel needs to copy all of them and can't optimize.

The original patch didn't account for this and thus could cause oopses
as evidenced by the syzbot report. Add tests for this regression.

We first create a huge gap in the fd table. When we now call
CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
bound the kernel will only copy up to fd1 file descriptors into the new
fd table. If the kernel is buggy and doesn't handle CLOSE_RANGE_CLOEXEC
correctly it will not have copied all file descriptors and we will oops!

This test passes on a fixed kernel and will trigger an oops on a buggy
kernel.

[1]: https://syzkaller.appspot.com/text?tag=KernelConfig&x=db720fe37a6a41d8

Cc: Giuseppe Scrivano <gscrivan@redhat.com>
Cc: linux-fsdevel@vger.kernel.org
Link: syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/20201218145415.801063-4-christian.brauner@ubuntu.com
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/core/close_range_test.c | 183 ++++++++++++++++++++++++
 1 file changed, 183 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 862444f1c244..73eb29c916d1 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -384,4 +384,187 @@ TEST(close_range_cloexec_unshare)
 	}
 }
 
+/*
+ * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
+ */
+TEST(close_range_cloexec_syzbot)
+{
+	int fd1, fd2, fd3, flags, ret, status;
+	pid_t pid;
+	struct __clone_args args = {
+		.flags = CLONE_FILES,
+		.exit_signal = SIGCHLD,
+	};
+
+	/* Create a huge gap in the fd table. */
+	fd1 = open("/dev/null", O_RDWR);
+	EXPECT_GT(fd1, 0);
+
+	fd2 = dup2(fd1, 1000);
+	EXPECT_GT(fd2, 0);
+
+	pid = sys_clone3(&args, sizeof(args));
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
+		if (ret)
+			exit(EXIT_FAILURE);
+
+		/*
+			 * We now have a private file descriptor table and all
+			 * our open fds should still be open but made
+			 * close-on-exec.
+			 */
+		flags = fcntl(fd1, F_GETFD);
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+		flags = fcntl(fd2, F_GETFD);
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+		fd3 = dup2(fd1, 42);
+		EXPECT_GT(fd3, 0);
+
+		/*
+			 * Duplicating the file descriptor must remove the
+			 * FD_CLOEXEC flag.
+			 */
+		flags = fcntl(fd3, F_GETFD);
+		EXPECT_GT(flags, -1);
+		EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+		exit(EXIT_SUCCESS);
+	}
+
+	EXPECT_EQ(waitpid(pid, &status, 0), pid);
+	EXPECT_EQ(true, WIFEXITED(status));
+	EXPECT_EQ(0, WEXITSTATUS(status));
+
+	/*
+	 * We had a shared file descriptor table before along with requesting
+	 * close-on-exec so the original fds must not be close-on-exec.
+	 */
+	flags = fcntl(fd1, F_GETFD);
+	EXPECT_GT(flags, -1);
+	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+	flags = fcntl(fd2, F_GETFD);
+	EXPECT_GT(flags, -1);
+	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+	fd3 = dup2(fd1, 42);
+	EXPECT_GT(fd3, 0);
+
+	flags = fcntl(fd3, F_GETFD);
+	EXPECT_GT(flags, -1);
+	EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+	EXPECT_EQ(close(fd1), 0);
+	EXPECT_EQ(close(fd2), 0);
+	EXPECT_EQ(close(fd3), 0);
+}
+
+/*
+ * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
+ */
+TEST(close_range_cloexec_unshare_syzbot)
+{
+	int i, fd1, fd2, fd3, flags, ret, status;
+	pid_t pid;
+	struct __clone_args args = {
+		.flags = CLONE_FILES,
+		.exit_signal = SIGCHLD,
+	};
+
+	/*
+	 * Create a huge gap in the fd table. When we now call
+	 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
+	 * bound the kernel will only copy up to fd1 file descriptors into the
+	 * new fd table. If the kernel is buggy and doesn't handle
+	 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
+	 * descriptors and we will oops!
+	 *
+	 * On a buggy kernel this should immediately oops. But let's loop just
+	 * to be sure.
+	 */
+	fd1 = open("/dev/null", O_RDWR);
+	EXPECT_GT(fd1, 0);
+
+	fd2 = dup2(fd1, 1000);
+	EXPECT_GT(fd2, 0);
+
+	for (i = 0; i < 100; i++) {
+
+		pid = sys_clone3(&args, sizeof(args));
+		ASSERT_GE(pid, 0);
+
+		if (pid == 0) {
+			ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
+						      CLOSE_RANGE_CLOEXEC);
+			if (ret)
+				exit(EXIT_FAILURE);
+
+			/*
+			 * We now have a private file descriptor table and all
+			 * our open fds should still be open but made
+			 * close-on-exec.
+			 */
+			flags = fcntl(fd1, F_GETFD);
+			EXPECT_GT(flags, -1);
+			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+			flags = fcntl(fd2, F_GETFD);
+			EXPECT_GT(flags, -1);
+			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+			fd3 = dup2(fd1, 42);
+			EXPECT_GT(fd3, 0);
+
+			/*
+			 * Duplicating the file descriptor must remove the
+			 * FD_CLOEXEC flag.
+			 */
+			flags = fcntl(fd3, F_GETFD);
+			EXPECT_GT(flags, -1);
+			EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+			EXPECT_EQ(close(fd1), 0);
+			EXPECT_EQ(close(fd2), 0);
+			EXPECT_EQ(close(fd3), 0);
+
+			exit(EXIT_SUCCESS);
+		}
+
+		EXPECT_EQ(waitpid(pid, &status, 0), pid);
+		EXPECT_EQ(true, WIFEXITED(status));
+		EXPECT_EQ(0, WEXITSTATUS(status));
+	}
+
+	/*
+	 * We created a private file descriptor table before along with
+	 * requesting close-on-exec so the original fds must not be
+	 * close-on-exec.
+	 */
+	flags = fcntl(fd1, F_GETFD);
+	EXPECT_GT(flags, -1);
+	EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+	flags = fcntl(fd2, F_GETFD);
+	EXPECT_GT(flags, -1);
+	EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+	fd3 = dup2(fd1, 42);
+	EXPECT_GT(fd3, 0);
+
+	flags = fcntl(fd3, F_GETFD);
+	EXPECT_GT(flags, -1);
+	EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+	EXPECT_EQ(close(fd1), 0);
+	EXPECT_EQ(close(fd2), 0);
+	EXPECT_EQ(close(fd3), 0);
+}
+
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3-70-g09d2


From e9ce39b5b390e0e5944a46328cb0a18d132de532 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 18 Dec 2020 14:05:44 -0800
Subject: selftests/filesystems: expand epoll with epoll_pwait2

Code coverage for the epoll_pwait2 syscall.

epoll62: Repeat basic test epoll1, but exercising the new syscall.
epoll63: Pass a timespec and exercise the timeout wakeup path.

Link: https://lkml.kernel.org/r/20201121144401.3727659-5-willemdebruijn.kernel@gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../filesystems/epoll/epoll_wakeup_test.c          | 72 ++++++++++++++++++++++
 1 file changed, 72 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index 8f82f99f7748..ad7fabd575f9 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #define _GNU_SOURCE
+#include <asm/unistd.h>
+#include <linux/time_types.h>
 #include <poll.h>
 #include <unistd.h>
 #include <assert.h>
@@ -21,6 +23,19 @@ struct epoll_mtcontext
 	pthread_t waiter;
 };
 
+#ifndef __NR_epoll_pwait2
+#define __NR_epoll_pwait2 -1
+#endif
+
+static inline int sys_epoll_pwait2(int fd, struct epoll_event *events,
+				   int maxevents,
+				   const struct __kernel_timespec *timeout,
+				   const sigset_t *sigset, size_t sigsetsize)
+{
+	return syscall(__NR_epoll_pwait2, fd, events, maxevents, timeout,
+		       sigset, sigsetsize);
+}
+
 static void signal_handler(int signum)
 {
 }
@@ -3377,4 +3392,61 @@ TEST(epoll61)
 	close(ctx.evfd);
 }
 
+/* Equivalent to basic test epoll1, but exercising epoll_pwait2. */
+TEST(epoll62)
+{
+	int efd;
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+	EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
+	EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
+/* Epoll_pwait2 basic timeout test. */
+TEST(epoll63)
+{
+	const int cfg_delay_ms = 10;
+	unsigned long long tdiff;
+	struct __kernel_timespec ts;
+	int efd;
+	int sfd[2];
+	struct epoll_event e;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+	efd = epoll_create(1);
+	ASSERT_GE(efd, 0);
+
+	e.events = EPOLLIN;
+	ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = cfg_delay_ms * 1000 * 1000;
+
+	tdiff = msecs();
+	EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, &ts, NULL, 0), 0);
+	tdiff = msecs() - tdiff;
+
+	EXPECT_GE(tdiff, cfg_delay_ms);
+
+	close(efd);
+	close(sfd[0]);
+	close(sfd[1]);
+}
+
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3-70-g09d2


From 6b9bae63de4fe24365ad0c2d23e77ae06f8c58e4 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Wed, 16 Dec 2020 10:57:57 -0800
Subject: perf script: Support data page size

Display the data page size if it is available and asked by the user:

Can be configured by the user, for example:

  perf script --fields comm,event,phys_addr,data_page_size
            dtlb mem-loads:uP:        3fec82ea8 4K
            dtlb mem-loads:uP:        3fec82e90 4K
            dtlb mem-loads:uP:        3e23700a4 4K
            dtlb mem-loads:uP:        3fec82f20 4K
            dtlb mem-loads:uP:        3e23700a4 4K
            dtlb mem-loads:uP:        3b4211bec 4K
            dtlb mem-loads:uP:        382205dc0 2M
            dtlb mem-loads:uP:        36fa082c0 2M
            dtlb mem-loads:uP:        377607340 2M
            dtlb mem-loads:uP:        330010180 2M
            dtlb mem-loads:uP:        33200fd80 2M
            dtlb mem-loads:uP:        31b012b80 2M

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Stephane Eranian <eranian@google.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lore.kernel.org/lkml/20201216185805.9981-2-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |  5 +++--
 tools/perf/builtin-script.c              | 17 +++++++++++++++--
 tools/perf/util/event.h                  |  3 +++
 tools/perf/util/session.c                | 13 +++++++++++++
 4 files changed, 34 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 4f712fb8f175..44d37210fc8f 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,9 @@ OPTIONS
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-        srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
-        brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc.
+        srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
+        brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr,
+        metric, misc, srccode, ipc, data_page_size.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 1c322c129185..edacfa98d073 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -30,6 +30,7 @@
 #include "util/thread-stack.h"
 #include "util/time-utils.h"
 #include "util/path.h"
+#include "util/event.h"
 #include "ui/ui.h"
 #include "print_binary.h"
 #include "archinsn.h"
@@ -115,6 +116,7 @@ enum perf_output_field {
 	PERF_OUTPUT_SRCCODE	    = 1ULL << 30,
 	PERF_OUTPUT_IPC             = 1ULL << 31,
 	PERF_OUTPUT_TOD             = 1ULL << 32,
+	PERF_OUTPUT_DATA_PAGE_SIZE  = 1ULL << 33,
 };
 
 struct perf_script {
@@ -179,6 +181,7 @@ struct output_option {
 	{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
 	{.str = "ipc", .field = PERF_OUTPUT_IPC},
 	{.str = "tod", .field = PERF_OUTPUT_TOD},
+	{.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE},
 };
 
 enum {
@@ -251,7 +254,8 @@ static struct {
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
 			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
 			      PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
-			      PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR,
+			      PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR |
+			      PERF_OUTPUT_DATA_PAGE_SIZE,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -499,6 +503,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
 	    evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
 		return -EINVAL;
 
+	if (PRINT_FIELD(DATA_PAGE_SIZE) &&
+	    evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE))
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -1920,6 +1928,7 @@ static void process_event(struct perf_script *script,
 	unsigned int type = output_type(attr->type);
 	struct evsel_script *es = evsel->priv;
 	FILE *fp = es->fp;
+	char str[PAGE_SIZE_NAME_LEN];
 
 	if (output[type].fields == 0)
 		return;
@@ -2008,6 +2017,9 @@ static void process_event(struct perf_script *script,
 	if (PRINT_FIELD(PHYS_ADDR))
 		fprintf(fp, "%16" PRIx64, sample->phys_addr);
 
+	if (PRINT_FIELD(DATA_PAGE_SIZE))
+		fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str));
+
 	perf_sample__fprintf_ipc(sample, attr, fp);
 
 	fprintf(fp, "\n");
@@ -3506,7 +3518,8 @@ int cmd_script(int argc, const char **argv)
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,srcline,period,iregs,uregs,brstack,"
 		     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
-		     "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod",
+		     "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod,"
+		     "data_page_size",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 448ac30c2fc4..ff403ea578e1 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -409,4 +409,7 @@ extern int sysctl_perf_event_max_stack;
 extern int sysctl_perf_event_max_contexts_per_stack;
 extern unsigned int proc_map_timeout;
 
+#define PAGE_SIZE_NAME_LEN	32
+char *get_page_size_name(u64 size, char *str);
+
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3b3c50b12791..50ff9795a4f1 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -32,6 +32,7 @@
 #include "ui/progress.h"
 #include "../perf.h"
 #include "arch/common.h"
+#include "units.h"
 #include <internal/lib.h>
 
 #ifdef HAVE_ZSTD_SUPPORT
@@ -1258,10 +1259,19 @@ static void dump_event(struct evlist *evlist, union perf_event *event,
 	       event->header.size, perf_event__name(event->header.type));
 }
 
+char *get_page_size_name(u64 size, char *str)
+{
+	if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size))
+		snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A");
+
+	return str;
+}
+
 static void dump_sample(struct evsel *evsel, union perf_event *event,
 			struct perf_sample *sample)
 {
 	u64 sample_type;
+	char str[PAGE_SIZE_NAME_LEN];
 
 	if (!dump_trace)
 		return;
@@ -1296,6 +1306,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
 
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str));
+
 	if (sample_type & PERF_SAMPLE_TRANSACTION)
 		printf("... transaction: %" PRIx64 "\n", sample->transaction);
 
-- 
cgit v1.2.3-70-g09d2


From a50d03e3b8b68df13e47dcbde6c5d39b4237c479 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Wed, 16 Dec 2020 10:57:58 -0800
Subject: perf sort: Add sort option for data page size

Add a new sort option "data_page_size" for --mem-mode sort.  With this
option applied, perf can sort and report by sample's data page size.

Here is an example:

perf report --stdio --mem-mode
--sort=comm,symbol,phys_daddr,data_page_size

 # To display the perf.data header info, please use
 # --header/--header-only options.
 #
 #
 # Total Lost Samples: 0
 #
 # Samples: 9K of event 'mem-loads:uP'
 # Total weight : 9028
 # Sort order   : comm,symbol,phys_daddr,data_page_size
 #
 # Overhead  Command  Symbol                        Data Physical
 # Address
 # Data Page Size
 # ........  .......  ............................
 # ......................  ......................
 #
    11.19%  dtlb     [.] touch_buffer              [.] 0x00000003fec82ea8  4K
     8.61%  dtlb     [.] GetTickCount              [.] 0x00000003c4f2c8a8  4K
     4.52%  dtlb     [.] GetTickCount              [.] 0x00000003fec82f58  4K
     4.33%  dtlb     [.] __gettimeofday            [.] 0x00000003fec82f48  4K
     4.32%  dtlb     [.] GetTickCount              [.] 0x00000003fec82f78  4K
     4.28%  dtlb     [.] GetTickCount              [.] 0x00000003fec82f50  4K
     4.23%  dtlb     [.] GetTickCount              [.] 0x00000003fec82f70  4K
     4.11%  dtlb     [.] GetTickCount              [.] 0x00000003fec82f68  4K
     4.00%  dtlb     [.] Calibrate                 [.] 0x00000003fec82f98  4K
     3.91%  dtlb     [.] Calibrate                 [.] 0x00000003fec82f90  4K
     3.43%  dtlb     [.] touch_buffer              [.] 0x00000003fec82e98  4K
     3.42%  dtlb     [.] touch_buffer              [.] 0x00000003fec82e90  4K
     0.09%  dtlb     [.] DoDependentLoads          [.] 0x000000036ea084c0  2M
     0.08%  dtlb     [.] DoDependentLoads          [.] 0x000000032b010b80  2M

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Stephane Eranian <eranian@google.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lore.kernel.org/lkml/20201216185805.9981-3-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt |  1 +
 tools/perf/util/hist.c                   |  3 +++
 tools/perf/util/hist.h                   |  1 +
 tools/perf/util/machine.c                |  7 +++++--
 tools/perf/util/map_symbol.h             |  1 +
 tools/perf/util/sort.c                   | 30 ++++++++++++++++++++++++++++++
 tools/perf/util/sort.h                   |  1 +
 7 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index d068103690cc..8f7f4e9605d8 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -150,6 +150,7 @@ OPTIONS
 	- snoop: type of snoop (if any) for the data at the time of the sample
 	- dcacheline: the cacheline the data address is on at the time of the sample
 	- phys_daddr: physical address of data being executed on at the time of sample
+	- data_page_size: the data page size of data being executed on at the time of sample
 
 	And the default sort keys are changed to local_weight, mem, sym, dso,
 	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 7feeaa07c777..a08fb9ea411b 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -188,6 +188,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 		hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
 				   unresolved_col_width + 4 + 2);
 
+		hists__new_col_len(hists, HISTC_MEM_DATA_PAGE_SIZE,
+				   unresolved_col_width + 4 + 2);
+
 	} else {
 		symlen = unresolved_col_width + 4 + 2;
 		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index df6c6eea0960..14f66330923d 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -56,6 +56,7 @@ enum hist_column {
 	HISTC_MEM_DADDR_SYMBOL,
 	HISTC_MEM_DADDR_DSO,
 	HISTC_MEM_PHYS_DADDR,
+	HISTC_MEM_DATA_PAGE_SIZE,
 	HISTC_MEM_LOCKED,
 	HISTC_MEM_TLB,
 	HISTC_MEM_LVL,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 1ae32a81639c..f841f3503cae 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2023,11 +2023,12 @@ static void ip__resolve_ams(struct thread *thread,
 	ams->ms.sym = al.sym;
 	ams->ms.map = al.map;
 	ams->phys_addr = 0;
+	ams->data_page_size = 0;
 }
 
 static void ip__resolve_data(struct thread *thread,
 			     u8 m, struct addr_map_symbol *ams,
-			     u64 addr, u64 phys_addr)
+			     u64 addr, u64 phys_addr, u64 daddr_page_size)
 {
 	struct addr_location al;
 
@@ -2041,6 +2042,7 @@ static void ip__resolve_data(struct thread *thread,
 	ams->ms.sym = al.sym;
 	ams->ms.map = al.map;
 	ams->phys_addr = phys_addr;
+	ams->data_page_size = daddr_page_size;
 }
 
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@@ -2053,7 +2055,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 
 	ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
 	ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
-			 sample->addr, sample->phys_addr);
+			 sample->addr, sample->phys_addr,
+			 sample->data_page_size);
 	mi->data_src.val = sample->data_src;
 
 	return mi;
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
index 5b8ca93798e9..7d22ade082c8 100644
--- a/tools/perf/util/map_symbol.h
+++ b/tools/perf/util/map_symbol.h
@@ -19,5 +19,6 @@ struct addr_map_symbol {
 	u64	      addr;
 	u64	      al_addr;
 	u64	      phys_addr;
+	u64	      data_page_size;
 };
 #endif // __PERF_MAP_SYMBOL
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 7d87bfcffb3f..80907bc32683 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1462,6 +1462,35 @@ struct sort_entry sort_mem_phys_daddr = {
 	.se_width_idx	= HISTC_MEM_PHYS_DADDR,
 };
 
+static int64_t
+sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = 0, r = 0;
+
+	if (left->mem_info)
+		l = left->mem_info->daddr.data_page_size;
+	if (right->mem_info)
+		r = right->mem_info->daddr.data_page_size;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf,
+					  size_t size, unsigned int width)
+{
+	char str[PAGE_SIZE_NAME_LEN];
+
+	return repsep_snprintf(bf, size, "%-*s", width,
+			       get_page_size_name(he->mem_info->daddr.data_page_size, str));
+}
+
+struct sort_entry sort_mem_data_page_size = {
+	.se_header	= "Data Page Size",
+	.se_cmp		= sort__data_page_size_cmp,
+	.se_snprintf	= hist_entry__data_page_size_snprintf,
+	.se_width_idx	= HISTC_MEM_DATA_PAGE_SIZE,
+};
+
 static int64_t
 sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
 {
@@ -1740,6 +1769,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
 	DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
 	DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
 	DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
+	DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size),
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 66d39c4cfe2b..e50f2b695bc4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -255,6 +255,7 @@ enum sort_type {
 	SORT_MEM_DCACHELINE,
 	SORT_MEM_IADDR_SYMBOL,
 	SORT_MEM_PHYS_DADDR,
+	SORT_MEM_DATA_PAGE_SIZE,
 };
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 2e7f545096f954a9726c9415763dd0bfbcac47e0 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Wed, 16 Dec 2020 10:57:59 -0800
Subject: perf mem: Factor out a function to generate sort order

Now, "--phys-data" is the only option which impacts the sort order.  A
simple "if else" is enough to handle the option. But there will be more
options added, e.g. "--data-page-size", which also impact the sort
order. The code will become too complex to be maintained.

Divide the sort order string into several small pieces.  The first piece
is always the default sort string for LOAD/STORE.  Appends the specific
sort string if related option is applied.

No functional change.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Stephane Eranian <eranian@google.com>
Cc: Will Deacon <will@kernel.org>
Link: http://lore.kernel.org/lkml/20201216185805.9981-4-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-mem.c | 41 +++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index fdfbff7592f4..823742036ddb 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -298,11 +298,35 @@ out_delete:
 	perf_session__delete(session);
 	return ret;
 }
+static char *get_sort_order(struct perf_mem *mem)
+{
+	bool has_extra_options = mem->phys_addr ? true : false;
+	char sort[128];
+
+	/*
+	 * there is no weight (cost) associated with stores, so don't print
+	 * the column
+	 */
+	if (!(mem->operation & MEM_OPERATION_LOAD)) {
+		strcpy(sort, "--sort=mem,sym,dso,symbol_daddr,"
+			     "dso_daddr,tlb,locked");
+	} else if (has_extra_options) {
+		strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr,"
+			     "dso_daddr,snoop,tlb,locked");
+	} else
+		return NULL;
+
+	if (mem->phys_addr)
+		strcat(sort, ",phys_daddr");
+
+	return strdup(sort);
+}
 
 static int report_events(int argc, const char **argv, struct perf_mem *mem)
 {
 	const char **rep_argv;
 	int ret, i = 0, j, rep_argc;
+	char *new_sort_order;
 
 	if (mem->dump_raw)
 		return report_raw_events(mem);
@@ -316,20 +340,9 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	rep_argv[i++] = "--mem-mode";
 	rep_argv[i++] = "-n"; /* display number of samples */
 
-	/*
-	 * there is no weight (cost) associated with stores, so don't print
-	 * the column
-	 */
-	if (!(mem->operation & MEM_OPERATION_LOAD)) {
-		if (mem->phys_addr)
-			rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
-					"dso_daddr,tlb,locked,phys_daddr";
-		else
-			rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
-					"dso_daddr,tlb,locked";
-	} else if (mem->phys_addr)
-		rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
-				"dso_daddr,snoop,tlb,locked,phys_daddr";
+	new_sort_order = get_sort_order(mem);
+	if (new_sort_order)
+		rep_argv[i++] = new_sort_order;
 
 	for (j = 1; j < argc; j++, i++)
 		rep_argv[i] = argv[j];
-- 
cgit v1.2.3-70-g09d2


From 29f080881601c90d39c8fa31c125ac70b8894b5e Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Tue, 22 Dec 2020 12:02:27 -0800
Subject: kselftest/arm64: check GCR_EL1 after context switch

This test is specific to MTE and verifies that the GCR_EL1 register is
context switched correctly.

It spawns 1024 processes and each process spawns 5 threads.  Each thread
writes a random setting of GCR_EL1 through the prctl() system call and
reads it back verifying that it is the same.  If the values are not the
same it reports a failure.

Note: The test has been extended to verify that even SYNC and ASYNC mode
setting is preserved correctly over context switching.

Link: https://lkml.kernel.org/r/b51a165426e906e7ec8a68d806ef3f8cd92581a6.1606161801.git.andreyknvl@google.com
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Branislav Rankov <Branislav.Rankov@arm.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Evgenii Stepanov <eugenis@google.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Marco Elver <elver@google.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/arm64/mte/Makefile         |   2 +-
 .../selftests/arm64/mte/check_gcr_el1_cswitch.c    | 154 +++++++++++++++++++++
 2 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c

(limited to 'tools')

diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
index 2480226dfe57..0b3af552632a 100644
--- a/tools/testing/selftests/arm64/mte/Makefile
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2020 ARM Limited
 
-CFLAGS += -std=gnu99 -I.
+CFLAGS += -std=gnu99 -I. -lpthread
 SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
 PROGS := $(patsubst %.c,%,$(SRCS))
 
diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
new file mode 100644
index 000000000000..a876db1f096a
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+
+#define PR_SET_TAGGED_ADDR_CTRL 55
+#define PR_GET_TAGGED_ADDR_CTRL 56
+# define PR_TAGGED_ADDR_ENABLE  (1UL << 0)
+# define PR_MTE_TCF_SHIFT	1
+# define PR_MTE_TCF_NONE	(0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC	(1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC	(2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK	(3UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TAG_SHIFT	3
+# define PR_MTE_TAG_MASK	(0xffffUL << PR_MTE_TAG_SHIFT)
+
+#include "mte_def.h"
+
+#define NUM_ITERATIONS		1024
+#define MAX_THREADS		5
+#define THREAD_ITERATIONS	1000
+
+void *execute_thread(void *x)
+{
+	pid_t pid = *((pid_t *)x);
+	pid_t tid = gettid();
+	uint64_t prctl_tag_mask;
+	uint64_t prctl_set;
+	uint64_t prctl_get;
+	uint64_t prctl_tcf;
+
+	srand(time(NULL) ^ (pid << 16) ^ (tid << 16));
+
+	prctl_tag_mask = rand() & 0xffff;
+
+	if (prctl_tag_mask % 2)
+		prctl_tcf = PR_MTE_TCF_SYNC;
+	else
+		prctl_tcf = PR_MTE_TCF_ASYNC;
+
+	prctl_set = PR_TAGGED_ADDR_ENABLE | prctl_tcf | (prctl_tag_mask << PR_MTE_TAG_SHIFT);
+
+	for (int j = 0; j < THREAD_ITERATIONS; j++) {
+		if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_set, 0, 0, 0)) {
+			perror("prctl() failed");
+			goto fail;
+		}
+
+		prctl_get = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+
+		if (prctl_set != prctl_get) {
+			ksft_print_msg("Error: prctl_set: 0x%lx != prctl_get: 0x%lx\n",
+						prctl_set, prctl_get);
+			goto fail;
+		}
+	}
+
+	return (void *)KSFT_PASS;
+
+fail:
+	return (void *)KSFT_FAIL;
+}
+
+int execute_test(pid_t pid)
+{
+	pthread_t thread_id[MAX_THREADS];
+	int thread_data[MAX_THREADS];
+
+	for (int i = 0; i < MAX_THREADS; i++)
+		pthread_create(&thread_id[i], NULL,
+			       execute_thread, (void *)&pid);
+
+	for (int i = 0; i < MAX_THREADS; i++)
+		pthread_join(thread_id[i], (void *)&thread_data[i]);
+
+	for (int i = 0; i < MAX_THREADS; i++)
+		if (thread_data[i] == KSFT_FAIL)
+			return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
+
+int mte_gcr_fork_test(void)
+{
+	pid_t pid;
+	int results[NUM_ITERATIONS];
+	pid_t cpid;
+	int res;
+
+	for (int i = 0; i < NUM_ITERATIONS; i++) {
+		pid = fork();
+
+		if (pid < 0)
+			return KSFT_FAIL;
+
+		if (pid == 0) {
+			cpid = getpid();
+
+			res = execute_test(cpid);
+
+			exit(res);
+		}
+	}
+
+	for (int i = 0; i < NUM_ITERATIONS; i++) {
+		wait(&res);
+
+		if (WIFEXITED(res))
+			results[i] = WEXITSTATUS(res);
+		else
+			--i;
+	}
+
+	for (int i = 0; i < NUM_ITERATIONS; i++)
+		if (results[i] == KSFT_FAIL)
+			return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	ksft_set_plan(1);
+
+	evaluate_test(mte_gcr_fork_test(),
+		"Verify that GCR_EL1 is set correctly on context switch\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
-- 
cgit v1.2.3-70-g09d2


From 6e5192143ab571dbefb584edf900565098bdfd23 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 21 Dec 2020 09:03:04 -0300
Subject: tools headers UAPI: Update epoll_pwait2 affected files

To pick the changes from:

  b0a0c2615f6f199a ("epoll: wire up syscall epoll_pwait2")

That addresses these perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
  Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'
  diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/unistd.h           | 4 +++-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index fc48c64700eb..728752917785 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
 __SYSCALL(__NR_faccessat2, sys_faccessat2)
 #define __NR_process_madvise 440
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
+#define __NR_epoll_pwait2 441
+__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 441
+#define __NR_syscalls 442
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 379819244b91..78672124d28b 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -362,6 +362,7 @@
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
 440	common	process_madvise		sys_process_madvise
+441	common	epoll_pwait2		sys_epoll_pwait2
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
-- 
cgit v1.2.3-70-g09d2


From 7f3905f00a2025591a6883ee6880f928029b4d96 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 21 Dec 2020 09:04:54 -0300
Subject: tools headers cpufeatures: Sync with the kernel sources

To pick the changes in:

  69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature")
  e1b35da5e624f8b0 ("x86: Enumerate AVX512 FP16 CPUID feature flag")

That causes only these 'perf bench' objects to rebuild:

  CC       /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o
  CC       /tmp/build/perf/bench/mem-memset-x86-64-asm.o

And addresses these perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h'
  diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kyung Min Park <kyung.min.park@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index f5ef2d5b9231..84b887825f12 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -237,6 +237,7 @@
 #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL		( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_SEV_ES		( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_VM_PAGE_FLUSH	( 8*32+21) /* "" VM Page Flush MSR is supported */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -376,6 +377,7 @@
 #define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
+#define X86_FEATURE_AVX512_FP16		(18*32+23) /* AVX512 FP16 */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
-- 
cgit v1.2.3-70-g09d2


From fde668244d1d8d490b5b9daf53fe4f92a6751773 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 21 Dec 2020 09:07:36 -0300
Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources

To pick up the changes in:

Fixes: 69372cf01290b958 ("x86/cpu: Add VM page flush MSR availablility as a CPUID feature")

That cause these changes in tooling:

  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before
  $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h
  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after
  $ diff -u before after
  --- before	2020-12-21 09:09:05.593005003 -0300
  +++ after	2020-12-21 09:12:48.436994802 -0300
  @@ -21,7 +21,7 @@
   	[0x0000004f] = "PPIN",
   	[0x00000060] = "LBR_CORE_TO",
   	[0x00000079] = "IA32_UCODE_WRITE",
  -	[0x0000008b] = "IA32_UCODE_REV",
  +	[0x0000008b] = "AMD64_PATCH_LEVEL",
   	[0x0000008C] = "IA32_SGXLEPUBKEYHASH0",
   	[0x0000008D] = "IA32_SGXLEPUBKEYHASH1",
   	[0x0000008E] = "IA32_SGXLEPUBKEYHASH2",
  @@ -286,6 +286,7 @@
   	[0xc0010114 - x86_AMD_V_KVM_MSRs_offset] = "VM_CR",
   	[0xc0010115 - x86_AMD_V_KVM_MSRs_offset] = "VM_IGNNE",
   	[0xc0010117 - x86_AMD_V_KVM_MSRs_offset] = "VM_HSAVE_PA",
  +	[0xc001011e - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VM_PAGE_FLUSH",
   	[0xc001011f - x86_AMD_V_KVM_MSRs_offset] = "AMD64_VIRT_SPEC_CTRL",
   	[0xc0010130 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV_ES_GHCB",
   	[0xc0010131 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SEV",
  $

The new MSR has a pattern that wasn't matched to avoid a clash with
IA32_UCODE_REV, change the regex to prefer the more relevant AMD_
prefixed ones to catch this new AMD64_VM_PAGE_FLUSH MSR.

Which causes these parts of tools/perf/ to be rebuilt:

  CC       /tmp/build/perf/trace/beauty/tracepoints/x86_msr.o
  LD       /tmp/build/perf/trace/beauty/tracepoints/perf-in.o
  LD       /tmp/build/perf/trace/beauty/perf-in.o
  LD       /tmp/build/perf/perf-in.o
  LINK     /tmp/build/perf/perf

This addresses this perf tools build warning:

  diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/msr-index.h         | 1 +
 tools/perf/trace/beauty/tracepoints/x86_msr.sh | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 2b5fc9accec4..546d6ecf0a35 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -472,6 +472,7 @@
 #define MSR_AMD64_ICIBSEXTDCTL		0xc001103c
 #define MSR_AMD64_IBSOPDATA4		0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH		0xc001011e
 #define MSR_AMD64_SEV_ES_GHCB		0xc0010130
 #define MSR_AMD64_SEV			0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT	0
diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
index 831c02cf0586..27ee1ea1fe94 100755
--- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh
+++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
@@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h
 
 printf "static const char *x86_MSRs[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|AMD64|IA32_TSCDEADLINE|IDT_FCR4)' | \
+egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \
 	sed -r "s/$regex/\2 \1/g" | sort -n | \
 	xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
-- 
cgit v1.2.3-70-g09d2


From 288807fc3a5f19ed77cb8c25342323bbe58a75a1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 21 Dec 2020 09:20:52 -0300
Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources

To pick the changes in:

  fb04a1eddb1a65b6 ("KVM: X86: Implement ring-based dirty memory tracking")

That result in these change in tooling:

  $ tools/perf/trace/beauty/kvm_ioctl.sh > before
  $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h
  $ cp arch/x86/include/uapi/asm/kvm.h tools/arch/x86/include/uapi/asm/kvm.h
  $ tools/perf/trace/beauty/kvm_ioctl.sh > after
  $ diff -u before after
  --- before	2020-12-21 11:55:45.229737066 -0300
  +++ after	2020-12-21 11:55:56.379983393 -0300
  @@ -90,6 +90,7 @@
   	[0xc0] = "CLEAR_DIRTY_LOG",
   	[0xc1] = "GET_SUPPORTED_HV_CPUID",
   	[0xc6] = "X86_SET_MSR_FILTER",
  +	[0xc7] = "RESET_DIRTY_RINGS",
   	[0xe0] = "CREATE_DEVICE",
   	[0xe1] = "SET_DEVICE_ATTR",
   	[0xe2] = "GET_DEVICE_ATTR",
  $

Now one can use that string in filters when tracing ioctls, etc.

And silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h'
  diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h'
  diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/kvm.h |  1 +
 tools/include/uapi/linux/kvm.h        | 56 ++++++++++++++++++++++++++++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 89e5f3d1bba8..8e76d3701db3 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -12,6 +12,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
 #define DE_VECTOR 0
 #define DB_VECTOR 1
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index ca41220b40b8..886802b8ffba 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -250,6 +250,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_ARM_NISV         28
 #define KVM_EXIT_X86_RDMSR        29
 #define KVM_EXIT_X86_WRMSR        30
+#define KVM_EXIT_DIRTY_RING_FULL  31
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -1053,6 +1054,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_SYS_HYPERV_CPUID 191
+#define KVM_CAP_DIRTY_LOG_RING 192
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1511,7 +1514,7 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */
 #define KVM_CLEAR_DIRTY_LOG          _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
 
-/* Available with KVM_CAP_HYPERV_CPUID */
+/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */
 #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
 
 /* Available with KVM_CAP_ARM_SVE */
@@ -1557,6 +1560,9 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_X86_MSR_FILTER */
 #define KVM_X86_SET_MSR_FILTER	_IOW(KVMIO,  0xc6, struct kvm_msr_filter)
 
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+#define KVM_RESET_DIRTY_RINGS		_IO(KVMIO, 0xc7)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
@@ -1710,4 +1716,52 @@ struct kvm_hyperv_eventfd {
 #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE    (1 << 0)
 #define KVM_DIRTY_LOG_INITIALLY_SET            (1 << 1)
 
+/*
+ * Arch needs to define the macro after implementing the dirty ring
+ * feature.  KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the
+ * starting page offset of the dirty ring structures.
+ */
+#ifndef KVM_DIRTY_LOG_PAGE_OFFSET
+#define KVM_DIRTY_LOG_PAGE_OFFSET 0
+#endif
+
+/*
+ * KVM dirty GFN flags, defined as:
+ *
+ * |---------------+---------------+--------------|
+ * | bit 1 (reset) | bit 0 (dirty) | Status       |
+ * |---------------+---------------+--------------|
+ * |             0 |             0 | Invalid GFN  |
+ * |             0 |             1 | Dirty GFN    |
+ * |             1 |             X | GFN to reset |
+ * |---------------+---------------+--------------|
+ *
+ * Lifecycle of a dirty GFN goes like:
+ *
+ *      dirtied         harvested        reset
+ * 00 -----------> 01 -------------> 1X -------+
+ *  ^                                          |
+ *  |                                          |
+ *  +------------------------------------------+
+ *
+ * The userspace program is only responsible for the 01->1X state
+ * conversion after harvesting an entry.  Also, it must not skip any
+ * dirty bits, so that dirty bits are always harvested in sequence.
+ */
+#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
+#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_MASK            0x3
+
+/*
+ * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of
+ * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn.  The
+ * size of the gfn buffer is decided by the first argument when
+ * enabling KVM_CAP_DIRTY_LOG_RING.
+ */
+struct kvm_dirty_gfn {
+	__u32 flags;
+	__u32 slot;
+	__u64 offset;
+};
+
 #endif /* __LINUX_KVM_H */
-- 
cgit v1.2.3-70-g09d2


From cd97448db80e0238a819dc6b733da6ec0173cadd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 21 Dec 2020 12:51:03 -0300
Subject: tools headers UAPI: Sync KVM's vmx.h header with the kernel sources

To pick the changes in:

  bf0cd88ce363a2de ("KVM: x86: emulate wait-for-SIPI and SIPI-VMExit")

That makes 'perf kvm-stat' aware of this new SIPI_SIGNAL exit reason,
thus addressing the following perf build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h'
  diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Yadong Qi <yadong.qi@intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/vmx.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index b8ff9e8ac0d5..ada955c5ebb6 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -32,6 +32,7 @@
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
 #define EXIT_REASON_TRIPLE_FAULT        2
 #define EXIT_REASON_INIT_SIGNAL			3
+#define EXIT_REASON_SIPI_SIGNAL         4
 
 #define EXIT_REASON_INTERRUPT_WINDOW    7
 #define EXIT_REASON_NMI_WINDOW          8
@@ -94,6 +95,7 @@
 	{ EXIT_REASON_EXTERNAL_INTERRUPT,    "EXTERNAL_INTERRUPT" }, \
 	{ EXIT_REASON_TRIPLE_FAULT,          "TRIPLE_FAULT" }, \
 	{ EXIT_REASON_INIT_SIGNAL,           "INIT_SIGNAL" }, \
+	{ EXIT_REASON_SIPI_SIGNAL,           "SIPI_SIGNAL" }, \
 	{ EXIT_REASON_INTERRUPT_WINDOW,      "INTERRUPT_WINDOW" }, \
 	{ EXIT_REASON_NMI_WINDOW,            "NMI_WINDOW" }, \
 	{ EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
-- 
cgit v1.2.3-70-g09d2


From 9880e71cbaa8a0e826d8f144704301476b2d6cf9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 21 Dec 2020 12:53:44 -0300
Subject: tools kvm headers: Update KVM headers from the kernel sources

To pick the changes from:

  8d14797b53f044fd ("KVM: arm64: Move 'struct kvm_arch_memory_slot' out of uapi/")

That don't causes any changes in tooling, only addresses this perf build
warning:

  Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h'
  diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h

Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/arm64/include/uapi/asm/kvm.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'tools')

diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 1c17c3a24411..24223adae150 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -156,9 +156,6 @@ struct kvm_sync_regs {
 	__u64 device_irq_level;
 };
 
-struct kvm_arch_memory_slot {
-};
-
 /*
  * PMU filter structure. Describe a range of events with a particular
  * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
-- 
cgit v1.2.3-70-g09d2


From b71df82d05b7a38f38c4b1109c57b209b8ed43ff Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 21 Dec 2020 20:04:45 -0300
Subject: tools headers UAPI: Synch KVM's svm.h header with the kernel

To pick up the changes from:

  d1949b93c60504b3 ("KVM: SVM: Add support for CR8 write traps for an SEV-ES guest")
  5b51cb13160ae0ba ("KVM: SVM: Add support for CR4 write traps for an SEV-ES guest")
  f27ad38aac23263c ("KVM: SVM: Add support for CR0 write traps for an SEV-ES guest")
  2985afbcdbb1957a ("KVM: SVM: Add support for EFER write traps for an SEV-ES guest")
  291bd20d5d88814a ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT")

Picking these new SVM exit reasons:

  +	{ SVM_EXIT_EFER_WRITE_TRAP,	"write_efer_trap" }, \
  +	{ SVM_EXIT_CR0_WRITE_TRAP,	"write_cr0_trap" }, \
  +	{ SVM_EXIT_CR4_WRITE_TRAP,	"write_cr4_trap" }, \
  +	{ SVM_EXIT_CR8_WRITE_TRAP,	"write_cr8_trap" }, \
  +	{ SVM_EXIT_VMGEXIT,		"vmgexit" }, \
  +	{ SVM_VMGEXIT_MMIO_READ,	"vmgexit_mmio_read" }, \
  +	{ SVM_VMGEXIT_MMIO_WRITE,	"vmgexit_mmio_write" }, \
  +	{ SVM_VMGEXIT_NMI_COMPLETE,	"vmgexit_nmi_complete" }, \
  +	{ SVM_VMGEXIT_AP_HLT_LOOP,	"vmgexit_ap_hlt_loop" }, \
  +	{ SVM_VMGEXIT_AP_JUMP_TABLE,	"vmgexit_ap_jump_table" }, \

And address this perf build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/svm.h' differs from latest version at 'arch/x86/include/uapi/asm/svm.h'
  diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/svm.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'tools')

diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index f1d8307454e0..554f75fe013c 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -77,10 +77,28 @@
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_RDPRU         0x08e
+#define SVM_EXIT_EFER_WRITE_TRAP		0x08f
+#define SVM_EXIT_CR0_WRITE_TRAP			0x090
+#define SVM_EXIT_CR1_WRITE_TRAP			0x091
+#define SVM_EXIT_CR2_WRITE_TRAP			0x092
+#define SVM_EXIT_CR3_WRITE_TRAP			0x093
+#define SVM_EXIT_CR4_WRITE_TRAP			0x094
+#define SVM_EXIT_CR5_WRITE_TRAP			0x095
+#define SVM_EXIT_CR6_WRITE_TRAP			0x096
+#define SVM_EXIT_CR7_WRITE_TRAP			0x097
+#define SVM_EXIT_CR8_WRITE_TRAP			0x098
+#define SVM_EXIT_CR9_WRITE_TRAP			0x099
+#define SVM_EXIT_CR10_WRITE_TRAP		0x09a
+#define SVM_EXIT_CR11_WRITE_TRAP		0x09b
+#define SVM_EXIT_CR12_WRITE_TRAP		0x09c
+#define SVM_EXIT_CR13_WRITE_TRAP		0x09d
+#define SVM_EXIT_CR14_WRITE_TRAP		0x09e
+#define SVM_EXIT_CR15_WRITE_TRAP		0x09f
 #define SVM_EXIT_INVPCID       0x0a2
 #define SVM_EXIT_NPF           0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI		0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS	0x402
+#define SVM_EXIT_VMGEXIT       0x403
 
 /* SEV-ES software-defined VMGEXIT events */
 #define SVM_VMGEXIT_MMIO_READ			0x80000001
@@ -183,10 +201,20 @@
 	{ SVM_EXIT_MONITOR,     "monitor" }, \
 	{ SVM_EXIT_MWAIT,       "mwait" }, \
 	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
+	{ SVM_EXIT_EFER_WRITE_TRAP,	"write_efer_trap" }, \
+	{ SVM_EXIT_CR0_WRITE_TRAP,	"write_cr0_trap" }, \
+	{ SVM_EXIT_CR4_WRITE_TRAP,	"write_cr4_trap" }, \
+	{ SVM_EXIT_CR8_WRITE_TRAP,	"write_cr8_trap" }, \
 	{ SVM_EXIT_INVPCID,     "invpcid" }, \
 	{ SVM_EXIT_NPF,         "npf" }, \
 	{ SVM_EXIT_AVIC_INCOMPLETE_IPI,		"avic_incomplete_ipi" }, \
 	{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
+	{ SVM_EXIT_VMGEXIT,		"vmgexit" }, \
+	{ SVM_VMGEXIT_MMIO_READ,	"vmgexit_mmio_read" }, \
+	{ SVM_VMGEXIT_MMIO_WRITE,	"vmgexit_mmio_write" }, \
+	{ SVM_VMGEXIT_NMI_COMPLETE,	"vmgexit_nmi_complete" }, \
+	{ SVM_VMGEXIT_AP_HLT_LOOP,	"vmgexit_ap_hlt_loop" }, \
+	{ SVM_VMGEXIT_AP_JUMP_TABLE,	"vmgexit_ap_jump_table" }, \
 	{ SVM_EXIT_ERR,         "invalid_guest_state" }
 
 
-- 
cgit v1.2.3-70-g09d2


From 9bad32b2c63c985fc9f04b29186974ad5bb0b74c Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 18 Dec 2020 15:59:21 +0800
Subject: perf powerpc: Move syscall.tbl check to check-headers.sh

It is better to check syscall.tbl for powerpc in check-headers.sh, it is
similar with commit c9b51a017065 ("perf tools: Move syscall_64.tbl check
into check-headers.sh").

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Link: http://lore.kernel.org/lkml/1608278364-6733-2-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/Makefile | 7 -------
 tools/perf/check-headers.sh      | 1 +
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index e58d00d62f02..840ea0e59287 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -14,7 +14,6 @@ PERF_HAVE_JITDUMP := 1
 out    := $(OUTPUT)arch/powerpc/include/generated/asm
 header32 := $(out)/syscalls_32.c
 header64 := $(out)/syscalls_64.c
-syskrn := $(srctree)/arch/powerpc/kernel/syscalls/syscall.tbl
 sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls
 sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
@@ -23,15 +22,9 @@ systbl := $(sysprf)/mksyscalltbl
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header64): $(sysdef) $(systbl)
-	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
-	(diff -B $(sysdef) $(syskrn) >/dev/null) \
-	|| echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
 	$(Q)$(SHELL) '$(systbl)' '64' $(sysdef) > $@
 
 $(header32): $(sysdef) $(systbl)
-	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
-	(diff -B $(sysdef) $(syskrn) >/dev/null) \
-	|| echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
 	$(Q)$(SHELL) '$(systbl)' '32' $(sysdef) > $@
 
 clean::
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 15ecb1803fb9..3c96fd3a7370 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -144,6 +144,7 @@ check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in
 
 # diff non-symmetric files
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
 
 for i in $BEAUTY_FILES; do
   beauty_check $i -B
-- 
cgit v1.2.3-70-g09d2


From 22ffc3f5598d2a51e2da4ea5e07e734715bde782 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 18 Dec 2020 15:59:22 +0800
Subject: perf s390: Move syscall.tbl check into check-headers.sh

It is better to check syscall.tbl for s390 in check-headers.sh, it is
similar with commit c9b51a017065 ("perf tools: Move syscall_64.tbl check
into check-headers.sh").

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Link: http://lore.kernel.org/lkml/1608278364-6733-3-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/Makefile | 4 ----
 tools/perf/check-headers.sh   | 1 +
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index 6ac8887be7c9..74bffbea03e2 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -12,7 +12,6 @@ PERF_HAVE_JITDUMP := 1
 
 out    := $(OUTPUT)arch/s390/include/generated/asm
 header := $(out)/syscalls_64.c
-syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
 sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
 sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
@@ -21,9 +20,6 @@ systbl := $(sysprf)/mksyscalltbl
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
-	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
-        (diff -B $(sysdef) $(syskrn) >/dev/null) \
-        || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
 	$(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
 
 clean::
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 3c96fd3a7370..dded93a2bc89 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -145,6 +145,7 @@ check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in
 # diff non-symmetric files
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
 check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
+check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
 
 for i in $BEAUTY_FILES; do
   beauty_check $i -B
-- 
cgit v1.2.3-70-g09d2


From c5ef52944a2d80017092cdf6aa474b2f4d596072 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Wed, 23 Dec 2020 13:13:16 -0300
Subject: perf tools: Update powerpc's syscall.tbl copy from the kernel sources

This silences the following tools/perf/ build warning:

  Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl'

Just make them same:

  cp arch/powerpc/kernel/syscalls/syscall.tbl tools/perf/arch/powerpc/entry/syscalls/syscall.tbl

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Link: http://lore.kernel.org/lkml/1608278364-6733-4-git-send-email-yangtiezhu@loongson.cn
[ There were updates after Tiezhu's post, so I just updated the copy ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 26 ++++++++++++++++------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index b168364ac050..f744eb5cba88 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -9,7 +9,9 @@
 #
 0	nospu	restart_syscall			sys_restart_syscall
 1	nospu	exit				sys_exit
-2	nospu	fork				ppc_fork
+2	32	fork				ppc_fork			sys_fork
+2	64	fork				sys_fork
+2	spu	fork				sys_ni_syscall
 3	common	read				sys_read
 4	common	write				sys_write
 5	common	open				sys_open			compat_sys_open
@@ -158,7 +160,9 @@
 119	32	sigreturn			sys_sigreturn			compat_sys_sigreturn
 119	64	sigreturn			sys_ni_syscall
 119	spu	sigreturn			sys_ni_syscall
-120	nospu	clone				ppc_clone
+120	32	clone				ppc_clone			sys_clone
+120	64	clone				sys_clone
+120	spu	clone				sys_ni_syscall
 121	common	setdomainname			sys_setdomainname
 122	common	uname				sys_newuname
 123	common	modify_ldt			sys_ni_syscall
@@ -240,7 +244,9 @@
 186	spu	sendfile			sys_sendfile64
 187	common	getpmsg				sys_ni_syscall
 188	common 	putpmsg				sys_ni_syscall
-189	nospu	vfork				ppc_vfork
+189	32	vfork				ppc_vfork			sys_vfork
+189	64	vfork				sys_vfork
+189	spu	vfork				sys_ni_syscall
 190	common	ugetrlimit			sys_getrlimit			compat_sys_getrlimit
 191	common	readahead			sys_readahead			compat_sys_readahead
 192	32	mmap2				sys_mmap2			compat_sys_mmap2
@@ -316,8 +322,8 @@
 248	32	clock_nanosleep			sys_clock_nanosleep_time32
 248	64	clock_nanosleep			sys_clock_nanosleep
 248	spu	clock_nanosleep			sys_clock_nanosleep
-249	32	swapcontext			ppc_swapcontext			ppc32_swapcontext
-249	64	swapcontext			ppc64_swapcontext
+249	32	swapcontext			ppc_swapcontext			compat_sys_swapcontext
+249	64	swapcontext			sys_swapcontext
 249	spu	swapcontext			sys_ni_syscall
 250	common	tgkill				sys_tgkill
 251	32	utimes				sys_utimes_time32
@@ -456,7 +462,7 @@
 361	common	bpf				sys_bpf
 362	nospu	execveat			sys_execveat			compat_sys_execveat
 363	32	switch_endian			sys_ni_syscall
-363	64	switch_endian			ppc_switch_endian
+363	64	switch_endian			sys_switch_endian
 363	spu	switch_endian			sys_ni_syscall
 364	common	userfaultfd			sys_userfaultfd
 365	common	membarrier			sys_membarrier
@@ -516,6 +522,12 @@
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
-435	nospu	clone3				ppc_clone3
+435	32	clone3				ppc_clone3			sys_clone3
+435	64	clone3				sys_clone3
+435	spu	clone3				sys_ni_syscall
+436	common	close_range			sys_close_range
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
-- 
cgit v1.2.3-70-g09d2


From b27d20ab1c6a1a7738c02419c28287d260ca8036 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Wed, 23 Dec 2020 13:24:35 -0300
Subject: perf tools: Update s390's syscall.tbl copy from the kernel sources

This silences the following tools/perf/ build warning:

  Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl'

Just make them same:

  cp arch/s390/kernel/syscalls/syscall.tbl tools/perf/arch/s390/entry/syscalls/syscall.tbl

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Link: http://lore.kernel.org/lkml/1608278364-6733-5-git-send-email-yangtiezhu@loongson.cn
[ There were updates after Tiezhu's post, so I just updated the copy ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/s390/entry/syscalls/syscall.tbl | 396 ++++++++++++++----------
 1 file changed, 226 insertions(+), 170 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index d2fa9647ce25..d443423495e5 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -15,86 +15,86 @@
 5    common	open			sys_open			compat_sys_open
 6    common	close			sys_close			sys_close
 7    common	restart_syscall		sys_restart_syscall		sys_restart_syscall
-8    common	creat			sys_creat			compat_sys_creat
-9    common	link			sys_link			compat_sys_link
-10   common	unlink			sys_unlink			compat_sys_unlink
+8    common	creat			sys_creat			sys_creat
+9    common	link			sys_link			sys_link
+10   common	unlink			sys_unlink			sys_unlink
 11   common	execve			sys_execve			compat_sys_execve
-12   common	chdir			sys_chdir			compat_sys_chdir
-13   32		time			-				compat_sys_time
-14   common	mknod			sys_mknod			compat_sys_mknod
-15   common	chmod			sys_chmod			compat_sys_chmod
-16   32		lchown			-				compat_sys_s390_lchown16
+12   common	chdir			sys_chdir			sys_chdir
+13   32		time			-				sys_time32
+14   common	mknod			sys_mknod			sys_mknod
+15   common	chmod			sys_chmod			sys_chmod
+16   32		lchown			-				sys_lchown16
 19   common	lseek			sys_lseek			compat_sys_lseek
 20   common	getpid			sys_getpid			sys_getpid
-21   common	mount			sys_mount
-22   common	umount			sys_oldumount			compat_sys_oldumount
-23   32		setuid			-				compat_sys_s390_setuid16
-24   32		getuid			-				compat_sys_s390_getuid16
-25   32		stime			-				compat_sys_stime
+21   common	mount			sys_mount			sys_mount
+22   common	umount			sys_oldumount			sys_oldumount
+23   32		setuid			-				sys_setuid16
+24   32		getuid			-				sys_getuid16
+25   32		stime			-				sys_stime32
 26   common	ptrace			sys_ptrace			compat_sys_ptrace
 27   common	alarm			sys_alarm			sys_alarm
 29   common	pause			sys_pause			sys_pause
-30   common	utime			sys_utime			compat_sys_utime
-33   common	access			sys_access			compat_sys_access
+30   common	utime			sys_utime			sys_utime32
+33   common	access			sys_access			sys_access
 34   common	nice			sys_nice			sys_nice
 36   common	sync			sys_sync			sys_sync
 37   common	kill			sys_kill			sys_kill
-38   common	rename			sys_rename			compat_sys_rename
-39   common	mkdir			sys_mkdir			compat_sys_mkdir
-40   common	rmdir			sys_rmdir			compat_sys_rmdir
+38   common	rename			sys_rename			sys_rename
+39   common	mkdir			sys_mkdir			sys_mkdir
+40   common	rmdir			sys_rmdir			sys_rmdir
 41   common	dup			sys_dup				sys_dup
-42   common	pipe			sys_pipe			compat_sys_pipe
+42   common	pipe			sys_pipe			sys_pipe
 43   common	times			sys_times			compat_sys_times
-45   common	brk			sys_brk				compat_sys_brk
-46   32		setgid			-				compat_sys_s390_setgid16
-47   32		getgid			-				compat_sys_s390_getgid16
-48   common	signal			sys_signal			compat_sys_signal
-49   32		geteuid			-				compat_sys_s390_geteuid16
-50   32		getegid			-				compat_sys_s390_getegid16
-51   common	acct			sys_acct			compat_sys_acct
-52   common	umount2			sys_umount			compat_sys_umount
+45   common	brk			sys_brk				sys_brk
+46   32		setgid			-				sys_setgid16
+47   32		getgid			-				sys_getgid16
+48   common	signal			sys_signal			sys_signal
+49   32		geteuid			-				sys_geteuid16
+50   32		getegid			-				sys_getegid16
+51   common	acct			sys_acct			sys_acct
+52   common	umount2			sys_umount			sys_umount
 54   common	ioctl			sys_ioctl			compat_sys_ioctl
 55   common	fcntl			sys_fcntl			compat_sys_fcntl
 57   common	setpgid			sys_setpgid			sys_setpgid
 60   common	umask			sys_umask			sys_umask
-61   common	chroot			sys_chroot			compat_sys_chroot
+61   common	chroot			sys_chroot			sys_chroot
 62   common	ustat			sys_ustat			compat_sys_ustat
 63   common	dup2			sys_dup2			sys_dup2
 64   common	getppid			sys_getppid			sys_getppid
 65   common	getpgrp			sys_getpgrp			sys_getpgrp
 66   common	setsid			sys_setsid			sys_setsid
 67   common	sigaction		sys_sigaction			compat_sys_sigaction
-70   32		setreuid		-				compat_sys_s390_setreuid16
-71   32		setregid		-				compat_sys_s390_setregid16
-72   common	sigsuspend		sys_sigsuspend			compat_sys_sigsuspend
+70   32		setreuid		-				sys_setreuid16
+71   32		setregid		-				sys_setregid16
+72   common	sigsuspend		sys_sigsuspend			sys_sigsuspend
 73   common	sigpending		sys_sigpending			compat_sys_sigpending
-74   common	sethostname		sys_sethostname			compat_sys_sethostname
+74   common	sethostname		sys_sethostname			sys_sethostname
 75   common	setrlimit		sys_setrlimit			compat_sys_setrlimit
 76   32		getrlimit		-				compat_sys_old_getrlimit
 77   common	getrusage		sys_getrusage			compat_sys_getrusage
 78   common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
 79   common	settimeofday		sys_settimeofday		compat_sys_settimeofday
-80   32		getgroups		-				compat_sys_s390_getgroups16
-81   32		setgroups		-				compat_sys_s390_setgroups16
-83   common	symlink			sys_symlink			compat_sys_symlink
-85   common	readlink		sys_readlink			compat_sys_readlink
-86   common	uselib			sys_uselib			compat_sys_uselib
-87   common	swapon			sys_swapon			compat_sys_swapon
-88   common	reboot			sys_reboot			compat_sys_reboot
+80   32		getgroups		-				sys_getgroups16
+81   32		setgroups		-				sys_setgroups16
+83   common	symlink			sys_symlink			sys_symlink
+85   common	readlink		sys_readlink			sys_readlink
+86   common	uselib			sys_uselib			sys_uselib
+87   common	swapon			sys_swapon			sys_swapon
+88   common	reboot			sys_reboot			sys_reboot
 89   common	readdir			-				compat_sys_old_readdir
 90   common	mmap			sys_old_mmap			compat_sys_s390_old_mmap
-91   common	munmap			sys_munmap			compat_sys_munmap
+91   common	munmap			sys_munmap			sys_munmap
 92   common	truncate		sys_truncate			compat_sys_truncate
 93   common	ftruncate		sys_ftruncate			compat_sys_ftruncate
 94   common	fchmod			sys_fchmod			sys_fchmod
-95   32		fchown			-				compat_sys_s390_fchown16
+95   32		fchown			-				sys_fchown16
 96   common	getpriority		sys_getpriority			sys_getpriority
 97   common	setpriority		sys_setpriority			sys_setpriority
 99   common	statfs			sys_statfs			compat_sys_statfs
 100  common	fstatfs			sys_fstatfs			compat_sys_fstatfs
 101  32		ioperm			-				-
 102  common	socketcall		sys_socketcall			compat_sys_socketcall
-103  common	syslog			sys_syslog			compat_sys_syslog
+103  common	syslog			sys_syslog			sys_syslog
 104  common	setitimer		sys_setitimer			compat_sys_setitimer
 105  common	getitimer		sys_getitimer			compat_sys_getitimer
 106  common	stat			sys_newstat			compat_sys_newstat
@@ -104,76 +104,76 @@
 111  common	vhangup			sys_vhangup			sys_vhangup
 112  common	idle			-				-
 114  common	wait4			sys_wait4			compat_sys_wait4
-115  common	swapoff			sys_swapoff			compat_sys_swapoff
+115  common	swapoff			sys_swapoff			sys_swapoff
 116  common	sysinfo			sys_sysinfo			compat_sys_sysinfo
 117  common	ipc			sys_s390_ipc			compat_sys_s390_ipc
 118  common	fsync			sys_fsync			sys_fsync
 119  common	sigreturn		sys_sigreturn			compat_sys_sigreturn
-120  common	clone			sys_clone			compat_sys_clone
-121  common	setdomainname		sys_setdomainname		compat_sys_setdomainname
-122  common	uname			sys_newuname			compat_sys_newuname
-124  common	adjtimex		sys_adjtimex			compat_sys_adjtimex
-125  common	mprotect		sys_mprotect			compat_sys_mprotect
+120  common	clone			sys_clone			sys_clone
+121  common	setdomainname		sys_setdomainname		sys_setdomainname
+122  common	uname			sys_newuname			sys_newuname
+124  common	adjtimex		sys_adjtimex			sys_adjtimex_time32
+125  common	mprotect		sys_mprotect			sys_mprotect
 126  common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
 127  common	create_module		-				-
-128  common	init_module		sys_init_module			compat_sys_init_module
-129  common	delete_module		sys_delete_module		compat_sys_delete_module
+128  common	init_module		sys_init_module			sys_init_module
+129  common	delete_module		sys_delete_module		sys_delete_module
 130  common	get_kernel_syms		-				-
-131  common	quotactl		sys_quotactl			compat_sys_quotactl
+131  common	quotactl		sys_quotactl			sys_quotactl
 132  common	getpgid			sys_getpgid			sys_getpgid
 133  common	fchdir			sys_fchdir			sys_fchdir
-134  common	bdflush			sys_bdflush			compat_sys_bdflush
-135  common	sysfs			sys_sysfs			compat_sys_sysfs
+134  common	bdflush			sys_bdflush			sys_bdflush
+135  common	sysfs			sys_sysfs			sys_sysfs
 136  common	personality		sys_s390_personality		sys_s390_personality
 137  common	afs_syscall		-				-
-138  32		setfsuid		-				compat_sys_s390_setfsuid16
-139  32		setfsgid		-				compat_sys_s390_setfsgid16
-140  32		_llseek			-				compat_sys_llseek
+138  32		setfsuid		-				sys_setfsuid16
+139  32		setfsgid		-				sys_setfsgid16
+140  32		_llseek			-				sys_llseek
 141  common	getdents		sys_getdents			compat_sys_getdents
 142  32		_newselect		-				compat_sys_select
 142  64		select			sys_select			-
 143  common	flock			sys_flock			sys_flock
-144  common	msync			sys_msync			compat_sys_msync
-145  common	readv			sys_readv
-146  common	writev			sys_writev
+144  common	msync			sys_msync			sys_msync
+145  common	readv			sys_readv			sys_readv
+146  common	writev			sys_writev			sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
 149  common	_sysctl			-				-
-150  common	mlock			sys_mlock			compat_sys_mlock
-151  common	munlock			sys_munlock			compat_sys_munlock
+150  common	mlock			sys_mlock			sys_mlock
+151  common	munlock			sys_munlock			sys_munlock
 152  common	mlockall		sys_mlockall			sys_mlockall
 153  common	munlockall		sys_munlockall			sys_munlockall
-154  common	sched_setparam		sys_sched_setparam		compat_sys_sched_setparam
-155  common	sched_getparam		sys_sched_getparam		compat_sys_sched_getparam
-156  common	sched_setscheduler	sys_sched_setscheduler		compat_sys_sched_setscheduler
+154  common	sched_setparam		sys_sched_setparam		sys_sched_setparam
+155  common	sched_getparam		sys_sched_getparam		sys_sched_getparam
+156  common	sched_setscheduler	sys_sched_setscheduler		sys_sched_setscheduler
 157  common	sched_getscheduler	sys_sched_getscheduler		sys_sched_getscheduler
 158  common	sched_yield		sys_sched_yield			sys_sched_yield
 159  common	sched_get_priority_max	sys_sched_get_priority_max	sys_sched_get_priority_max
 160  common	sched_get_priority_min	sys_sched_get_priority_min	sys_sched_get_priority_min
-161  common	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
-162  common	nanosleep		sys_nanosleep			compat_sys_nanosleep
-163  common	mremap			sys_mremap			compat_sys_mremap
-164  32		setresuid		-				compat_sys_s390_setresuid16
-165  32		getresuid		-				compat_sys_s390_getresuid16
+161  common	sched_rr_get_interval	sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162  common	nanosleep		sys_nanosleep			sys_nanosleep_time32
+163  common	mremap			sys_mremap			sys_mremap
+164  32		setresuid		-				sys_setresuid16
+165  32		getresuid		-				sys_getresuid16
 167  common	query_module		-				-
-168  common	poll			sys_poll			compat_sys_poll
+168  common	poll			sys_poll			sys_poll
 169  common	nfsservctl		-				-
-170  32		setresgid		-				compat_sys_s390_setresgid16
-171  32		getresgid		-				compat_sys_s390_getresgid16
-172  common	prctl			sys_prctl			compat_sys_prctl
+170  32		setresgid		-				sys_setresgid16
+171  32		getresgid		-				sys_getresgid16
+172  common	prctl			sys_prctl			sys_prctl
 173  common	rt_sigreturn		sys_rt_sigreturn		compat_sys_rt_sigreturn
 174  common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
 175  common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 176  common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
 178  common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 179  common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 180  common	pread64			sys_pread64			compat_sys_s390_pread64
 181  common	pwrite64		sys_pwrite64			compat_sys_s390_pwrite64
-182  32		chown			-				compat_sys_s390_chown16
-183  common	getcwd			sys_getcwd			compat_sys_getcwd
-184  common	capget			sys_capget			compat_sys_capget
-185  common	capset			sys_capset			compat_sys_capset
+182  32		chown			-				sys_chown16
+183  common	getcwd			sys_getcwd			sys_getcwd
+184  common	capget			sys_capget			sys_capget
+185  common	capset			sys_capset			sys_capset
 186  common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
 187  common	sendfile		sys_sendfile64			compat_sys_sendfile
 188  common	getpmsg			-				-
@@ -187,7 +187,7 @@
 195  32		stat64			-				compat_sys_s390_stat64
 196  32		lstat64			-				compat_sys_s390_lstat64
 197  32		fstat64			-				compat_sys_s390_fstat64
-198  32		lchown32		-				compat_sys_lchown
+198  32		lchown32		-				sys_lchown
 198  64		lchown			sys_lchown			-
 199  32		getuid32		-				sys_getuid
 199  64		getuid			sys_getuid			-
@@ -201,21 +201,21 @@
 203  64		setreuid		sys_setreuid			-
 204  32		setregid32		-				sys_setregid
 204  64		setregid		sys_setregid			-
-205  32		getgroups32		-				compat_sys_getgroups
+205  32		getgroups32		-				sys_getgroups
 205  64		getgroups		sys_getgroups			-
-206  32		setgroups32		-				compat_sys_setgroups
+206  32		setgroups32		-				sys_setgroups
 206  64		setgroups		sys_setgroups			-
 207  32		fchown32		-				sys_fchown
 207  64		fchown			sys_fchown			-
 208  32		setresuid32		-				sys_setresuid
 208  64		setresuid		sys_setresuid			-
-209  32		getresuid32		-				compat_sys_getresuid
+209  32		getresuid32		-				sys_getresuid
 209  64		getresuid		sys_getresuid			-
 210  32		setresgid32		-				sys_setresgid
 210  64		setresgid		sys_setresgid			-
-211  32		getresgid32		-				compat_sys_getresgid
+211  32		getresgid32		-				sys_getresgid
 211  64		getresgid		sys_getresgid			-
-212  32		chown32			-				compat_sys_chown
+212  32		chown32			-				sys_chown
 212  64		chown			sys_chown			-
 213  32		setuid32		-				sys_setuid
 213  64		setuid			sys_setuid			-
@@ -225,166 +225,222 @@
 215  64		setfsuid		sys_setfsuid			-
 216  32		setfsgid32		-				sys_setfsgid
 216  64		setfsgid		sys_setfsgid			-
-217  common	pivot_root		sys_pivot_root			compat_sys_pivot_root
-218  common	mincore			sys_mincore			compat_sys_mincore
-219  common	madvise			sys_madvise			compat_sys_madvise
-220  common	getdents64		sys_getdents64			compat_sys_getdents64
+217  common	pivot_root		sys_pivot_root			sys_pivot_root
+218  common	mincore			sys_mincore			sys_mincore
+219  common	madvise			sys_madvise			sys_madvise
+220  common	getdents64		sys_getdents64			sys_getdents64
 221  32		fcntl64			-				compat_sys_fcntl64
 222  common	readahead		sys_readahead			compat_sys_s390_readahead
 223  32		sendfile64		-				compat_sys_sendfile64
-224  common	setxattr		sys_setxattr			compat_sys_setxattr
-225  common	lsetxattr		sys_lsetxattr			compat_sys_lsetxattr
-226  common	fsetxattr		sys_fsetxattr			compat_sys_fsetxattr
-227  common	getxattr		sys_getxattr			compat_sys_getxattr
-228  common	lgetxattr		sys_lgetxattr			compat_sys_lgetxattr
-229  common	fgetxattr		sys_fgetxattr			compat_sys_fgetxattr
-230  common	listxattr		sys_listxattr			compat_sys_listxattr
-231  common	llistxattr		sys_llistxattr			compat_sys_llistxattr
-232  common	flistxattr		sys_flistxattr			compat_sys_flistxattr
-233  common	removexattr		sys_removexattr			compat_sys_removexattr
-234  common	lremovexattr		sys_lremovexattr		compat_sys_lremovexattr
-235  common	fremovexattr		sys_fremovexattr		compat_sys_fremovexattr
+224  common	setxattr		sys_setxattr			sys_setxattr
+225  common	lsetxattr		sys_lsetxattr			sys_lsetxattr
+226  common	fsetxattr		sys_fsetxattr			sys_fsetxattr
+227  common	getxattr		sys_getxattr			sys_getxattr
+228  common	lgetxattr		sys_lgetxattr			sys_lgetxattr
+229  common	fgetxattr		sys_fgetxattr			sys_fgetxattr
+230  common	listxattr		sys_listxattr			sys_listxattr
+231  common	llistxattr		sys_llistxattr			sys_llistxattr
+232  common	flistxattr		sys_flistxattr			sys_flistxattr
+233  common	removexattr		sys_removexattr			sys_removexattr
+234  common	lremovexattr		sys_lremovexattr		sys_lremovexattr
+235  common	fremovexattr		sys_fremovexattr		sys_fremovexattr
 236  common	gettid			sys_gettid			sys_gettid
 237  common	tkill			sys_tkill			sys_tkill
-238  common	futex			sys_futex			compat_sys_futex
+238  common	futex			sys_futex			sys_futex_time32
 239  common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
 240  common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
 241  common	tgkill			sys_tgkill			sys_tgkill
 243  common	io_setup		sys_io_setup			compat_sys_io_setup
-244  common	io_destroy		sys_io_destroy			compat_sys_io_destroy
-245  common	io_getevents		sys_io_getevents		compat_sys_io_getevents
+244  common	io_destroy		sys_io_destroy			sys_io_destroy
+245  common	io_getevents		sys_io_getevents		sys_io_getevents_time32
 246  common	io_submit		sys_io_submit			compat_sys_io_submit
-247  common	io_cancel		sys_io_cancel			compat_sys_io_cancel
+247  common	io_cancel		sys_io_cancel			sys_io_cancel
 248  common	exit_group		sys_exit_group			sys_exit_group
 249  common	epoll_create		sys_epoll_create		sys_epoll_create
-250  common	epoll_ctl		sys_epoll_ctl			compat_sys_epoll_ctl
-251  common	epoll_wait		sys_epoll_wait			compat_sys_epoll_wait
-252  common	set_tid_address		sys_set_tid_address		compat_sys_set_tid_address
+250  common	epoll_ctl		sys_epoll_ctl			sys_epoll_ctl
+251  common	epoll_wait		sys_epoll_wait			sys_epoll_wait
+252  common	set_tid_address		sys_set_tid_address		sys_set_tid_address
 253  common	fadvise64		sys_fadvise64_64		compat_sys_s390_fadvise64
 254  common	timer_create		sys_timer_create		compat_sys_timer_create
-255  common	timer_settime		sys_timer_settime		compat_sys_timer_settime
-256  common	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
+255  common	timer_settime		sys_timer_settime		sys_timer_settime32
+256  common	timer_gettime		sys_timer_gettime		sys_timer_gettime32
 257  common	timer_getoverrun	sys_timer_getoverrun		sys_timer_getoverrun
 258  common	timer_delete		sys_timer_delete		sys_timer_delete
-259  common	clock_settime		sys_clock_settime		compat_sys_clock_settime
-260  common	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
-261  common	clock_getres		sys_clock_getres		compat_sys_clock_getres
-262  common	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
+259  common	clock_settime		sys_clock_settime		sys_clock_settime32
+260  common	clock_gettime		sys_clock_gettime		sys_clock_gettime32
+261  common	clock_getres		sys_clock_getres		sys_clock_getres_time32
+262  common	clock_nanosleep		sys_clock_nanosleep		sys_clock_nanosleep_time32
 264  32		fadvise64_64		-				compat_sys_s390_fadvise64_64
 265  common	statfs64		sys_statfs64			compat_sys_statfs64
 266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
-267  common	remap_file_pages	sys_remap_file_pages		compat_sys_remap_file_pages
+267  common	remap_file_pages	sys_remap_file_pages		sys_remap_file_pages
 268  common	mbind			sys_mbind			compat_sys_mbind
 269  common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
 270  common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
 271  common	mq_open			sys_mq_open			compat_sys_mq_open
-272  common	mq_unlink		sys_mq_unlink			compat_sys_mq_unlink
-273  common	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
-274  common	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
+272  common	mq_unlink		sys_mq_unlink			sys_mq_unlink
+273  common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
+274  common	mq_timedreceive		sys_mq_timedreceive		sys_mq_timedreceive_time32
 275  common	mq_notify		sys_mq_notify			compat_sys_mq_notify
 276  common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
 277  common	kexec_load		sys_kexec_load			compat_sys_kexec_load
-278  common	add_key			sys_add_key			compat_sys_add_key
-279  common	request_key		sys_request_key			compat_sys_request_key
+278  common	add_key			sys_add_key			sys_add_key
+279  common	request_key		sys_request_key			sys_request_key
 280  common	keyctl			sys_keyctl			compat_sys_keyctl
 281  common	waitid			sys_waitid			compat_sys_waitid
 282  common	ioprio_set		sys_ioprio_set			sys_ioprio_set
 283  common	ioprio_get		sys_ioprio_get			sys_ioprio_get
 284  common	inotify_init		sys_inotify_init		sys_inotify_init
-285  common	inotify_add_watch	sys_inotify_add_watch		compat_sys_inotify_add_watch
+285  common	inotify_add_watch	sys_inotify_add_watch		sys_inotify_add_watch
 286  common	inotify_rm_watch	sys_inotify_rm_watch		sys_inotify_rm_watch
 287  common	migrate_pages		sys_migrate_pages		compat_sys_migrate_pages
 288  common	openat			sys_openat			compat_sys_openat
-289  common	mkdirat			sys_mkdirat			compat_sys_mkdirat
-290  common	mknodat			sys_mknodat			compat_sys_mknodat
-291  common	fchownat		sys_fchownat			compat_sys_fchownat
-292  common	futimesat		sys_futimesat			compat_sys_futimesat
+289  common	mkdirat			sys_mkdirat			sys_mkdirat
+290  common	mknodat			sys_mknodat			sys_mknodat
+291  common	fchownat		sys_fchownat			sys_fchownat
+292  common	futimesat		sys_futimesat			sys_futimesat_time32
 293  32		fstatat64		-				compat_sys_s390_fstatat64
 293  64		newfstatat		sys_newfstatat			-
-294  common	unlinkat		sys_unlinkat			compat_sys_unlinkat
-295  common	renameat		sys_renameat			compat_sys_renameat
-296  common	linkat			sys_linkat			compat_sys_linkat
-297  common	symlinkat		sys_symlinkat			compat_sys_symlinkat
-298  common	readlinkat		sys_readlinkat			compat_sys_readlinkat
-299  common	fchmodat		sys_fchmodat			compat_sys_fchmodat
-300  common	faccessat		sys_faccessat			compat_sys_faccessat
-301  common	pselect6		sys_pselect6			compat_sys_pselect6
-302  common	ppoll			sys_ppoll			compat_sys_ppoll
-303  common	unshare			sys_unshare			compat_sys_unshare
+294  common	unlinkat		sys_unlinkat			sys_unlinkat
+295  common	renameat		sys_renameat			sys_renameat
+296  common	linkat			sys_linkat			sys_linkat
+297  common	symlinkat		sys_symlinkat			sys_symlinkat
+298  common	readlinkat		sys_readlinkat			sys_readlinkat
+299  common	fchmodat		sys_fchmodat			sys_fchmodat
+300  common	faccessat		sys_faccessat			sys_faccessat
+301  common	pselect6		sys_pselect6			compat_sys_pselect6_time32
+302  common	ppoll			sys_ppoll			compat_sys_ppoll_time32
+303  common	unshare			sys_unshare			sys_unshare
 304  common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
 305  common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
-306  common	splice			sys_splice			compat_sys_splice
+306  common	splice			sys_splice			sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
-308  common	tee			sys_tee				compat_sys_tee
+308  common	tee			sys_tee				sys_tee
 309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
-311  common	getcpu			sys_getcpu			compat_sys_getcpu
+311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
-313  common	utimes			sys_utimes			compat_sys_utimes
+313  common	utimes			sys_utimes			sys_utimes_time32
 314  common	fallocate		sys_fallocate			compat_sys_s390_fallocate
-315  common	utimensat		sys_utimensat			compat_sys_utimensat
+315  common	utimensat		sys_utimensat			sys_utimensat_time32
 316  common	signalfd		sys_signalfd			compat_sys_signalfd
 317  common	timerfd			-				-
 318  common	eventfd			sys_eventfd			sys_eventfd
 319  common	timerfd_create		sys_timerfd_create		sys_timerfd_create
-320  common	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
-321  common	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
+320  common	timerfd_settime		sys_timerfd_settime		sys_timerfd_settime32
+321  common	timerfd_gettime		sys_timerfd_gettime		sys_timerfd_gettime32
 322  common	signalfd4		sys_signalfd4			compat_sys_signalfd4
 323  common	eventfd2		sys_eventfd2			sys_eventfd2
 324  common	inotify_init1		sys_inotify_init1		sys_inotify_init1
-325  common	pipe2			sys_pipe2			compat_sys_pipe2
+325  common	pipe2			sys_pipe2			sys_pipe2
 326  common	dup3			sys_dup3			sys_dup3
 327  common	epoll_create1		sys_epoll_create1		sys_epoll_create1
 328  common	preadv			sys_preadv			compat_sys_preadv
 329  common	pwritev			sys_pwritev			compat_sys_pwritev
 330  common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
-331  common	perf_event_open		sys_perf_event_open		compat_sys_perf_event_open
+331  common	perf_event_open		sys_perf_event_open		sys_perf_event_open
 332  common	fanotify_init		sys_fanotify_init		sys_fanotify_init
 333  common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
-334  common	prlimit64		sys_prlimit64			compat_sys_prlimit64
-335  common	name_to_handle_at	sys_name_to_handle_at		compat_sys_name_to_handle_at
+334  common	prlimit64		sys_prlimit64			sys_prlimit64
+335  common	name_to_handle_at	sys_name_to_handle_at		sys_name_to_handle_at
 336  common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
-337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+337  common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
 340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
 341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
 342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
-343  common	kcmp			sys_kcmp			compat_sys_kcmp
-344  common	finit_module		sys_finit_module		compat_sys_finit_module
-345  common	sched_setattr		sys_sched_setattr		compat_sys_sched_setattr
-346  common	sched_getattr		sys_sched_getattr		compat_sys_sched_getattr
-347  common	renameat2		sys_renameat2			compat_sys_renameat2
-348  common	seccomp			sys_seccomp			compat_sys_seccomp
-349  common	getrandom		sys_getrandom			compat_sys_getrandom
-350  common	memfd_create		sys_memfd_create		compat_sys_memfd_create
-351  common	bpf			sys_bpf				compat_sys_bpf
-352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		compat_sys_s390_pci_mmio_write
-353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		compat_sys_s390_pci_mmio_read
+343  common	kcmp			sys_kcmp			sys_kcmp
+344  common	finit_module		sys_finit_module		sys_finit_module
+345  common	sched_setattr		sys_sched_setattr		sys_sched_setattr
+346  common	sched_getattr		sys_sched_getattr		sys_sched_getattr
+347  common	renameat2		sys_renameat2			sys_renameat2
+348  common	seccomp			sys_seccomp			sys_seccomp
+349  common	getrandom		sys_getrandom			sys_getrandom
+350  common	memfd_create		sys_memfd_create		sys_memfd_create
+351  common	bpf			sys_bpf				sys_bpf
+352  common	s390_pci_mmio_write	sys_s390_pci_mmio_write		sys_s390_pci_mmio_write
+353  common	s390_pci_mmio_read	sys_s390_pci_mmio_read		sys_s390_pci_mmio_read
 354  common	execveat		sys_execveat			compat_sys_execveat
 355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
 356  common	membarrier		sys_membarrier			sys_membarrier
-357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
+357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
 358  common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
 359  common	socket			sys_socket			sys_socket
-360  common	socketpair		sys_socketpair			compat_sys_socketpair
-361  common	bind			sys_bind			compat_sys_bind
-362  common	connect			sys_connect			compat_sys_connect
+360  common	socketpair		sys_socketpair			sys_socketpair
+361  common	bind			sys_bind			sys_bind
+362  common	connect			sys_connect			sys_connect
 363  common	listen			sys_listen			sys_listen
-364  common	accept4			sys_accept4			compat_sys_accept4
+364  common	accept4			sys_accept4			sys_accept4
 365  common	getsockopt		sys_getsockopt			sys_getsockopt
 366  common	setsockopt		sys_setsockopt			sys_setsockopt
-367  common	getsockname		sys_getsockname			compat_sys_getsockname
-368  common	getpeername		sys_getpeername			compat_sys_getpeername
-369  common	sendto			sys_sendto			compat_sys_sendto
+367  common	getsockname		sys_getsockname			sys_getsockname
+368  common	getpeername		sys_getpeername			sys_getpeername
+369  common	sendto			sys_sendto			sys_sendto
 370  common	sendmsg			sys_sendmsg			compat_sys_sendmsg
 371  common	recvfrom		sys_recvfrom			compat_sys_recvfrom
 372  common	recvmsg			sys_recvmsg			compat_sys_recvmsg
 373  common	shutdown		sys_shutdown			sys_shutdown
-374  common	mlock2			sys_mlock2			compat_sys_mlock2
-375  common	copy_file_range		sys_copy_file_range		compat_sys_copy_file_range
+374  common	mlock2			sys_mlock2			sys_mlock2
+375  common	copy_file_range		sys_copy_file_range		sys_copy_file_range
 376  common	preadv2			sys_preadv2			compat_sys_preadv2
 377  common	pwritev2		sys_pwritev2			compat_sys_pwritev2
-378  common	s390_guarded_storage	sys_s390_guarded_storage	compat_sys_s390_guarded_storage
-379  common	statx			sys_statx			compat_sys_statx
-380  common	s390_sthyi		sys_s390_sthyi			compat_sys_s390_sthyi
+378  common	s390_guarded_storage	sys_s390_guarded_storage	sys_s390_guarded_storage
+379  common	statx			sys_statx			sys_statx
+380  common	s390_sthyi		sys_s390_sthyi			sys_s390_sthyi
+381  common	kexec_file_load		sys_kexec_file_load		sys_kexec_file_load
+382  common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
+383  common	rseq			sys_rseq			sys_rseq
+384  common	pkey_mprotect		sys_pkey_mprotect		sys_pkey_mprotect
+385  common	pkey_alloc		sys_pkey_alloc			sys_pkey_alloc
+386  common	pkey_free		sys_pkey_free			sys_pkey_free
+# room for arch specific syscalls
+392	64	semtimedop		sys_semtimedop			-
+393  common	semget			sys_semget			sys_semget
+394  common	semctl			sys_semctl			compat_sys_semctl
+395  common	shmget			sys_shmget			sys_shmget
+396  common	shmctl			sys_shmctl			compat_sys_shmctl
+397  common	shmat			sys_shmat			compat_sys_shmat
+398  common	shmdt			sys_shmdt 			sys_shmdt
+399  common	msgget			sys_msgget			sys_msgget
+400  common	msgsnd			sys_msgsnd			compat_sys_msgsnd
+401  common	msgrcv			sys_msgrcv			compat_sys_msgrcv
+402  common	msgctl			sys_msgctl			compat_sys_msgctl
+403	32	clock_gettime64		-				sys_clock_gettime
+404	32	clock_settime64		-				sys_clock_settime
+405	32	clock_adjtime64		-				sys_clock_adjtime
+406	32	clock_getres_time64	-				sys_clock_getres
+407	32	clock_nanosleep_time64	-				sys_clock_nanosleep
+408	32	timer_gettime64		-				sys_timer_gettime
+409	32	timer_settime64		-				sys_timer_settime
+410	32	timerfd_gettime64	-				sys_timerfd_gettime
+411	32	timerfd_settime64	-				sys_timerfd_settime
+412	32	utimensat_time64	-				sys_utimensat
+413	32	pselect6_time64		-				compat_sys_pselect6_time64
+414	32	ppoll_time64		-				compat_sys_ppoll_time64
+416	32	io_pgetevents_time64	-				sys_io_pgetevents
+417	32	recvmmsg_time64		-				compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64	-				sys_mq_timedsend
+419	32	mq_timedreceive_time64	-				sys_mq_timedreceive
+420	32	semtimedop_time64	-				sys_semtimedop
+421	32	rt_sigtimedwait_time64	-				compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64		-				sys_futex
+423	32	sched_rr_get_interval_time64	-			sys_sched_rr_get_interval
+424  common	pidfd_send_signal	sys_pidfd_send_signal		sys_pidfd_send_signal
+425  common	io_uring_setup		sys_io_uring_setup              sys_io_uring_setup
+426  common	io_uring_enter		sys_io_uring_enter              sys_io_uring_enter
+427  common	io_uring_register	sys_io_uring_register           sys_io_uring_register
+428  common	open_tree		sys_open_tree			sys_open_tree
+429  common	move_mount		sys_move_mount			sys_move_mount
+430  common	fsopen			sys_fsopen			sys_fsopen
+431  common	fsconfig		sys_fsconfig			sys_fsconfig
+432  common	fsmount			sys_fsmount			sys_fsmount
+433  common	fspick			sys_fspick			sys_fspick
+434  common	pidfd_open		sys_pidfd_open			sys_pidfd_open
+435  common	clone3			sys_clone3			sys_clone3
+436  common	close_range		sys_close_range			sys_close_range
+437  common	openat2			sys_openat2			sys_openat2
+438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
+439  common	faccessat2		sys_faccessat2			sys_faccessat2
+440  common	process_madvise		sys_process_madvise		sys_process_madvise
+441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
-- 
cgit v1.2.3-70-g09d2


From 23331eeb731a503aaa74d167055eeedc2073ff09 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:17 +0200
Subject: perf tests: Improve topology test to check all aggregation types

Improve the topology test to check all aggregation types. This is to
lock down the behaviour before 'id' is changed into a struct in later
commits.

Committer testing:

  $ perf test topology
  41: Session topology: Ok
  $

  $ perf test -v topology
  41: Session topology:
  --- start ---
  test child forked, pid 965552
  templ file: /tmp/perf-test-mO7NtI
  Problems creating module maps, continuing anyway...
  CPU 0, core 0, socket 0
  CPU 1, core 1, socket 0
  CPU 2, core 2, socket 0
  CPU 3, core 4, socket 0
  CPU 4, core 5, socket 0
  CPU 5, core 6, socket 0
  CPU 6, core 8, socket 0
  CPU 7, core 9, socket 0
  CPU 8, core 10, socket 0
  CPU 9, core 12, socket 0
  CPU 10, core 13, socket 0
  CPU 11, core 14, socket 0
  CPU 12, core 0, socket 0
  CPU 13, core 1, socket 0
  CPU 14, core 2, socket 0
  CPU 15, core 4, socket 0
  CPU 16, core 5, socket 0
  CPU 17, core 6, socket 0
  CPU 18, core 8, socket 0
  CPU 19, core 9, socket 0
  CPU 20, core 10, socket 0
  CPU 21, core 12, socket 0
  CPU 22, core 13, socket 0
  CPU 23, core 14, socket 0
  test child finished with 0
  ---- end ----
  Session topology: Ok
  $

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-2-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/topology.c | 53 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 46 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 165feedc7863..f988b3109afc 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -64,10 +64,11 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		.path = path,
 		.mode = PERF_DATA_MODE_READ,
 	};
-	int i;
+	int i, id;
 
 	session = perf_session__new(&data, false, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
+	cpu__setup_cpunode_map();
 
 	/* On platforms with large numbers of CPUs process_cpu_topology()
 	 * might issue an error while reading the perf.data file section
@@ -85,11 +86,18 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 	 *  "socket_id number is too big. You may need to upgrade the
 	 *  perf tool."
 	 *
-	 *  This is the reason why this test might be skipped.
+	 *  This is the reason why this test might be skipped. aarch64 and
+	 *  s390 always write this part of the header, even when the above
+	 *  condition is true (see do_core_id_test in header.c). So always
+	 *  run this test on those platforms.
 	 */
-	if (!session->header.env.cpu)
+	if (!session->header.env.cpu
+			&& strncmp(session->header.env.arch, "s390", 4)
+			&& strncmp(session->header.env.arch, "aarch64", 7))
 		return TEST_SKIP;
 
+	TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
+
 	for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
 		if (!cpu_map__has(map, i))
 			continue;
@@ -98,14 +106,45 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 			 session->header.env.cpu[i].socket_id);
 	}
 
+	// Test that core ID contains socket, die and core
+	for (i = 0; i < map->nr; i++) {
+		id = cpu_map__get_core(map, i, NULL);
+		TEST_ASSERT_VAL("Core map - Core ID doesn't match",
+			session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id));
+
+		TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
+			session->header.env.cpu[map->map[i]].socket_id ==
+				cpu_map__id_to_socket(id));
+
+		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
+			session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id));
+	}
+
+	// Test that die ID contains socket and die
 	for (i = 0; i < map->nr; i++) {
-		TEST_ASSERT_VAL("Core ID doesn't match",
-			(session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff)));
+		id = cpu_map__get_die(map, i, NULL);
+		TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
+			session->header.env.cpu[map->map[i]].socket_id ==
+				cpu_map__id_to_socket(id << 16));
+
+		TEST_ASSERT_VAL("Die map - Die ID doesn't match",
+			session->header.env.cpu[map->map[i]].die_id ==
+				cpu_map__id_to_die(id << 16));
+	}
 
-		TEST_ASSERT_VAL("Socket ID doesn't match",
-			(session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL)));
+	// Test that socket ID contains only socket
+	for (i = 0; i < map->nr; i++) {
+		id = cpu_map__get_socket(map, i, NULL);
+		TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
+			session->header.env.cpu[map->map[i]].socket_id == id);
 	}
 
+	// Test that node ID contains only node
+	for (i = 0; i < map->nr; i++) {
+		id = cpu_map__get_node(map, i, NULL);
+		TEST_ASSERT_VAL("Node map - Node ID doesn't match",
+			cpu__get_node(map->map[i]) == id);
+	}
 	perf_session__delete(session);
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 91585846f105ef2e3f479a5124a264ebb770f6ab Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:18 +0200
Subject: perf cpumap: Use existing allocator to avoid using malloc

Use the existing allocator for perf_cpu_map to avoid use of raw malloc.
This could cause an issue in later commits where the size of
perf_cpu_map is changed.

No functional changes.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-3-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cpumap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index dc5c5e6fc502..20e3a75953fc 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -132,15 +132,16 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
 		       int (*f)(struct perf_cpu_map *map, int cpu, void *data),
 		       void *data)
 {
-	struct perf_cpu_map *c;
 	int nr = cpus->nr;
+	struct perf_cpu_map *c = perf_cpu_map__empty_new(nr);
 	int cpu, s1, s2;
 
-	/* allocate as much as possible */
-	c = calloc(1, sizeof(*c) + nr * sizeof(int));
 	if (!c)
 		return -1;
 
+	/* Reset size as it may only be partially filled */
+	c->nr = 0;
+
 	for (cpu = 0; cpu < nr; cpu++) {
 		s1 = f(cpus, cpu, data);
 		for (s2 = 0; s2 < c->nr; s2++) {
@@ -155,7 +156,6 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
 	/* ensure we process id in increasing order */
 	qsort(c->map, c->nr, sizeof(int), cmp_ids);
 
-	refcount_set(&c->refcnt, 1);
 	*res = c;
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From fa265e59b81a09fa3d88f3322b1e44d583cac9b0 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:19 +0200
Subject: perf cpumap: Add new struct for cpu aggregation

This struct currently has only a single int member so that it can be
used as a drop in replacement for the existing behaviour.

Comparison and constructor functions have also been added that will
replace usages of '==' and '= -1'.

No functional changes.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-4-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cpumap.c | 18 ++++++++++++++++++
 tools/perf/util/cpumap.h |  8 ++++++++
 2 files changed, 26 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 20e3a75953fc..8624948b4f1d 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -586,3 +586,21 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
 
 	return online;
 }
+
+bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
+{
+	return a.id == b.id;
+}
+
+bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
+{
+	return a.id == -1;
+}
+
+struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
+{
+	struct aggr_cpu_id ret = {
+		.id = -1
+	};
+	return ret;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 3a442f021468..1cdccc69cd4b 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -7,6 +7,10 @@
 #include <internal/cpumap.h>
 #include <perf/cpumap.h>
 
+struct aggr_cpu_id {
+	int id;
+};
+
 struct perf_record_cpu_map_data;
 
 struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
@@ -64,4 +68,8 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
 int cpu_map__cpu(struct perf_cpu_map *cpus, int idx);
 bool cpu_map__has(struct perf_cpu_map *cpus, int cpu);
 
+bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b);
+bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a);
+struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void);
+
 #endif /* __PERF_CPUMAP_H */
-- 
cgit v1.2.3-70-g09d2


From 2760f5a14fe7aa466e38bbb92d0284fffc0e4da0 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:20 +0200
Subject: perf stat: Replace aggregation ID with a struct

Replace all occurences of the usage of int with the new struct
cpu_aggr_id.

No functional changes.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-5-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      |  76 ++++++++++++++++-------------
 tools/perf/tests/topology.c    |  17 ++++---
 tools/perf/util/cpumap.c       |  82 ++++++++++++++++++-------------
 tools/perf/util/cpumap.h       |  10 ++--
 tools/perf/util/stat-display.c | 108 ++++++++++++++++++++++++-----------------
 tools/perf/util/stat.c         |   2 +-
 tools/perf/util/stat.h         |   5 +-
 7 files changed, 173 insertions(+), 127 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 89c32692f40c..1ff76d36d1c9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1186,65 +1186,67 @@ static struct option stat_options[] = {
 	OPT_END()
 };
 
-static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
 				 struct perf_cpu_map *map, int cpu)
 {
 	return cpu_map__get_socket(map, cpu, NULL);
 }
 
-static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
 			      struct perf_cpu_map *map, int cpu)
 {
 	return cpu_map__get_die(map, cpu, NULL);
 }
 
-static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
 			       struct perf_cpu_map *map, int cpu)
 {
 	return cpu_map__get_core(map, cpu, NULL);
 }
 
-static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
 			       struct perf_cpu_map *map, int cpu)
 {
 	return cpu_map__get_node(map, cpu, NULL);
 }
 
-static int perf_stat__get_aggr(struct perf_stat_config *config,
+static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
 			       aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
 {
 	int cpu;
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
 	if (idx >= map->nr)
-		return -1;
+		return id;
 
 	cpu = map->map[idx];
 
 	if (config->cpus_aggr_map->map[cpu] == -1)
-		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
+		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx).id;
 
-	return config->cpus_aggr_map->map[cpu];
+	id.id = config->cpus_aggr_map->map[cpu];
+	return id;
 }
 
-static int perf_stat__get_socket_cached(struct perf_stat_config *config,
+static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
 					struct perf_cpu_map *map, int idx)
 {
 	return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
 }
 
-static int perf_stat__get_die_cached(struct perf_stat_config *config,
+static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
 					struct perf_cpu_map *map, int idx)
 {
 	return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
 }
 
-static int perf_stat__get_core_cached(struct perf_stat_config *config,
+static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
 				      struct perf_cpu_map *map, int idx)
 {
 	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
 }
 
-static int perf_stat__get_node_cached(struct perf_stat_config *config,
+static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
 				      struct perf_cpu_map *map, int idx)
 {
 	return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
@@ -1345,18 +1347,23 @@ static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *m
 	return cpu;
 }
 
-static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
 {
 	struct perf_env *env = data;
 	int cpu = perf_env__get_cpu(env, map, idx);
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+
+	if (cpu != -1)
+		id.id = env->cpu[cpu].socket_id;
 
-	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
+	return id;
 }
 
-static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
 {
 	struct perf_env *env = data;
-	int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+	int cpu = perf_env__get_cpu(env, map, idx);
 
 	if (cpu != -1) {
 		/*
@@ -1366,21 +1373,22 @@ static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
 		 * socket + die id
 		 */
 		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
-			return -1;
+			return cpu_map__empty_aggr_cpu_id();
 
 		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
-			return -1;
+			return cpu_map__empty_aggr_cpu_id();
 
-		die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
+		id.id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
 	}
 
-	return die_id;
+	return id;
 }
 
-static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
 {
 	struct perf_env *env = data;
-	int core = -1, cpu = perf_env__get_cpu(env, map, idx);
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+	int cpu = perf_env__get_cpu(env, map, idx);
 
 	if (cpu != -1) {
 		/*
@@ -1391,27 +1399,29 @@ static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
 		 * socket + die id + core id
 		 */
 		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
-			return -1;
+			return cpu_map__empty_aggr_cpu_id();
 
 		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
-			return -1;
+			return cpu_map__empty_aggr_cpu_id();
 
 		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
-			return -1;
+			return cpu_map__empty_aggr_cpu_id();
 
-		core = (env->cpu[cpu].socket_id << 24) |
+		id.id = (env->cpu[cpu].socket_id << 24) |
 		       (env->cpu[cpu].die_id << 16) |
 		       (env->cpu[cpu].core_id & 0xffff);
 	}
 
-	return core;
+	return id;
 }
 
-static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
+static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
 {
 	int cpu = perf_env__get_cpu(data, map, idx);
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
-	return perf_env__numa_node(data, cpu);
+	id.id = perf_env__numa_node(data, cpu);
+	return id;
 }
 
 static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
@@ -1438,24 +1448,24 @@ static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *c
 	return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
 }
 
-static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
 				      struct perf_cpu_map *map, int idx)
 {
 	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
 }
-static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
 				   struct perf_cpu_map *map, int idx)
 {
 	return perf_env__get_die(map, idx, &perf_stat.session->header.env);
 }
 
-static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
 				    struct perf_cpu_map *map, int idx)
 {
 	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
 }
 
-static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
+static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
 				    struct perf_cpu_map *map, int idx)
 {
 	return perf_env__get_node(map, idx, &perf_stat.session->header.env);
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index f988b3109afc..7a7d16b3950a 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -64,7 +64,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		.path = path,
 		.mode = PERF_DATA_MODE_READ,
 	};
-	int i, id;
+	int i;
+	struct aggr_cpu_id id;
 
 	session = perf_session__new(&data, false, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
@@ -110,14 +111,14 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 	for (i = 0; i < map->nr; i++) {
 		id = cpu_map__get_core(map, i, NULL);
 		TEST_ASSERT_VAL("Core map - Core ID doesn't match",
-			session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id));
+			session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id));
 
 		TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
 			session->header.env.cpu[map->map[i]].socket_id ==
-				cpu_map__id_to_socket(id));
+				cpu_map__id_to_socket(id.id));
 
 		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
-			session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id));
+			session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id));
 	}
 
 	// Test that die ID contains socket and die
@@ -125,25 +126,25 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		id = cpu_map__get_die(map, i, NULL);
 		TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
 			session->header.env.cpu[map->map[i]].socket_id ==
-				cpu_map__id_to_socket(id << 16));
+				cpu_map__id_to_socket(id.id << 16));
 
 		TEST_ASSERT_VAL("Die map - Die ID doesn't match",
 			session->header.env.cpu[map->map[i]].die_id ==
-				cpu_map__id_to_die(id << 16));
+				cpu_map__id_to_die(id.id << 16));
 	}
 
 	// Test that socket ID contains only socket
 	for (i = 0; i < map->nr; i++) {
 		id = cpu_map__get_socket(map, i, NULL);
 		TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
-			session->header.env.cpu[map->map[i]].socket_id == id);
+			session->header.env.cpu[map->map[i]].socket_id == id.id);
 	}
 
 	// Test that node ID contains only node
 	for (i = 0; i < map->nr; i++) {
 		id = cpu_map__get_node(map, i, NULL);
 		TEST_ASSERT_VAL("Node map - Node ID doesn't match",
-			cpu__get_node(map->map[i]) == id);
+			cpu__get_node(map->map[i]) == id.id);
 	}
 	perf_session__delete(session);
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 8624948b4f1d..e05a12bde073 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -111,30 +111,37 @@ int cpu_map__get_socket_id(int cpu)
 	return ret ?: value;
 }
 
-int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
+struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
+					void *data __maybe_unused)
 {
 	int cpu;
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
 	if (idx > map->nr)
-		return -1;
+		return id;
 
 	cpu = map->map[idx];
 
-	return cpu_map__get_socket_id(cpu);
+	id.id = cpu_map__get_socket_id(cpu);
+	return id;
 }
 
-static int cmp_ids(const void *a, const void *b)
+static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
 {
-	return *(int *)a - *(int *)b;
+	struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
+	struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
+
+	return a->id - b->id;
 }
 
 int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
-		       int (*f)(struct perf_cpu_map *map, int cpu, void *data),
+		       struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
 		       void *data)
 {
 	int nr = cpus->nr;
 	struct perf_cpu_map *c = perf_cpu_map__empty_new(nr);
-	int cpu, s1, s2;
+	int cpu, s2;
+	struct aggr_cpu_id s1;
 
 	if (!c)
 		return -1;
@@ -145,16 +152,16 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
 	for (cpu = 0; cpu < nr; cpu++) {
 		s1 = f(cpus, cpu, data);
 		for (s2 = 0; s2 < c->nr; s2++) {
-			if (s1 == c->map[s2])
+			if (s1.id == c->map[s2])
 				break;
 		}
 		if (s2 == c->nr) {
-			c->map[c->nr] = s1;
+			c->map[c->nr] = s1.id;
 			c->nr++;
 		}
 	}
 	/* ensure we process id in increasing order */
-	qsort(c->map, c->nr, sizeof(int), cmp_ids);
+	qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
 
 	*res = c;
 	return 0;
@@ -167,23 +174,24 @@ int cpu_map__get_die_id(int cpu)
 	return ret ?: value;
 }
 
-int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
 {
-	int cpu, die_id, s;
+	int cpu, s;
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
 	if (idx > map->nr)
-		return -1;
+		return id;
 
 	cpu = map->map[idx];
 
-	die_id = cpu_map__get_die_id(cpu);
+	id.id = cpu_map__get_die_id(cpu);
 	/* There is no die_id on legacy system. */
-	if (die_id == -1)
-		die_id = 0;
+	if (id.id == -1)
+		id.id = 0;
 
-	s = cpu_map__get_socket(map, idx, data);
+	s = cpu_map__get_socket(map, idx, data).id;
 	if (s == -1)
-		return -1;
+		return cpu_map__empty_aggr_cpu_id();
 
 	/*
 	 * Encode socket in bit range 15:8
@@ -191,13 +199,14 @@ int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
 	 * we need a global id. So we combine
 	 * socket + die id
 	 */
-	if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
-		return -1;
+	if (WARN_ONCE(id.id >> 8, "The die id number is too big.\n"))
+		return cpu_map__empty_aggr_cpu_id();
 
 	if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
-		return -1;
+		return cpu_map__empty_aggr_cpu_id();
 
-	return (s << 8) | (die_id & 0xff);
+	id.id = (s << 8) | (id.id & 0xff);
+	return id;
 }
 
 int cpu_map__get_core_id(int cpu)
@@ -211,21 +220,22 @@ int cpu_map__get_node_id(int cpu)
 	return cpu__get_node(cpu);
 }
 
-int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
+struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
 {
-	int cpu, s_die;
+	int cpu;
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
 	if (idx > map->nr)
-		return -1;
+		return id;
 
 	cpu = map->map[idx];
 
 	cpu = cpu_map__get_core_id(cpu);
 
-	/* s_die is the combination of socket + die id */
-	s_die = cpu_map__get_die(map, idx, data);
-	if (s_die == -1)
-		return -1;
+	/* cpu_map__get_die returns the combination of socket + die id */
+	id = cpu_map__get_die(map, idx, data);
+	if (cpu_map__aggr_cpu_id_is_empty(id))
+		return id;
 
 	/*
 	 * encode socket in bit range 31:24
@@ -235,17 +245,21 @@ int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
 	 * socket + die id + core id
 	 */
 	if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n"))
-		return -1;
+		return cpu_map__empty_aggr_cpu_id();
 
-	return (s_die << 16) | (cpu & 0xffff);
+	id.id = (id.id << 16) | (cpu & 0xffff);
+	return id;
 }
 
-int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
+struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
 {
+	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
+
 	if (idx < 0 || idx >= map->nr)
-		return -1;
+		return id;
 
-	return cpu_map__get_node_id(map->map[idx]);
+	id.id = cpu_map__get_node_id(map->map[idx]);
+	return id;
 }
 
 int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp)
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 1cdccc69cd4b..b8c2288a3f6d 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -19,13 +19,13 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
 size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
 size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp);
 int cpu_map__get_socket_id(int cpu);
-int cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data);
+struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data);
 int cpu_map__get_die_id(int cpu);
-int cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data);
+struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data);
 int cpu_map__get_core_id(int cpu);
-int cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data);
+struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data);
 int cpu_map__get_node_id(int cpu);
-int cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data);
+struct aggr_cpu_id  cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data);
 int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp);
 int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep);
 int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep);
@@ -62,7 +62,7 @@ int cpu__max_present_cpu(void);
 int cpu__get_node(int cpu);
 
 int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
-		       int (*f)(struct perf_cpu_map *map, int cpu, void *data),
+		       struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
 		       void *data);
 
 int cpu_map__cpu(struct perf_cpu_map *cpus, int idx);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index fee7543843a8..8f4ee3621863 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -68,15 +68,15 @@ static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
 
 
 static void aggr_printout(struct perf_stat_config *config,
-			  struct evsel *evsel, int id, int nr)
+			  struct evsel *evsel, struct aggr_cpu_id id, int nr)
 {
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
 		fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
-			cpu_map__id_to_socket(id),
-			cpu_map__id_to_die(id),
+			cpu_map__id_to_socket(id.id),
+			cpu_map__id_to_die(id.id),
 			config->csv_output ? 0 : -8,
-			cpu_map__id_to_cpu(id),
+			cpu_map__id_to_cpu(id.id),
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
@@ -84,9 +84,9 @@ static void aggr_printout(struct perf_stat_config *config,
 		break;
 	case AGGR_DIE:
 		fprintf(config->output, "S%d-D%*d%s%*d%s",
-			cpu_map__id_to_socket(id << 16),
+			cpu_map__id_to_socket(id.id << 16),
 			config->csv_output ? 0 : -8,
-			cpu_map__id_to_die(id << 16),
+			cpu_map__id_to_die(id.id << 16),
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
@@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_SOCKET:
 		fprintf(config->output, "S%*d%s%*d%s",
 			config->csv_output ? 0 : -5,
-			id,
+			id.id,
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
@@ -104,7 +104,7 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_NODE:
 		fprintf(config->output, "N%*d%s%*d%s",
 			config->csv_output ? 0 : -5,
-			id,
+			id.id,
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
@@ -113,23 +113,23 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_NONE:
 		if (evsel->percore && !config->percore_show_thread) {
 			fprintf(config->output, "S%d-D%d-C%*d%s",
-				cpu_map__id_to_socket(id),
-				cpu_map__id_to_die(id),
+				cpu_map__id_to_socket(id.id),
+				cpu_map__id_to_die(id.id),
 				config->csv_output ? 0 : -3,
-				cpu_map__id_to_cpu(id), config->csv_sep);
-		} else if (id > -1) {
+				cpu_map__id_to_cpu(id.id), config->csv_sep);
+		} else if (id.id > -1) {
 			fprintf(config->output, "CPU%*d%s",
 				config->csv_output ? 0 : -7,
-				evsel__cpus(evsel)->map[id],
+				evsel__cpus(evsel)->map[id.id],
 				config->csv_sep);
 		}
 		break;
 	case AGGR_THREAD:
 		fprintf(config->output, "%*s-%*d%s",
 			config->csv_output ? 0 : 16,
-			perf_thread_map__comm(evsel->core.threads, id),
+			perf_thread_map__comm(evsel->core.threads, id.id),
 			config->csv_output ? 0 : -8,
-			perf_thread_map__pid(evsel->core.threads, id),
+			perf_thread_map__pid(evsel->core.threads, id.id),
 			config->csv_sep);
 		break;
 	case AGGR_GLOBAL:
@@ -144,7 +144,8 @@ struct outstate {
 	bool newline;
 	const char *prefix;
 	int  nfields;
-	int  id, nr;
+	int  nr;
+	struct aggr_cpu_id id;
 	struct evsel *evsel;
 };
 
@@ -319,13 +320,13 @@ static void print_metric_header(struct perf_stat_config *config,
 }
 
 static int first_shadow_cpu(struct perf_stat_config *config,
-			    struct evsel *evsel, int id)
+			    struct evsel *evsel, struct aggr_cpu_id id)
 {
 	struct evlist *evlist = evsel->evlist;
 	int i;
 
 	if (config->aggr_mode == AGGR_NONE)
-		return id;
+		return id.id;
 
 	if (!config->aggr_get_id)
 		return 0;
@@ -333,14 +334,17 @@ static int first_shadow_cpu(struct perf_stat_config *config,
 	for (i = 0; i < evsel__nr_cpus(evsel); i++) {
 		int cpu2 = evsel__cpus(evsel)->map[i];
 
-		if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id)
+		if (cpu_map__compare_aggr_cpu_id(
+					config->aggr_get_id(config, evlist->core.cpus, cpu2),
+					id)) {
 			return cpu2;
+		}
 	}
 	return 0;
 }
 
 static void abs_printout(struct perf_stat_config *config,
-			 int id, int nr, struct evsel *evsel, double avg)
+			 struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
 {
 	FILE *output = config->output;
 	double sc =  evsel->scale;
@@ -393,7 +397,7 @@ static bool is_mixed_hw_group(struct evsel *counter)
 	return false;
 }
 
-static void printout(struct perf_stat_config *config, int id, int nr,
+static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
 		     struct evsel *counter, double uval,
 		     char *prefix, u64 run, u64 ena, double noise,
 		     struct runtime_stat *st)
@@ -496,17 +500,18 @@ static void printout(struct perf_stat_config *config, int id, int nr,
 static void aggr_update_shadow(struct perf_stat_config *config,
 			       struct evlist *evlist)
 {
-	int cpu, s2, id, s;
+	int cpu, s;
+	struct aggr_cpu_id s2, id;
 	u64 val;
 	struct evsel *counter;
 
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		id = config->aggr_map->map[s];
+		id.id = config->aggr_map->map[s];
 		evlist__for_each_entry(evlist, counter) {
 			val = 0;
 			for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
 				s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
-				if (s2 != id)
+				if (!cpu_map__compare_aggr_cpu_id(s2, id))
 					continue;
 				val += perf_counts(counter->counts, cpu, 0)->val;
 			}
@@ -584,7 +589,7 @@ static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
 
 struct aggr_data {
 	u64 ena, run, val;
-	int id;
+	struct aggr_cpu_id id;
 	int nr;
 	int cpu;
 };
@@ -593,13 +598,14 @@ static void aggr_cb(struct perf_stat_config *config,
 		    struct evsel *counter, void *data, bool first)
 {
 	struct aggr_data *ad = data;
-	int cpu, s2;
+	int cpu;
+	struct aggr_cpu_id s2;
 
 	for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
 		struct perf_counts_values *counts;
 
 		s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
-		if (s2 != ad->id)
+		if (!cpu_map__compare_aggr_cpu_id(s2, ad->id))
 			continue;
 		if (first)
 			ad->nr++;
@@ -628,10 +634,11 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	struct aggr_data ad;
 	FILE *output = config->output;
 	u64 ena, run, val;
-	int id, nr;
+	int nr;
+	struct aggr_cpu_id id;
 	double uval;
 
-	ad.id = id = config->aggr_map->map[s];
+	ad.id.id = id.id = config->aggr_map->map[s];
 	ad.val = ad.ena = ad.run = 0;
 	ad.nr = 0;
 	if (!collect_data(config, counter, aggr_cb, &ad))
@@ -649,8 +656,12 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 		fprintf(output, "%s", prefix);
 
 	uval = val * counter->scale;
-	printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
-		 run, ena, 1.0, &rt_stat);
+	if (cpu != -1) {
+		id = cpu_map__empty_aggr_cpu_id();
+		id.id = cpu;
+	}
+	printout(config, id, nr, counter, uval,
+		 prefix, run, ena, 1.0, &rt_stat);
 	if (!metric_only)
 		fputc('\n', output);
 }
@@ -728,7 +739,8 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
 			continue;
 
 		buf[i].counter = counter;
-		buf[i].id = thread;
+		buf[i].id = cpu_map__empty_aggr_cpu_id();
+		buf[i].id.id = thread;
 		buf[i].uval = uval;
 		buf[i].val = val;
 		buf[i].run = run;
@@ -751,7 +763,8 @@ static void print_aggr_thread(struct perf_stat_config *config,
 	FILE *output = config->output;
 	int nthreads = perf_thread_map__nr(counter->core.threads);
 	int ncpus = perf_cpu_map__nr(counter->core.cpus);
-	int thread, sorted_threads, id;
+	int thread, sorted_threads;
+	struct aggr_cpu_id id;
 	struct perf_aggr_thread_value *buf;
 
 	buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target);
@@ -768,7 +781,7 @@ static void print_aggr_thread(struct perf_stat_config *config,
 		if (config->stats)
 			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
 				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &config->stats[id]);
+				 &config->stats[id.id]);
 		else
 			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
 				 prefix, buf[thread].run, buf[thread].ena, 1.0,
@@ -814,8 +827,8 @@ static void print_counter_aggr(struct perf_stat_config *config,
 		fprintf(output, "%s", prefix);
 
 	uval = cd.avg * counter->scale;
-	printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
-		 cd.avg, &rt_stat);
+	printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running,
+		 cd.avg_enabled, cd.avg, &rt_stat);
 	if (!metric_only)
 		fprintf(output, "\n");
 }
@@ -842,6 +855,7 @@ static void print_counter(struct perf_stat_config *config,
 	u64 ena, run, val;
 	double uval;
 	int cpu;
+	struct aggr_cpu_id id;
 
 	for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
 		struct aggr_data ad = { .cpu = cpu };
@@ -856,8 +870,10 @@ static void print_counter(struct perf_stat_config *config,
 			fprintf(output, "%s", prefix);
 
 		uval = val * counter->scale;
-		printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0,
-			 &rt_stat);
+		id = cpu_map__empty_aggr_cpu_id();
+		id.id = cpu;
+		printout(config, id, 0, counter, uval, prefix,
+			 run, ena, 1.0, &rt_stat);
 
 		fputc('\n', output);
 	}
@@ -872,6 +888,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 	struct evsel *counter;
 	u64 ena, run, val;
 	double uval;
+	struct aggr_cpu_id id;
 
 	nrcpus = evlist->core.cpus->nr;
 	for (cpu = 0; cpu < nrcpus; cpu++) {
@@ -880,8 +897,10 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 		if (prefix)
 			fputs(prefix, config->output);
 		evlist__for_each_entry(evlist, counter) {
+			id = cpu_map__empty_aggr_cpu_id();
+			id.id = cpu;
 			if (first) {
-				aggr_printout(config, counter, cpu, 0);
+				aggr_printout(config, counter, id, 0);
 				first = false;
 			}
 			val = perf_counts(counter->counts, cpu, 0)->val;
@@ -889,8 +908,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			run = perf_counts(counter->counts, cpu, 0)->run;
 
 			uval = val * counter->scale;
-			printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0,
-				 &rt_stat);
+			printout(config, id, 0, counter, uval, prefix,
+				 run, ena, 1.0, &rt_stat);
 		}
 		fputc('\n', config->output);
 	}
@@ -1140,14 +1159,15 @@ static void print_footer(struct perf_stat_config *config)
 static void print_percore_thread(struct perf_stat_config *config,
 				 struct evsel *counter, char *prefix)
 {
-	int s, s2, id;
+	int s;
+	struct aggr_cpu_id s2, id;
 	bool first = true;
 
 	for (int i = 0; i < evsel__nr_cpus(counter); i++) {
 		s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
 		for (s = 0; s < config->aggr_map->nr; s++) {
-			id = config->aggr_map->map[s];
-			if (s2 == id)
+			id.id = config->aggr_map->map[s];
+			if (cpu_map__compare_aggr_cpu_id(s2, id))
 				break;
 		}
 
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 1e125e39ff84..1d5c9f98396b 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter,
 	if (!(vals->run && vals->ena))
 		return 0;
 
-	s = cpu_map__get_socket(cpus, cpu, NULL);
+	s = cpu_map__get_socket(cpus, cpu, NULL).id;
 	if (s < 0)
 		return -1;
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 9979b4b100f2..87522ffb2db2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -6,6 +6,7 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/resource.h>
+#include "cpumap.h"
 #include "rblist.h"
 
 struct perf_cpu_map;
@@ -99,7 +100,7 @@ struct runtime_stat {
 	struct rblist value_list;
 };
 
-typedef int (*aggr_get_id_t)(struct perf_stat_config *config,
+typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config,
 			     struct perf_cpu_map *m, int cpu);
 
 struct perf_stat_config {
@@ -170,7 +171,7 @@ struct evlist;
 
 struct perf_aggr_thread_value {
 	struct evsel *counter;
-	int id;
+	struct aggr_cpu_id id;
 	double uval;
 	u64 val;
 	u64 run;
-- 
cgit v1.2.3-70-g09d2


From cea6575fdccfc0624ca42f656e16e6b4d9bb48a5 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:21 +0200
Subject: perf cpumap: Add new map type for aggregation

Currently this is a duplicate of perf_cpu_map so that it can be used as
a drop in replacement.

In a later commit it will be changed from a map of ints to use the new
cpu_aggr_id struct.

No functional changes.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-6-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cpumap.c | 17 +++++++++++++++++
 tools/perf/util/cpumap.h |  8 ++++++++
 2 files changed, 25 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index e05a12bde073..b18e53506656 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -95,6 +95,23 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
 	return cpus;
 }
 
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
+{
+	struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr);
+
+	if (cpus != NULL) {
+		int i;
+
+		cpus->nr = nr;
+		for (i = 0; i < nr; i++)
+			cpus->map[i] = -1;
+
+		refcount_set(&cpus->refcnt, 1);
+	}
+
+	return cpus;
+}
+
 static int cpu__get_topology_int(int cpu, const char *name, int *value)
 {
 	char path[PATH_MAX];
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index b8c2288a3f6d..ebd65c4f431b 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -11,9 +11,17 @@ struct aggr_cpu_id {
 	int id;
 };
 
+struct cpu_aggr_map {
+	refcount_t refcnt;
+	int nr;
+	int map[];
+};
+
 struct perf_record_cpu_map_data;
 
 struct perf_cpu_map *perf_cpu_map__empty_new(int nr);
+struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr);
+
 struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data);
 size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size);
 size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size);
-- 
cgit v1.2.3-70-g09d2


From d526e1a033e03ec4515b1800f99d99a35c7ea790 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:22 +0200
Subject: perf cpumap: Drop in cpu_aggr_map struct

Replace usages of perf_cpu_map with cpu_aggr map in places that are
involved with 'perf stat' aggregation.

This will then later be changed to be a map of cpu_aggr_id rather than
an int so that more data can be stored.

No functional changes.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-7-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 29 ++++++++++++++++++++++-------
 tools/perf/util/cpumap.c  | 12 ++++++------
 tools/perf/util/cpumap.h  | 10 +++++-----
 tools/perf/util/stat.h    |  4 ++--
 4 files changed, 35 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1ff76d36d1c9..cc017c7dcee9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1320,14 +1320,29 @@ static int perf_stat_init_aggr_mode(void)
 	 * the aggregation translate cpumap.
 	 */
 	nr = perf_cpu_map__max(evsel_list->core.cpus);
-	stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
+	stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
 	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
 }
 
+static void cpu_aggr_map__delete(struct cpu_aggr_map *map)
+{
+	if (map) {
+		WARN_ONCE(refcount_read(&map->refcnt) != 0,
+			  "cpu_aggr_map refcnt unbalanced\n");
+		free(map);
+	}
+}
+
+static void cpu_aggr_map__put(struct cpu_aggr_map *map)
+{
+	if (map && refcount_dec_and_test(&map->refcnt))
+		cpu_aggr_map__delete(map);
+}
+
 static void perf_stat__exit_aggr_mode(void)
 {
-	perf_cpu_map__put(stat_config.aggr_map);
-	perf_cpu_map__put(stat_config.cpus_aggr_map);
+	cpu_aggr_map__put(stat_config.aggr_map);
+	cpu_aggr_map__put(stat_config.cpus_aggr_map);
 	stat_config.aggr_map = NULL;
 	stat_config.cpus_aggr_map = NULL;
 }
@@ -1425,25 +1440,25 @@ static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx,
 }
 
 static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
-				      struct perf_cpu_map **sockp)
+				      struct cpu_aggr_map **sockp)
 {
 	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
 }
 
 static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
-				   struct perf_cpu_map **diep)
+				   struct cpu_aggr_map **diep)
 {
 	return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
 }
 
 static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
-				    struct perf_cpu_map **corep)
+				    struct cpu_aggr_map **corep)
 {
 	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
 }
 
 static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
-				    struct perf_cpu_map **nodep)
+				    struct cpu_aggr_map **nodep)
 {
 	return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
 }
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index b18e53506656..ea81586305f4 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -151,12 +151,12 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
 	return a->id - b->id;
 }
 
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
+int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
 		       struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
 		       void *data)
 {
 	int nr = cpus->nr;
-	struct perf_cpu_map *c = perf_cpu_map__empty_new(nr);
+	struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr);
 	int cpu, s2;
 	struct aggr_cpu_id s1;
 
@@ -279,22 +279,22 @@ struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *da
 	return id;
 }
 
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp)
+int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp)
 {
 	return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
 }
 
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep)
+int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep)
 {
 	return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
 }
 
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep)
+int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep)
 {
 	return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
 }
 
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **numap)
+int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap)
 {
 	return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL);
 }
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index ebd65c4f431b..b112069038be 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -34,10 +34,10 @@ int cpu_map__get_core_id(int cpu);
 struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data);
 int cpu_map__get_node_id(int cpu);
 struct aggr_cpu_id  cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data);
-int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct perf_cpu_map **sockp);
-int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct perf_cpu_map **diep);
-int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct perf_cpu_map **corep);
-int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct perf_cpu_map **nodep);
+int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp);
+int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep);
+int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep);
+int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep);
 const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */
 
 static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
@@ -69,7 +69,7 @@ int cpu__max_cpu(void);
 int cpu__max_present_cpu(void);
 int cpu__get_node(int cpu);
 
-int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
+int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
 		       struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
 		       void *data);
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 87522ffb2db2..b5369730b4a2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -139,9 +139,9 @@ struct perf_stat_config {
 	const char		*csv_sep;
 	struct stats		*walltime_nsecs_stats;
 	struct rusage		 ru_data;
-	struct perf_cpu_map		*aggr_map;
+	struct cpu_aggr_map	*aggr_map;
 	aggr_get_id_t		 aggr_get_id;
-	struct perf_cpu_map		*cpus_aggr_map;
+	struct cpu_aggr_map	*cpus_aggr_map;
 	u64			*walltime_run;
 	struct rblist		 metric_events;
 	int			 ctl_fd;
-- 
cgit v1.2.3-70-g09d2


From ff5232956e074994a66656f709c3ad1ee3d8a550 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:23 +0200
Subject: perf stat aggregation: Start using cpu_aggr_id in map

Use the new cpu_aggr_id struct in the cpu map instead of int so that it
can store more data.

No functional changes.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-8-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      | 6 +++---
 tools/perf/util/cpumap.c       | 8 ++++----
 tools/perf/util/cpumap.h       | 2 +-
 tools/perf/util/stat-display.c | 6 +++---
 4 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cc017c7dcee9..3fec1b7e93b5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1221,10 +1221,10 @@ static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
 
 	cpu = map->map[idx];
 
-	if (config->cpus_aggr_map->map[cpu] == -1)
-		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx).id;
+	if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu]))
+		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
 
-	id.id = config->cpus_aggr_map->map[cpu];
+	id = config->cpus_aggr_map->map[cpu];
 	return id;
 }
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index ea81586305f4..b50609b9a585 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -97,14 +97,14 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
 
 struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
 {
-	struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr);
+	struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr);
 
 	if (cpus != NULL) {
 		int i;
 
 		cpus->nr = nr;
 		for (i = 0; i < nr; i++)
-			cpus->map[i] = -1;
+			cpus->map[i] = cpu_map__empty_aggr_cpu_id();
 
 		refcount_set(&cpus->refcnt, 1);
 	}
@@ -169,11 +169,11 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
 	for (cpu = 0; cpu < nr; cpu++) {
 		s1 = f(cpus, cpu, data);
 		for (s2 = 0; s2 < c->nr; s2++) {
-			if (s1.id == c->map[s2])
+			if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
 				break;
 		}
 		if (s2 == c->nr) {
-			c->map[c->nr] = s1.id;
+			c->map[c->nr] = s1;
 			c->nr++;
 		}
 	}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index b112069038be..d8fc265bc762 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -14,7 +14,7 @@ struct aggr_cpu_id {
 struct cpu_aggr_map {
 	refcount_t refcnt;
 	int nr;
-	int map[];
+	struct aggr_cpu_id map[];
 };
 
 struct perf_record_cpu_map_data;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 8f4ee3621863..a6309cedb37b 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -506,7 +506,7 @@ static void aggr_update_shadow(struct perf_stat_config *config,
 	struct evsel *counter;
 
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		id.id = config->aggr_map->map[s];
+		id = config->aggr_map->map[s];
 		evlist__for_each_entry(evlist, counter) {
 			val = 0;
 			for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
@@ -638,7 +638,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	struct aggr_cpu_id id;
 	double uval;
 
-	ad.id.id = id.id = config->aggr_map->map[s];
+	ad.id = id = config->aggr_map->map[s];
 	ad.val = ad.ena = ad.run = 0;
 	ad.nr = 0;
 	if (!collect_data(config, counter, aggr_cb, &ad))
@@ -1166,7 +1166,7 @@ static void print_percore_thread(struct perf_stat_config *config,
 	for (int i = 0; i < evsel__nr_cpus(counter); i++) {
 		s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
 		for (s = 0; s < config->aggr_map->nr; s++) {
-			id.id = config->aggr_map->map[s];
+			id = config->aggr_map->map[s];
 			if (cpu_map__compare_aggr_cpu_id(s2, id))
 				break;
 		}
-- 
cgit v1.2.3-70-g09d2


From fcd83a35dd93b89d3f48cfcd33c31b112cc96180 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:24 +0200
Subject: perf stat aggregation: Add separate node member

Add node as a separate member so that it doesn't have to be packed into
the int value.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-9-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      |  2 +-
 tools/perf/tests/topology.c    |  8 +++++++-
 tools/perf/util/cpumap.c       | 16 +++++++++++-----
 tools/perf/util/cpumap.h       |  1 +
 tools/perf/util/stat-display.c |  2 +-
 5 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3fec1b7e93b5..e51c5469d712 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1435,7 +1435,7 @@ static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx,
 	int cpu = perf_env__get_cpu(data, map, idx);
 	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
-	id.id = perf_env__numa_node(data, cpu);
+	id.node = perf_env__numa_node(data, cpu);
 	return id;
 }
 
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 7a7d16b3950a..ee272d45a445 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -119,6 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 
 		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
 			session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id));
+		TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
 	}
 
 	// Test that die ID contains socket and die
@@ -131,6 +132,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Die map - Die ID doesn't match",
 			session->header.env.cpu[map->map[i]].die_id ==
 				cpu_map__id_to_die(id.id << 16));
+
+		TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
 	}
 
 	// Test that socket ID contains only socket
@@ -138,13 +141,16 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		id = cpu_map__get_socket(map, i, NULL);
 		TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
 			session->header.env.cpu[map->map[i]].socket_id == id.id);
+
+		TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
 	}
 
 	// Test that node ID contains only node
 	for (i = 0; i < map->nr; i++) {
 		id = cpu_map__get_node(map, i, NULL);
 		TEST_ASSERT_VAL("Node map - Node ID doesn't match",
-			cpu__get_node(map->map[i]) == id.id);
+			cpu__get_node(map->map[i]) == id.node);
+		TEST_ASSERT_VAL("Node map - ID is set", id.id == -1);
 	}
 	perf_session__delete(session);
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index b50609b9a585..5f9e98ddbe34 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -148,7 +148,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
 	struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
 	struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
 
-	return a->id - b->id;
+	if (a->id != b->id)
+		return a->id - b->id;
+	else
+		return a->node - b->node;
 }
 
 int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
@@ -275,7 +278,7 @@ struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *da
 	if (idx < 0 || idx >= map->nr)
 		return id;
 
-	id.id = cpu_map__get_node_id(map->map[idx]);
+	id.node = cpu_map__get_node_id(map->map[idx]);
 	return id;
 }
 
@@ -620,18 +623,21 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
 
 bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
 {
-	return a.id == b.id;
+	return a.id == b.id &&
+		a.node == b.node;
 }
 
 bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
 {
-	return a.id == -1;
+	return a.id == -1 &&
+		a.node == -1;
 }
 
 struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
 {
 	struct aggr_cpu_id ret = {
-		.id = -1
+		.id = -1,
+		.node = -1
 	};
 	return ret;
 }
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index d8fc265bc762..f79e92603024 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -9,6 +9,7 @@
 
 struct aggr_cpu_id {
 	int id;
+	int node;
 };
 
 struct cpu_aggr_map {
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a6309cedb37b..790392247026 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -104,7 +104,7 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_NODE:
 		fprintf(config->output, "N%*d%s%*d%s",
 			config->csv_output ? 0 : -5,
-			id.id,
+			id.node,
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
-- 
cgit v1.2.3-70-g09d2


From 1a270cb6b3cc18663f7fd165aa691c48d68739f2 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:25 +0200
Subject: perf stat aggregation: Add separate socket member

Add socket as a separate member so that it doesn't have to be packed
into the int value.

When the socket ID was larger than 8 bits the output appeared corrupted
or incomplete.

For example, here on ThunderX2 'perf stat' reports a socket of -1 and an
invalid die number:

  ./perf stat -a --per-die
  The socket id number is too big.

  Performance counter stats for 'system wide':

  S-1-D255       128             687.99 msec cpu-clock                 #   57.240 CPUs utilized
  ...
  S36-D0         128             842.34 msec cpu-clock                 #   70.081 CPUs utilized
  ...

And with --per-core there is an entry with an invalid core ID:

  ./perf stat record -a --per-core
  The socket id number is too big.

  Performance counter stats for 'system wide':
  S-1-D255-C65535     128             671.04 msec cpu-clock                 #   54.112 CPUs utilized
  ...
  S36-D0-C0           4              28.27 msec cpu-clock                 #    2.279 CPUs utilized
  ...

This fixes the "Session topology" self test on ThunderX2.

After this fix the output contains the correct socket and die IDs and no
longer prints a warning about the size of the socket ID:

  ./perf stat --per-die -a

  Performance counter stats for 'system wide':

  S36-D0         128         169,869.39 msec cpu-clock                 #  127.501 CPUs utilized
  ...
  S3612-D0         128         169,733.05 msec cpu-clock                 #  127.398 CPUs utilized

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-10-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      | 22 ++++++++-------------
 tools/perf/tests/topology.c    | 10 +++++-----
 tools/perf/util/cpumap.c       | 44 +++++++++++++++++++++---------------------
 tools/perf/util/cpumap.h       |  6 +-----
 tools/perf/util/stat-display.c |  8 ++++----
 tools/perf/util/stat.c         |  2 +-
 6 files changed, 41 insertions(+), 51 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e51c5469d712..6248baa0f612 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1369,7 +1369,7 @@ static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx
 	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
 	if (cpu != -1)
-		id.id = env->cpu[cpu].socket_id;
+		id.socket = env->cpu[cpu].socket_id;
 
 	return id;
 }
@@ -1382,18 +1382,16 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v
 
 	if (cpu != -1) {
 		/*
-		 * Encode socket in bit range 15:8
-		 * die_id is relative to socket,
-		 * we need a global id. So we combine
-		 * socket + die id
+		 * die_id is relative to socket, so start
+		 * with the socket ID and then add die to
+		 * make a unique ID.
 		 */
-		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
-			return cpu_map__empty_aggr_cpu_id();
+		id.socket = env->cpu[cpu].socket_id;
 
 		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
 			return cpu_map__empty_aggr_cpu_id();
 
-		id.id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
+		id.id = env->cpu[cpu].die_id & 0xff;
 	}
 
 	return id;
@@ -1407,23 +1405,19 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx,
 
 	if (cpu != -1) {
 		/*
-		 * Encode socket in bit range 31:24
 		 * encode die id in bit range 23:16
 		 * core_id is relative to socket and die,
 		 * we need a global id. So we combine
 		 * socket + die id + core id
 		 */
-		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
-			return cpu_map__empty_aggr_cpu_id();
-
 		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
 			return cpu_map__empty_aggr_cpu_id();
 
 		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
 			return cpu_map__empty_aggr_cpu_id();
 
-		id.id = (env->cpu[cpu].socket_id << 24) |
-		       (env->cpu[cpu].die_id << 16) |
+		id.socket = env->cpu[cpu].socket_id;
+		id.id = (env->cpu[cpu].die_id << 16) |
 		       (env->cpu[cpu].core_id & 0xffff);
 	}
 
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index ee272d45a445..777dd8291bcc 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -114,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 			session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id));
 
 		TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
-			session->header.env.cpu[map->map[i]].socket_id ==
-				cpu_map__id_to_socket(id.id));
+			session->header.env.cpu[map->map[i]].socket_id == id.socket);
 
 		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
 			session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id));
@@ -126,8 +125,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 	for (i = 0; i < map->nr; i++) {
 		id = cpu_map__get_die(map, i, NULL);
 		TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
-			session->header.env.cpu[map->map[i]].socket_id ==
-				cpu_map__id_to_socket(id.id << 16));
+			session->header.env.cpu[map->map[i]].socket_id == id.socket);
 
 		TEST_ASSERT_VAL("Die map - Die ID doesn't match",
 			session->header.env.cpu[map->map[i]].die_id ==
@@ -140,9 +138,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 	for (i = 0; i < map->nr; i++) {
 		id = cpu_map__get_socket(map, i, NULL);
 		TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
-			session->header.env.cpu[map->map[i]].socket_id == id.id);
+			session->header.env.cpu[map->map[i]].socket_id == id.socket);
 
 		TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
+		TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1);
 	}
 
 	// Test that node ID contains only node
@@ -151,6 +150,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Node map - Node ID doesn't match",
 			cpu__get_node(map->map[i]) == id.node);
 		TEST_ASSERT_VAL("Node map - ID is set", id.id == -1);
+		TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
 	}
 	perf_session__delete(session);
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 5f9e98ddbe34..d2630f03f682 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -139,7 +139,7 @@ struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
 
 	cpu = map->map[idx];
 
-	id.id = cpu_map__get_socket_id(cpu);
+	id.socket = cpu_map__get_socket_id(cpu);
 	return id;
 }
 
@@ -150,8 +150,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
 
 	if (a->id != b->id)
 		return a->id - b->id;
-	else
+	else if (a->node != b->node)
 		return a->node - b->node;
+	else
+		return a->socket - b->socket;
 }
 
 int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
@@ -196,7 +198,7 @@ int cpu_map__get_die_id(int cpu)
 
 struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
 {
-	int cpu, s;
+	int cpu, die;
 	struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
 
 	if (idx > map->nr)
@@ -204,28 +206,24 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat
 
 	cpu = map->map[idx];
 
-	id.id = cpu_map__get_die_id(cpu);
+	die = cpu_map__get_die_id(cpu);
 	/* There is no die_id on legacy system. */
-	if (id.id == -1)
-		id.id = 0;
-
-	s = cpu_map__get_socket(map, idx, data).id;
-	if (s == -1)
-		return cpu_map__empty_aggr_cpu_id();
+	if (die == -1)
+		die = 0;
 
 	/*
-	 * Encode socket in bit range 15:8
-	 * die_id is relative to socket, and
-	 * we need a global id. So we combine
-	 * socket + die id
+	 * die_id is relative to socket, so start
+	 * with the socket ID and then add die to
+	 * make a unique ID.
 	 */
-	if (WARN_ONCE(id.id >> 8, "The die id number is too big.\n"))
-		return cpu_map__empty_aggr_cpu_id();
+	id = cpu_map__get_socket(map, idx, data);
+	if (cpu_map__aggr_cpu_id_is_empty(id))
+		return id;
 
-	if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
+	if (WARN_ONCE(die >> 8, "The die id number is too big.\n"))
 		return cpu_map__empty_aggr_cpu_id();
 
-	id.id = (s << 8) | (id.id & 0xff);
+	id.id = (die & 0xff);
 	return id;
 }
 
@@ -258,7 +256,6 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da
 		return id;
 
 	/*
-	 * encode socket in bit range 31:24
 	 * encode die id in bit range 23:16
 	 * core_id is relative to socket and die,
 	 * we need a global id. So we combine
@@ -624,20 +621,23 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
 bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
 {
 	return a.id == b.id &&
-		a.node == b.node;
+		a.node == b.node &&
+		a.socket == b.socket;
 }
 
 bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
 {
 	return a.id == -1 &&
-		a.node == -1;
+		a.node == -1 &&
+		a.socket == -1;
 }
 
 struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
 {
 	struct aggr_cpu_id ret = {
 		.id = -1,
-		.node = -1
+		.node = -1,
+		.socket = -1
 	};
 	return ret;
 }
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f79e92603024..0123ecc90694 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -10,6 +10,7 @@
 struct aggr_cpu_id {
 	int id;
 	int node;
+	int socket;
 };
 
 struct cpu_aggr_map {
@@ -48,11 +49,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
 	return sock->map[s];
 }
 
-static inline int cpu_map__id_to_socket(int id)
-{
-	return id >> 24;
-}
-
 static inline int cpu_map__id_to_die(int id)
 {
 	return (id >> 16) & 0xff;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 790392247026..5a756c88c124 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -73,7 +73,7 @@ static void aggr_printout(struct perf_stat_config *config,
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
 		fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
-			cpu_map__id_to_socket(id.id),
+			id.socket,
 			cpu_map__id_to_die(id.id),
 			config->csv_output ? 0 : -8,
 			cpu_map__id_to_cpu(id.id),
@@ -84,7 +84,7 @@ static void aggr_printout(struct perf_stat_config *config,
 		break;
 	case AGGR_DIE:
 		fprintf(config->output, "S%d-D%*d%s%*d%s",
-			cpu_map__id_to_socket(id.id << 16),
+			id.socket,
 			config->csv_output ? 0 : -8,
 			cpu_map__id_to_die(id.id << 16),
 			config->csv_sep,
@@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_SOCKET:
 		fprintf(config->output, "S%*d%s%*d%s",
 			config->csv_output ? 0 : -5,
-			id.id,
+			id.socket,
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
@@ -113,7 +113,7 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_NONE:
 		if (evsel->percore && !config->percore_show_thread) {
 			fprintf(config->output, "S%d-D%d-C%*d%s",
-				cpu_map__id_to_socket(id.id),
+				id.socket,
 				cpu_map__id_to_die(id.id),
 				config->csv_output ? 0 : -3,
 				cpu_map__id_to_cpu(id.id), config->csv_sep);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 1d5c9f98396b..8ce1479c98f0 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter,
 	if (!(vals->run && vals->ena))
 		return 0;
 
-	s = cpu_map__get_socket(cpus, cpu, NULL).id;
+	s = cpu_map__get_socket(cpus, cpu, NULL).socket;
 	if (s < 0)
 		return -1;
 
-- 
cgit v1.2.3-70-g09d2


From ba2ee166d92b201078cb941956547ab9828989d3 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:26 +0200
Subject: perf stat aggregation: Add separate die member

Add die as a separate member so that it doesn't have to be packed into
the int value.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-11-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      | 14 +++-----------
 tools/perf/tests/topology.c    |  8 +++++---
 tools/perf/util/cpumap.c       | 28 ++++++++++++++--------------
 tools/perf/util/cpumap.h       |  6 +-----
 tools/perf/util/stat-display.c |  6 +++---
 5 files changed, 26 insertions(+), 36 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6248baa0f612..bac37fe9373c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1387,11 +1387,7 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v
 		 * make a unique ID.
 		 */
 		id.socket = env->cpu[cpu].socket_id;
-
-		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
-			return cpu_map__empty_aggr_cpu_id();
-
-		id.id = env->cpu[cpu].die_id & 0xff;
+		id.die = env->cpu[cpu].die_id;
 	}
 
 	return id;
@@ -1405,20 +1401,16 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx,
 
 	if (cpu != -1) {
 		/*
-		 * encode die id in bit range 23:16
 		 * core_id is relative to socket and die,
 		 * we need a global id. So we combine
 		 * socket + die id + core id
 		 */
-		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
-			return cpu_map__empty_aggr_cpu_id();
-
 		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
 			return cpu_map__empty_aggr_cpu_id();
 
 		id.socket = env->cpu[cpu].socket_id;
-		id.id = (env->cpu[cpu].die_id << 16) |
-		       (env->cpu[cpu].core_id & 0xffff);
+		id.die = env->cpu[cpu].die_id;
+		id.id = env->cpu[cpu].core_id & 0xffff;
 	}
 
 	return id;
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 777dd8291bcc..e3f822890a84 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -117,7 +117,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 			session->header.env.cpu[map->map[i]].socket_id == id.socket);
 
 		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
-			session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id));
+			session->header.env.cpu[map->map[i]].die_id == id.die);
 		TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
 	}
 
@@ -128,10 +128,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 			session->header.env.cpu[map->map[i]].socket_id == id.socket);
 
 		TEST_ASSERT_VAL("Die map - Die ID doesn't match",
-			session->header.env.cpu[map->map[i]].die_id ==
-				cpu_map__id_to_die(id.id << 16));
+			session->header.env.cpu[map->map[i]].die_id == id.die);
 
 		TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
+		TEST_ASSERT_VAL("Die map - ID is set", id.id == -1);
 	}
 
 	// Test that socket ID contains only socket
@@ -141,6 +141,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 			session->header.env.cpu[map->map[i]].socket_id == id.socket);
 
 		TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
+		TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
 		TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1);
 	}
 
@@ -151,6 +152,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 			cpu__get_node(map->map[i]) == id.node);
 		TEST_ASSERT_VAL("Node map - ID is set", id.id == -1);
 		TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
+		TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
 	}
 	perf_session__delete(session);
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index d2630f03f682..10a52058d838 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -152,8 +152,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
 		return a->id - b->id;
 	else if (a->node != b->node)
 		return a->node - b->node;
-	else
+	else if (a->socket != b->socket)
 		return a->socket - b->socket;
+	else
+		return a->die - b->die;
 }
 
 int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
@@ -220,10 +222,7 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat
 	if (cpu_map__aggr_cpu_id_is_empty(id))
 		return id;
 
-	if (WARN_ONCE(die >> 8, "The die id number is too big.\n"))
-		return cpu_map__empty_aggr_cpu_id();
-
-	id.id = (die & 0xff);
+	id.die = die;
 	return id;
 }
 
@@ -250,21 +249,19 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da
 
 	cpu = cpu_map__get_core_id(cpu);
 
-	/* cpu_map__get_die returns the combination of socket + die id */
+	/* cpu_map__get_die returns a struct with socket and die set*/
 	id = cpu_map__get_die(map, idx, data);
 	if (cpu_map__aggr_cpu_id_is_empty(id))
 		return id;
 
 	/*
-	 * encode die id in bit range 23:16
-	 * core_id is relative to socket and die,
-	 * we need a global id. So we combine
-	 * socket + die id + core id
+	 * core_id is relative to socket and die, we need a global id.
+	 * So we combine the result from cpu_map__get_die with the core id
 	 */
 	if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n"))
 		return cpu_map__empty_aggr_cpu_id();
 
-	id.id = (id.id << 16) | (cpu & 0xffff);
+	id.id = (cpu & 0xffff);
 	return id;
 }
 
@@ -622,14 +619,16 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
 {
 	return a.id == b.id &&
 		a.node == b.node &&
-		a.socket == b.socket;
+		a.socket == b.socket &&
+		a.die == b.die;
 }
 
 bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
 {
 	return a.id == -1 &&
 		a.node == -1 &&
-		a.socket == -1;
+		a.socket == -1 &&
+		a.die == -1;
 }
 
 struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
@@ -637,7 +636,8 @@ struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
 	struct aggr_cpu_id ret = {
 		.id = -1,
 		.node = -1,
-		.socket = -1
+		.socket = -1,
+		.die = -1
 	};
 	return ret;
 }
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 0123ecc90694..51bbe1eca3f4 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -11,6 +11,7 @@ struct aggr_cpu_id {
 	int id;
 	int node;
 	int socket;
+	int die;
 };
 
 struct cpu_aggr_map {
@@ -49,11 +50,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
 	return sock->map[s];
 }
 
-static inline int cpu_map__id_to_die(int id)
-{
-	return (id >> 16) & 0xff;
-}
-
 static inline int cpu_map__id_to_cpu(int id)
 {
 	return id & 0xffff;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 5a756c88c124..dcce753f351d 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -74,7 +74,7 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_CORE:
 		fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
 			id.socket,
-			cpu_map__id_to_die(id.id),
+			id.die,
 			config->csv_output ? 0 : -8,
 			cpu_map__id_to_cpu(id.id),
 			config->csv_sep,
@@ -86,7 +86,7 @@ static void aggr_printout(struct perf_stat_config *config,
 		fprintf(config->output, "S%d-D%*d%s%*d%s",
 			id.socket,
 			config->csv_output ? 0 : -8,
-			cpu_map__id_to_die(id.id << 16),
+			id.die,
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
@@ -114,7 +114,7 @@ static void aggr_printout(struct perf_stat_config *config,
 		if (evsel->percore && !config->percore_show_thread) {
 			fprintf(config->output, "S%d-D%d-C%*d%s",
 				id.socket,
-				cpu_map__id_to_die(id.id),
+				id.die,
 				config->csv_output ? 0 : -3,
 				cpu_map__id_to_cpu(id.id), config->csv_sep);
 		} else if (id.id > -1) {
-- 
cgit v1.2.3-70-g09d2


From b993381779da406ca9ca0ae1e1b3968e9075ce77 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:27 +0200
Subject: perf stat aggregation: Add separate core member

Add core as a separate member so that it doesn't have to be packed into
the int value.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-12-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      |  9 +++------
 tools/perf/tests/topology.c    |  6 +++++-
 tools/perf/util/cpumap.c       | 18 ++++++++++--------
 tools/perf/util/cpumap.h       |  6 +-----
 tools/perf/util/stat-display.c | 16 ++++++++--------
 5 files changed, 27 insertions(+), 28 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index bac37fe9373c..8cc24967bc27 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1402,15 +1402,12 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx,
 	if (cpu != -1) {
 		/*
 		 * core_id is relative to socket and die,
-		 * we need a global id. So we combine
-		 * socket + die id + core id
+		 * we need a global id. So we set
+		 * socket, die id and core id
 		 */
-		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
-			return cpu_map__empty_aggr_cpu_id();
-
 		id.socket = env->cpu[cpu].socket_id;
 		id.die = env->cpu[cpu].die_id;
-		id.id = env->cpu[cpu].core_id & 0xffff;
+		id.core = env->cpu[cpu].core_id;
 	}
 
 	return id;
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index e3f822890a84..a6e289f2c6db 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -111,7 +111,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 	for (i = 0; i < map->nr; i++) {
 		id = cpu_map__get_core(map, i, NULL);
 		TEST_ASSERT_VAL("Core map - Core ID doesn't match",
-			session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id));
+			session->header.env.cpu[map->map[i]].core_id == id.core);
 
 		TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
 			session->header.env.cpu[map->map[i]].socket_id == id.socket);
@@ -119,6 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
 			session->header.env.cpu[map->map[i]].die_id == id.die);
 		TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
+		TEST_ASSERT_VAL("Core map - ID is set", id.id == -1);
 	}
 
 	// Test that die ID contains socket and die
@@ -132,6 +133,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 
 		TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
 		TEST_ASSERT_VAL("Die map - ID is set", id.id == -1);
+		TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
 	}
 
 	// Test that socket ID contains only socket
@@ -143,6 +145,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
 		TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
 		TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1);
+		TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1);
 	}
 
 	// Test that node ID contains only node
@@ -153,6 +156,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Node map - ID is set", id.id == -1);
 		TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
 		TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
+		TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
 	}
 	perf_session__delete(session);
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 10a52058d838..d164f7bd1ac7 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -154,8 +154,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
 		return a->node - b->node;
 	else if (a->socket != b->socket)
 		return a->socket - b->socket;
-	else
+	else if (a->die != b->die)
 		return a->die - b->die;
+	else
+		return a->core - b->core;
 }
 
 int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
@@ -258,10 +260,7 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da
 	 * core_id is relative to socket and die, we need a global id.
 	 * So we combine the result from cpu_map__get_die with the core id
 	 */
-	if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n"))
-		return cpu_map__empty_aggr_cpu_id();
-
-	id.id = (cpu & 0xffff);
+	id.core = cpu;
 	return id;
 }
 
@@ -620,7 +619,8 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
 	return a.id == b.id &&
 		a.node == b.node &&
 		a.socket == b.socket &&
-		a.die == b.die;
+		a.die == b.die &&
+		a.core == b.core;
 }
 
 bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
@@ -628,7 +628,8 @@ bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
 	return a.id == -1 &&
 		a.node == -1 &&
 		a.socket == -1 &&
-		a.die == -1;
+		a.die == -1 &&
+		a.core == -1;
 }
 
 struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
@@ -637,7 +638,8 @@ struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
 		.id = -1,
 		.node = -1,
 		.socket = -1,
-		.die = -1
+		.die = -1,
+		.core = -1
 	};
 	return ret;
 }
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 51bbe1eca3f4..1bb8f7d47206 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -12,6 +12,7 @@ struct aggr_cpu_id {
 	int node;
 	int socket;
 	int die;
+	int core;
 };
 
 struct cpu_aggr_map {
@@ -50,11 +51,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s)
 	return sock->map[s];
 }
 
-static inline int cpu_map__id_to_cpu(int id)
-{
-	return id & 0xffff;
-}
-
 int cpu__setup_cpunode_map(void);
 
 int cpu__max_node(void);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index dcce753f351d..2b3842f6080d 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -76,7 +76,7 @@ static void aggr_printout(struct perf_stat_config *config,
 			id.socket,
 			id.die,
 			config->csv_output ? 0 : -8,
-			cpu_map__id_to_cpu(id.id),
+			id.core,
 			config->csv_sep,
 			config->csv_output ? 0 : 4,
 			nr,
@@ -116,11 +116,11 @@ static void aggr_printout(struct perf_stat_config *config,
 				id.socket,
 				id.die,
 				config->csv_output ? 0 : -3,
-				cpu_map__id_to_cpu(id.id), config->csv_sep);
-		} else if (id.id > -1) {
+				id.core, config->csv_sep);
+		} else if (id.core > -1) {
 			fprintf(config->output, "CPU%*d%s",
 				config->csv_output ? 0 : -7,
-				evsel__cpus(evsel)->map[id.id],
+				evsel__cpus(evsel)->map[id.core],
 				config->csv_sep);
 		}
 		break;
@@ -326,7 +326,7 @@ static int first_shadow_cpu(struct perf_stat_config *config,
 	int i;
 
 	if (config->aggr_mode == AGGR_NONE)
-		return id.id;
+		return id.core;
 
 	if (!config->aggr_get_id)
 		return 0;
@@ -658,7 +658,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	uval = val * counter->scale;
 	if (cpu != -1) {
 		id = cpu_map__empty_aggr_cpu_id();
-		id.id = cpu;
+		id.core = cpu;
 	}
 	printout(config, id, nr, counter, uval,
 		 prefix, run, ena, 1.0, &rt_stat);
@@ -871,7 +871,7 @@ static void print_counter(struct perf_stat_config *config,
 
 		uval = val * counter->scale;
 		id = cpu_map__empty_aggr_cpu_id();
-		id.id = cpu;
+		id.core = cpu;
 		printout(config, id, 0, counter, uval, prefix,
 			 run, ena, 1.0, &rt_stat);
 
@@ -898,7 +898,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			fputs(prefix, config->output);
 		evlist__for_each_entry(evlist, counter) {
 			id = cpu_map__empty_aggr_cpu_id();
-			id.id = cpu;
+			id.core = cpu;
 			if (first) {
 				aggr_printout(config, counter, id, 0);
 				first = false;
-- 
cgit v1.2.3-70-g09d2


From 8d4852b468c38168c4e1e1652602b4a6c6c080b3 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 26 Nov 2020 16:13:28 +0200
Subject: perf stat aggregation: Add separate thread member

A separate field isn't strictly required. The core field could be
re-used for thread IDs as a single field was used previously.

But separating them will avoid confusion and catch potential errors
where core IDs are read as thread IDs and vice versa.

Also remove the placeholder id field which is now no longer used.

Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20201126141328.6509-13-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/topology.c    |  8 ++++----
 tools/perf/util/cpumap.c       | 14 +++++++-------
 tools/perf/util/cpumap.h       |  2 +-
 tools/perf/util/stat-display.c |  8 ++++----
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index a6e289f2c6db..74748ed75b2c 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -119,7 +119,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
 			session->header.env.cpu[map->map[i]].die_id == id.die);
 		TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
-		TEST_ASSERT_VAL("Core map - ID is set", id.id == -1);
+		TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1);
 	}
 
 	// Test that die ID contains socket and die
@@ -132,8 +132,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 			session->header.env.cpu[map->map[i]].die_id == id.die);
 
 		TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
-		TEST_ASSERT_VAL("Die map - ID is set", id.id == -1);
 		TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
+		TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1);
 	}
 
 	// Test that socket ID contains only socket
@@ -144,8 +144,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 
 		TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
 		TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
-		TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1);
 		TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1);
+		TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1);
 	}
 
 	// Test that node ID contains only node
@@ -153,10 +153,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		id = cpu_map__get_node(map, i, NULL);
 		TEST_ASSERT_VAL("Node map - Node ID doesn't match",
 			cpu__get_node(map->map[i]) == id.node);
-		TEST_ASSERT_VAL("Node map - ID is set", id.id == -1);
 		TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
 		TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
 		TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
+		TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1);
 	}
 	perf_session__delete(session);
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index d164f7bd1ac7..87d3eca9b872 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -148,16 +148,16 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
 	struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
 	struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
 
-	if (a->id != b->id)
-		return a->id - b->id;
-	else if (a->node != b->node)
+	if (a->node != b->node)
 		return a->node - b->node;
 	else if (a->socket != b->socket)
 		return a->socket - b->socket;
 	else if (a->die != b->die)
 		return a->die - b->die;
-	else
+	else if (a->core != b->core)
 		return a->core - b->core;
+	else
+		return a->thread - b->thread;
 }
 
 int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
@@ -616,7 +616,7 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
 
 bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
 {
-	return a.id == b.id &&
+	return a.thread == b.thread &&
 		a.node == b.node &&
 		a.socket == b.socket &&
 		a.die == b.die &&
@@ -625,7 +625,7 @@ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
 
 bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
 {
-	return a.id == -1 &&
+	return a.thread == -1 &&
 		a.node == -1 &&
 		a.socket == -1 &&
 		a.die == -1 &&
@@ -635,7 +635,7 @@ bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
 struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
 {
 	struct aggr_cpu_id ret = {
-		.id = -1,
+		.thread = -1,
 		.node = -1,
 		.socket = -1,
 		.die = -1,
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 1bb8f7d47206..a27eeaf086e8 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -8,7 +8,7 @@
 #include <perf/cpumap.h>
 
 struct aggr_cpu_id {
-	int id;
+	int thread;
 	int node;
 	int socket;
 	int die;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 2b3842f6080d..583ae4f09c5d 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -127,9 +127,9 @@ static void aggr_printout(struct perf_stat_config *config,
 	case AGGR_THREAD:
 		fprintf(config->output, "%*s-%*d%s",
 			config->csv_output ? 0 : 16,
-			perf_thread_map__comm(evsel->core.threads, id.id),
+			perf_thread_map__comm(evsel->core.threads, id.thread),
 			config->csv_output ? 0 : -8,
-			perf_thread_map__pid(evsel->core.threads, id.id),
+			perf_thread_map__pid(evsel->core.threads, id.thread),
 			config->csv_sep);
 		break;
 	case AGGR_GLOBAL:
@@ -740,7 +740,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread(
 
 		buf[i].counter = counter;
 		buf[i].id = cpu_map__empty_aggr_cpu_id();
-		buf[i].id.id = thread;
+		buf[i].id.thread = thread;
 		buf[i].uval = uval;
 		buf[i].val = val;
 		buf[i].run = run;
@@ -781,7 +781,7 @@ static void print_aggr_thread(struct perf_stat_config *config,
 		if (config->stats)
 			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
 				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-				 &config->stats[id.id]);
+				 &config->stats[id.thread]);
 		else
 			printout(config, id, 0, buf[thread].counter, buf[thread].uval,
 				 prefix, buf[thread].run, buf[thread].ena, 1.0,
-- 
cgit v1.2.3-70-g09d2


From 5149303fdfe5c67ddb51c911e23262f781cd75eb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 24 Dec 2020 10:52:10 -0300
Subject: perf probe: Fix memory leak when synthesizing SDT probes

The argv_split() function must be paired with argv_free(), else we must
keep a reference to the argv array received or do the freeing ourselves,
in synthesize_sdt_probe_command() we were simply leaking that argv[]
array.

Fixes: 3b1f8311f6963cd1 ("perf probe: Add sdt probes arguments into the uprobe cmd string")
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Truong <alexandre.truong@arm.com>
Cc: Alexis Berlemont <alexis.berlemont@gmail.com>
Cc: He Zhe <zhe.he@windriver.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201224135139.GF477817@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-file.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 064b63a6a3f3..bbecb449ea94 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -791,7 +791,7 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note,
 					const char *sdtgrp)
 {
 	struct strbuf buf;
-	char *ret = NULL, **args;
+	char *ret = NULL;
 	int i, args_count, err;
 	unsigned long long ref_ctr_offset;
 
@@ -813,12 +813,19 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note,
 		goto out;
 
 	if (note->args) {
-		args = argv_split(note->args, &args_count);
+		char **args = argv_split(note->args, &args_count);
+
+		if (args == NULL)
+			goto error;
 
 		for (i = 0; i < args_count; ++i) {
-			if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0)
+			if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) {
+				argv_free(args);
 				goto error;
+			}
 		}
+
+		argv_free(args);
 	}
 
 out:
-- 
cgit v1.2.3-70-g09d2