41 files changed, 1341 insertions, 3815 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 7a913937888b..706836ec314d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -405,7 +405,7 @@ config ASSOCIATIVE_ARRAY
 
 	  See:
 
-		Documentation/assoc_array.txt
+		Documentation/core-api/assoc_array.rst
 
 	  for more information.
 
@@ -420,60 +420,15 @@ config HAS_IOPORT_MAP
 	depends on HAS_IOMEM && !NO_IOPORT_MAP
 	default y
 
-config HAS_DMA
-	bool
-	depends on !NO_DMA
-	default y
+source "kernel/dma/Kconfig"
 
 config SGL_ALLOC
 	bool
 	default n
 
-config NEED_SG_DMA_LENGTH
-	bool
-
-config NEED_DMA_MAP_STATE
-	bool
-
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool 64BIT || PHYS_ADDR_T_64BIT
-
 config IOMMU_HELPER
 	bool
 
-config ARCH_HAS_SYNC_DMA_FOR_DEVICE
-	bool
-
-config ARCH_HAS_SYNC_DMA_FOR_CPU
-	bool
-	select NEED_DMA_MAP_STATE
-
-config DMA_DIRECT_OPS
-	bool
-	depends on HAS_DMA
-
-config DMA_NONCOHERENT_OPS
-	bool
-	depends on HAS_DMA
-	select DMA_DIRECT_OPS
-
-config DMA_NONCOHERENT_MMAP
-	bool
-	depends on DMA_NONCOHERENT_OPS
-
-config DMA_NONCOHERENT_CACHE_SYNC
-	bool
-	depends on DMA_NONCOHERENT_OPS
-
-config DMA_VIRT_OPS
-	bool
-	depends on HAS_DMA
-
-config SWIOTLB
-	bool
-	select DMA_DIRECT_OPS
-	select NEED_DMA_MAP_STATE
-
 config CHECK_SIGNATURE
 	bool
 
@@ -621,6 +576,9 @@ config ARCH_HAS_PMEM_API
 config ARCH_HAS_UACCESS_FLUSHCACHE
 	bool
 
+config ARCH_HAS_UACCESS_MCSAFE
+	bool
+
 config STACKDEPOT
 	bool
 	select STACKTRACE
@@ -639,20 +597,20 @@ config STRING_SELFTEST
 
 endmenu
 
-config GENERIC_ASHLDI3
+config GENERIC_LIB_ASHLDI3
 	bool
 
-config GENERIC_ASHRDI3
+config GENERIC_LIB_ASHRDI3
 	bool
 
-config GENERIC_LSHRDI3
+config GENERIC_LIB_LSHRDI3
 	bool
 
-config GENERIC_MULDI3
+config GENERIC_LIB_MULDI3
 	bool
 
-config GENERIC_CMPDI2
+config GENERIC_LIB_CMPDI2
 	bool
 
-config GENERIC_UCMPDI2
+config GENERIC_LIB_UCMPDI2
 	bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a7ef03009e9e..ce5a45e46885 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -747,12 +747,15 @@ config ARCH_HAS_KCOV
 	  only for x86_64. KCOV requires testing on other archs, and most likely
 	  disabling of instrumentation for some early boot code.
 
+config CC_HAS_SANCOV_TRACE_PC
+	def_bool $(cc-option,-fsanitize-coverage=trace-pc)
+
 config KCOV
 	bool "Code coverage for fuzzing"
 	depends on ARCH_HAS_KCOV
+	depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
 	select DEBUG_FS
-	select GCC_PLUGINS if !COMPILE_TEST
-	select GCC_PLUGIN_SANCOV if !COMPILE_TEST
+	select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
 	help
 	  KCOV exposes kernel code coverage information in a form suitable
 	  for coverage-guided fuzzing (randomized testing).
@@ -766,7 +769,7 @@ config KCOV
 config KCOV_ENABLE_COMPARISONS
 	bool "Enable comparison operands collection by KCOV"
 	depends on KCOV
-	default n
+	depends on $(cc-option,-fsanitize-coverage=trace-cmp)
 	help
 	  KCOV also exposes operands of every comparison in the instrumented
 	  code along with operand sizes and PCs of the comparison instructions.
@@ -776,7 +779,7 @@ config KCOV_ENABLE_COMPARISONS
 config KCOV_INSTRUMENT_ALL
 	bool "Instrument all code by default"
 	depends on KCOV
-	default y if KCOV
+	default y
 	help
 	  If you are doing generic system call fuzzing (like e.g. syzkaller),
 	  then you will want to instrument the whole kernel and you should
@@ -1514,6 +1517,10 @@ config NETDEV_NOTIFIER_ERROR_INJECT
 
 	  If unsure, say N.
 
+config FUNCTION_ERROR_INJECTION
+	def_bool y
+	depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
+
 config FAULT_INJECTION
 	bool "Fault-injection framework"
 	depends on DEBUG_KERNEL
@@ -1521,10 +1528,6 @@ config FAULT_INJECTION
 	  Provide fault-injection framework.
 	  For more details, see Documentation/fault-injection/.
 
-config FUNCTION_ERROR_INJECTION
-	def_bool y
-	depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES
-
 config FAILSLAB
 	bool "Fault-injection capability for kmalloc"
 	depends on FAULT_INJECTION
@@ -1555,16 +1558,6 @@ config FAIL_IO_TIMEOUT
 	  Only works with drivers that use the generic timeout handling,
 	  for others it wont do anything.
 
-config FAIL_MMC_REQUEST
-	bool "Fault-injection capability for MMC IO"
-	depends on FAULT_INJECTION_DEBUG_FS && MMC
-	help
-	  Provide fault-injection capability for MMC IO.
-	  This will make the mmc core return data errors. This is
-	  useful to test the error handling in the mmc block device
-	  and to test how the mmc host driver handles retries from
-	  the block device.
-
 config FAIL_FUTEX
 	bool "Fault-injection capability for futexes"
 	select DEBUG_FS
@@ -1572,6 +1565,12 @@ config FAIL_FUTEX
 	help
 	  Provide fault-injection capability for futexes.
 
+config FAULT_INJECTION_DEBUG_FS
+	bool "Debugfs entries for fault-injection capabilities"
+	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
+	help
+	  Enable configuration of fault-injection capabilities via debugfs.
+
 config FAIL_FUNCTION
 	bool "Fault-injection capability for functions"
 	depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION
@@ -1582,11 +1581,15 @@ config FAIL_FUNCTION
 	  an error value and have to handle it. This is useful to test the
 	  error handling in various subsystems.
 
-config FAULT_INJECTION_DEBUG_FS
-	bool "Debugfs entries for fault-injection capabilities"
-	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
+config FAIL_MMC_REQUEST
+	bool "Fault-injection capability for MMC IO"
+	depends on FAULT_INJECTION_DEBUG_FS && MMC
 	help
-	  Enable configuration of fault-injection capabilities via debugfs.
+	  Provide fault-injection capability for MMC IO.
+	  This will make the mmc core return data errors. This is
+	  useful to test the error handling in the mmc block device
+	  and to test how the mmc host driver handles retries from
+	  the block device.
 
 config FAULT_INJECTION_STACKTRACE_FILTER
 	bool "stacktrace filter for fault-injection capabilities"
@@ -1726,7 +1729,7 @@ config KPROBES_SANITY_TEST
 	default n
 	help
 	  This option provides for testing basic kprobes functionality on
-	  boot. A sample kprobe, jprobe and kretprobe are inserted and
+	  boot. Samples of kprobe and kretprobe are inserted and
 	  verified for functionality.
 
 	  Say N if you are unsure.
@@ -1813,6 +1816,9 @@ config TEST_BITMAP
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
+config TEST_OVERFLOW
+	tristate "Test check_*_overflow() functions at runtime"
+
 config TEST_RHASHTABLE
 	tristate "Perform selftest on resizable hash table"
 	default n
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 3d35d062970d..befb127507c0 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,7 +5,8 @@ if HAVE_ARCH_KASAN
 
 config KASAN
 	bool "KASan: runtime memory debugger"
-	depends on SLUB || (SLAB && !DEBUG_SLAB)
+	depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB)
+	select SLUB_DEBUG if SLUB
 	select CONSTRUCTORS
 	select STACKDEPOT
 	help
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 19d42ea75ec2..98fa559ebd80 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,9 +1,6 @@
 config ARCH_HAS_UBSAN_SANITIZE_ALL
 	bool
 
-config ARCH_WANTS_UBSAN_NO_NULL
-	def_bool n
-
 config UBSAN
 	bool "Undefined behaviour sanity checker"
 	help
@@ -39,14 +36,6 @@ config UBSAN_ALIGNMENT
 	  Enabling this option on architectures that support unaligned
 	  accesses may produce a lot of false positives.
 
-config UBSAN_NULL
-	bool "Enable checking of null pointers"
-	depends on UBSAN
-	default y if !ARCH_WANTS_UBSAN_NO_NULL
-	help
-	  This option enables detection of memory accesses via a
-	  null pointer.
-
 config TEST_UBSAN
 	tristate "Module for testing for undefined behavior detection"
 	depends on m && UBSAN
diff --git a/lib/Makefile b/lib/Makefile
index 9f18c8152281..d4a3773ce4a6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,15 +23,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o siphash.o \
+	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
-lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
-lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
-lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y	+= kobject.o klist.o
 obj-y	+= lockref.o
@@ -60,6 +57,7 @@ UBSAN_SANITIZE_test_ubsan.o := y
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
+obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 obj-$(CONFIG_TEST_SORT) += test_sort.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
@@ -97,10 +95,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
-ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
-  lib-y += dec_and_lock.o
-endif
-
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_RATIONAL)	+= rational.o
 obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o
@@ -122,6 +116,7 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
 obj-$(CONFIG_BCH) += bch.o
+CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500)
 obj-$(CONFIG_LZO_COMPRESS) += lzo/
 obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
 obj-$(CONFIG_LZ4_COMPRESS) += lz4/
@@ -147,7 +142,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
-obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
@@ -168,8 +162,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o
 
 obj-$(CONFIG_LRU_CACHE) += lru_cache.o
 
-obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
-
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
@@ -260,9 +252,9 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o
 obj-$(CONFIG_PARMAN) += parman.o
 
 # GCC library routines
-obj-$(CONFIG_GENERIC_ASHLDI3) += ashldi3.o
-obj-$(CONFIG_GENERIC_ASHRDI3) += ashrdi3.o
-obj-$(CONFIG_GENERIC_LSHRDI3) += lshrdi3.o
-obj-$(CONFIG_GENERIC_MULDI3) += muldi3.o
-obj-$(CONFIG_GENERIC_CMPDI2) += cmpdi2.o
-obj-$(CONFIG_GENERIC_UCMPDI2) += ucmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o
+obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o
+obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o
+obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
+obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
diff --git a/lib/argv_split.c b/lib/argv_split.c
index 5c35752a9414..1a19a0a93dc1 100644
--- a/lib/argv_split.c
+++ b/lib/argv_split.c
@@ -69,7 +69,7 @@ char **argv_split(gfp_t gfp, const char *str, int *argcp)
 		return NULL;
 
 	argc = count_argc(argv_str);
-	argv = kmalloc(sizeof(*argv) * (argc + 2), gfp);
+	argv = kmalloc_array(argc + 2, sizeof(*argv), gfp);
 	if (!argv) {
 		kfree(argv_str);
 		return NULL;
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 53c2d5edc826..1d91e31eceec 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -178,18 +178,18 @@ long long atomic64_xchg(atomic64_t *v, long long new)
 }
 EXPORT_SYMBOL(atomic64_xchg);
 
-int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
-	int ret = 0;
+	long long val;
 
 	raw_spin_lock_irqsave(lock, flags);
-	if (v->counter != u) {
+	val = v->counter;
+	if (val != u)
 		v->counter += a;
-		ret = 1;
-	}
 	raw_spin_unlock_irqrestore(lock, flags);
-	return ret;
+
+	return val;
 }
-EXPORT_SYMBOL(atomic64_add_unless);
+EXPORT_SYMBOL(atomic64_fetch_add_unless);
diff --git a/lib/bch.c b/lib/bch.c
index bc89dfe4d1b3..7b0f2006698b 100644
--- a/lib/bch.c
+++ b/lib/bch.c
@@ -78,15 +78,22 @@
 #define GF_M(_p)               (CONFIG_BCH_CONST_M)
 #define GF_T(_p)               (CONFIG_BCH_CONST_T)
 #define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
+#define BCH_MAX_M              (CONFIG_BCH_CONST_M)
 #else
 #define GF_M(_p)               ((_p)->m)
 #define GF_T(_p)               ((_p)->t)
 #define GF_N(_p)               ((_p)->n)
+#define BCH_MAX_M              15
 #endif
 
+#define BCH_MAX_T              (((1 << BCH_MAX_M) - 1) / BCH_MAX_M)
+
 #define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
 #define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
 
+#define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
+#define BCH_ECC_MAX_BYTES      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8)
+
 #ifndef dbg
 #define dbg(_fmt, args...)     do {} while (0)
 #endif
@@ -187,7 +194,8 @@ void encode_bch(struct bch_control *bch, const uint8_t *data,
 	const unsigned int l = BCH_ECC_WORDS(bch)-1;
 	unsigned int i, mlen;
 	unsigned long m;
-	uint32_t w, r[l+1];
+	uint32_t w, r[BCH_ECC_MAX_WORDS];
+	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
 	const uint32_t * const tab0 = bch->mod8_tab;
 	const uint32_t * const tab1 = tab0 + 256*(l+1);
 	const uint32_t * const tab2 = tab1 + 256*(l+1);
@@ -198,7 +206,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data,
 		/* load ecc parity bytes into internal 32-bit buffer */
 		load_ecc8(bch, bch->ecc_buf, ecc);
 	} else {
-		memset(bch->ecc_buf, 0, sizeof(r));
+		memset(bch->ecc_buf, 0, r_bytes);
 	}
 
 	/* process first unaligned data bytes */
@@ -215,7 +223,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data,
 	mlen  = len/4;
 	data += 4*mlen;
 	len  -= 4*mlen;
-	memcpy(r, bch->ecc_buf, sizeof(r));
+	memcpy(r, bch->ecc_buf, r_bytes);
 
 	/*
 	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
@@ -241,7 +249,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data,
 
 		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
 	}
-	memcpy(bch->ecc_buf, r, sizeof(r));
+	memcpy(bch->ecc_buf, r, r_bytes);
 
 	/* process last unaligned bytes */
 	if (len)
@@ -434,7 +442,7 @@ static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
 {
 	const int m = GF_M(bch);
 	unsigned int tmp, mask;
-	int rem, c, r, p, k, param[m];
+	int rem, c, r, p, k, param[BCH_MAX_M];
 
 	k = 0;
 	mask = 1 << m;
@@ -1114,7 +1122,7 @@ static int build_deg2_base(struct bch_control *bch)
 {
 	const int m = GF_M(bch);
 	int i, j, r;
-	unsigned int sum, x, y, remaining, ak = 0, xi[m];
+	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
 
 	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
 	for (i = 0; i < m; i++) {
@@ -1254,7 +1262,6 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
 	struct bch_control *bch = NULL;
 
 	const int min_m = 5;
-	const int max_m = 15;
 
 	/* default primitive polynomials */
 	static const unsigned int prim_poly_tab[] = {
@@ -1270,7 +1277,7 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
 		goto fail;
 	}
 #endif
-	if ((m < min_m) || (m > max_m))
+	if ((m < min_m) || (m > BCH_MAX_M))
 		/*
 		 * values of m greater than 15 are not currently supported;
 		 * supporting m > 15 would require changing table base type
diff --git a/lib/bitmap.c b/lib/bitmap.c
index a42eff7e8c48..58f9750e49c6 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -64,12 +64,9 @@ EXPORT_SYMBOL(__bitmap_equal);
 
 void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
 {
-	unsigned int k, lim = bits/BITS_PER_LONG;
+	unsigned int k, lim = BITS_TO_LONGS(bits);
 	for (k = 0; k < lim; ++k)
 		dst[k] = ~src[k];
-
-	if (bits % BITS_PER_LONG)
-		dst[k] = ~src[k];
 }
 EXPORT_SYMBOL(__bitmap_complement);
 
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
index 266a97c5708b..ade3ce6c4af6 100644
--- a/lib/bucket_locks.c
+++ b/lib/bucket_locks.c
@@ -30,10 +30,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
 	}
 
 	if (sizeof(spinlock_t) != 0) {
-		if (gfpflags_allow_blocking(gfp))
-			tlocks = kvmalloc(size * sizeof(spinlock_t), gfp);
-		else
-			tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp);
+		tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp);
 		if (!tlocks)
 			return -ENOMEM;
 		for (i = 0; i < size; i++)
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 994be4805cec..70935ed91125 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -360,9 +360,12 @@ static void debug_object_is_on_stack(void *addr, int onstack)
 
 	limit++;
 	if (is_on_stack)
-		pr_warn("object is on stack, but not annotated\n");
+		pr_warn("object %p is on stack %p, but NOT annotated.\n", addr,
+			 task_stack_page(current));
 	else
-		pr_warn("object is not on stack, but annotated\n");
+		pr_warn("object %p is NOT on stack %p, but annotated.\n", addr,
+			 task_stack_page(current));
+
 	WARN_ON(1);
 }
 
@@ -1185,8 +1188,7 @@ void __init debug_objects_mem_init(void)
 
 	if (!obj_cache || debug_objects_replace_static_objects()) {
 		debug_objects_enabled = 0;
-		if (obj_cache)
-			kmem_cache_destroy(obj_cache);
+		kmem_cache_destroy(obj_cache);
 		pr_warn("out of memory.\n");
 	} else
 		debug_objects_selftest();
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 347fa7ac2e8a..9555b68bb774 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);
+
+int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+				 unsigned long *flags)
+{
+	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+	if (atomic_add_unless(atomic, -1, 1))
+		return 0;
+
+	/* Otherwise do it the slow way */
+	spin_lock_irqsave(lock, *flags);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock_irqrestore(lock, *flags);
+	return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
deleted file mode 100644
index c007d25bee09..000000000000
--- a/lib/dma-debug.c
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * Copyright (C) 2008 Advanced Micro Devices, Inc.
- *
- * Author: Joerg Roedel <joerg.roedel@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/sched/task_stack.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/sched/task.h>
-#include <linux/stacktrace.h>
-#include <linux/dma-debug.h>
-#include <linux/spinlock.h>
-#include <linux/vmalloc.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/export.h>
-#include <linux/device.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/ctype.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-
-#include <asm/sections.h>
-
-#define HASH_SIZE       1024ULL
-#define HASH_FN_SHIFT   13
-#define HASH_FN_MASK    (HASH_SIZE - 1)
-
-/* allow architectures to override this if absolutely required */
-#ifndef PREALLOC_DMA_DEBUG_ENTRIES
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-#endif
-
-enum {
-	dma_debug_single,
-	dma_debug_page,
-	dma_debug_sg,
-	dma_debug_coherent,
-	dma_debug_resource,
-};
-
-enum map_err_types {
-	MAP_ERR_CHECK_NOT_APPLICABLE,
-	MAP_ERR_NOT_CHECKED,
-	MAP_ERR_CHECKED,
-};
-
-#define DMA_DEBUG_STACKTRACE_ENTRIES 5
-
-/**
- * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
- * @list: node on pre-allocated free_entries list
- * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
- * @type: single, page, sg, coherent
- * @pfn: page frame of the start address
- * @offset: offset of mapping relative to pfn
- * @size: length of the mapping
- * @direction: enum dma_data_direction
- * @sg_call_ents: 'nents' from dma_map_sg
- * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
- * @map_err_type: track whether dma_mapping_error() was checked
- * @stacktrace: support backtraces when a violation is detected
- */
-struct dma_debug_entry {
-	struct list_head list;
-	struct device    *dev;
-	int              type;
-	unsigned long	 pfn;
-	size_t		 offset;
-	u64              dev_addr;
-	u64              size;
-	int              direction;
-	int		 sg_call_ents;
-	int		 sg_mapped_ents;
-	enum map_err_types  map_err_type;
-#ifdef CONFIG_STACKTRACE
-	struct		 stack_trace stacktrace;
-	unsigned long	 st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
-#endif
-};
-
-typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
-
-struct hash_bucket {
-	struct list_head list;
-	spinlock_t lock;
-} ____cacheline_aligned_in_smp;
-
-/* Hash list to save the allocated dma addresses */
-static struct hash_bucket dma_entry_hash[HASH_SIZE];
-/* List of pre-allocated dma_debug_entry's */
-static LIST_HEAD(free_entries);
-/* Lock for the list above */
-static DEFINE_SPINLOCK(free_entries_lock);
-
-/* Global disable flag - will be set in case of an error */
-static bool global_disable __read_mostly;
-
-/* Early initialization disable flag, set at the end of dma_debug_init */
-static bool dma_debug_initialized __read_mostly;
-
-static inline bool dma_debug_disabled(void)
-{
-	return global_disable || !dma_debug_initialized;
-}
-
-/* Global error count */
-static u32 error_count;
-
-/* Global error show enable*/
-static u32 show_all_errors __read_mostly;
-/* Number of errors to show */
-static u32 show_num_errors = 1;
-
-static u32 num_free_entries;
-static u32 min_free_entries;
-static u32 nr_total_entries;
-
-/* number of preallocated entries requested by kernel cmdline */
-static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
-
-/* debugfs dentry's for the stuff above */
-static struct dentry *dma_debug_dent        __read_mostly;
-static struct dentry *global_disable_dent   __read_mostly;
-static struct dentry *error_count_dent      __read_mostly;
-static struct dentry *show_all_errors_dent  __read_mostly;
-static struct dentry *show_num_errors_dent  __read_mostly;
-static struct dentry *num_free_entries_dent __read_mostly;
-static struct dentry *min_free_entries_dent __read_mostly;
-static struct dentry *filter_dent           __read_mostly;
-
-/* per-driver filter related state */
-
-#define NAME_MAX_LEN	64
-
-static char                  current_driver_name[NAME_MAX_LEN] __read_mostly;
-static struct device_driver *current_driver                    __read_mostly;
-
-static DEFINE_RWLOCK(driver_name_lock);
-
-static const char *const maperr2str[] = {
-	[MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
-	[MAP_ERR_NOT_CHECKED] = "dma map error not checked",
-	[MAP_ERR_CHECKED] = "dma map error checked",
-};
-
-static const char *type2name[5] = { "single", "page",
-				    "scather-gather", "coherent",
-				    "resource" };
-
-static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
-				   "DMA_FROM_DEVICE", "DMA_NONE" };
-
-/*
- * The access to some variables in this macro is racy. We can't use atomic_t
- * here because all these variables are exported to debugfs. Some of them even
- * writeable. This is also the reason why a lock won't help much. But anyway,
- * the races are no big deal. Here is why:
- *
- *   error_count: the addition is racy, but the worst thing that can happen is
- *                that we don't count some errors
- *   show_num_errors: the subtraction is racy. Also no big deal because in
- *                    worst case this will result in one warning more in the
- *                    system log than the user configured. This variable is
- *                    writeable via debugfs.
- */
-static inline void dump_entry_trace(struct dma_debug_entry *entry)
-{
-#ifdef CONFIG_STACKTRACE
-	if (entry) {
-		pr_warning("Mapped at:\n");
-		print_stack_trace(&entry->stacktrace, 0);
-	}
-#endif
-}
-
-static bool driver_filter(struct device *dev)
-{
-	struct device_driver *drv;
-	unsigned long flags;
-	bool ret;
-
-	/* driver filter off */
-	if (likely(!current_driver_name[0]))
-		return true;
-
-	/* driver filter on and initialized */
-	if (current_driver && dev && dev->driver == current_driver)
-		return true;
-
-	/* driver filter on, but we can't filter on a NULL device... */
-	if (!dev)
-		return false;
-
-	if (current_driver || !current_driver_name[0])
-		return false;
-
-	/* driver filter on but not yet initialized */
-	drv = dev->driver;
-	if (!drv)
-		return false;
-
-	/* lock to protect against change of current_driver_name */
-	read_lock_irqsave(&driver_name_lock, flags);
-
-	ret = false;
-	if (drv->name &&
-	    strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
-		current_driver = drv;
-		ret = true;
-	}
-
-	read_unlock_irqrestore(&driver_name_lock, flags);
-
-	return ret;
-}
-
-#define err_printk(dev, entry, format, arg...) do {			\
-		error_count += 1;					\
-		if (driver_filter(dev) &&				\
-		    (show_all_errors || show_num_errors > 0)) {		\
-			WARN(1, "%s %s: " format,			\
-			     dev ? dev_driver_string(dev) : "NULL",	\
-			     dev ? dev_name(dev) : "NULL", ## arg);	\
-			dump_entry_trace(entry);			\
-		}							\
-		if (!show_all_errors && show_num_errors > 0)		\
-			show_num_errors -= 1;				\
-	} while (0);
-
-/*
- * Hash related functions
- *
- * Every DMA-API request is saved into a struct dma_debug_entry. To
- * have quick access to these structs they are stored into a hash.
- */
-static int hash_fn(struct dma_debug_entry *entry)
-{
-	/*
-	 * Hash function is based on the dma address.
-	 * We use bits 20-27 here as the index into the hash
-	 */
-	return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
-}
-
-/*
- * Request exclusive access to a hash bucket for a given dma_debug_entry.
- */
-static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
-					   unsigned long *flags)
-	__acquires(&dma_entry_hash[idx].lock)
-{
-	int idx = hash_fn(entry);
-	unsigned long __flags;
-
-	spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
-	*flags = __flags;
-	return &dma_entry_hash[idx];
-}
-
-/*
- * Give up exclusive access to the hash bucket
- */
-static void put_hash_bucket(struct hash_bucket *bucket,
-			    unsigned long *flags)
-	__releases(&bucket->lock)
-{
-	unsigned long __flags = *flags;
-
-	spin_unlock_irqrestore(&bucket->lock, __flags);
-}
-
-static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
-{
-	return ((a->dev_addr == b->dev_addr) &&
-		(a->dev == b->dev)) ? true : false;
-}
-
-static bool containing_match(struct dma_debug_entry *a,
-			     struct dma_debug_entry *b)
-{
-	if (a->dev != b->dev)
-		return false;
-
-	if ((b->dev_addr <= a->dev_addr) &&
-	    ((b->dev_addr + b->size) >= (a->dev_addr + a->size)))
-		return true;
-
-	return false;
-}
-
-/*
- * Search a given entry in the hash bucket list
- */
-static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
-						  struct dma_debug_entry *ref,
-						  match_fn match)
-{
-	struct dma_debug_entry *entry, *ret = NULL;
-	int matches = 0, match_lvl, last_lvl = -1;
-
-	list_for_each_entry(entry, &bucket->list, list) {
-		if (!match(ref, entry))
-			continue;
-
-		/*
-		 * Some drivers map the same physical address multiple
-		 * times. Without a hardware IOMMU this results in the
-		 * same device addresses being put into the dma-debug
-		 * hash multiple times too. This can result in false
-		 * positives being reported. Therefore we implement a
-		 * best-fit algorithm here which returns the entry from
-		 * the hash which fits best to the reference value
-		 * instead of the first-fit.
-		 */
-		matches += 1;
-		match_lvl = 0;
-		entry->size         == ref->size         ? ++match_lvl : 0;
-		entry->type         == ref->type         ? ++match_lvl : 0;
-		entry->direction    == ref->direction    ? ++match_lvl : 0;
-		entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0;
-
-		if (match_lvl == 4) {
-			/* perfect-fit - return the result */
-			return entry;
-		} else if (match_lvl > last_lvl) {
-			/*
-			 * We found an entry that fits better then the
-			 * previous one or it is the 1st match.
-			 */
-			last_lvl = match_lvl;
-			ret      = entry;
-		}
-	}
-
-	/*
-	 * If we have multiple matches but no perfect-fit, just return
-	 * NULL.
-	 */
-	ret = (matches == 1) ? ret : NULL;
-
-	return ret;
-}
-
-static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket,
-						 struct dma_debug_entry *ref)
-{
-	return __hash_bucket_find(bucket, ref, exact_match);
-}
-
-static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
-						   struct dma_debug_entry *ref,
-						   unsigned long *flags)
-{
-
-	unsigned int max_range = dma_get_max_seg_size(ref->dev);
-	struct dma_debug_entry *entry, index = *ref;
-	unsigned int range = 0;
-
-	while (range <= max_range) {
-		entry = __hash_bucket_find(*bucket, ref, containing_match);
-
-		if (entry)
-			return entry;
-
-		/*
-		 * Nothing found, go back a hash bucket
-		 */
-		put_hash_bucket(*bucket, flags);
-		range          += (1 << HASH_FN_SHIFT);
-		index.dev_addr -= (1 << HASH_FN_SHIFT);
-		*bucket = get_hash_bucket(&index, flags);
-	}
-
-	return NULL;
-}
-
-/*
- * Add an entry to a hash bucket
- */
-static void hash_bucket_add(struct hash_bucket *bucket,
-			    struct dma_debug_entry *entry)
-{
-	list_add_tail(&entry->list, &bucket->list);
-}
-
-/*
- * Remove entry from a hash bucket list
- */
-static void hash_bucket_del(struct dma_debug_entry *entry)
-{
-	list_del(&entry->list);
-}
-
-static unsigned long long phys_addr(struct dma_debug_entry *entry)
-{
-	if (entry->type == dma_debug_resource)
-		return __pfn_to_phys(entry->pfn) + entry->offset;
-
-	return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
-}
-
-/*
- * Dump mapping entries for debugging purposes
- */
-void debug_dma_dump_mappings(struct device *dev)
-{
-	int idx;
-
-	for (idx = 0; idx < HASH_SIZE; idx++) {
-		struct hash_bucket *bucket = &dma_entry_hash[idx];
-		struct dma_debug_entry *entry;
-		unsigned long flags;
-
-		spin_lock_irqsave(&bucket->lock, flags);
-
-		list_for_each_entry(entry, &bucket->list, list) {
-			if (!dev || dev == entry->dev) {
-				dev_info(entry->dev,
-					 "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
-					 type2name[entry->type], idx,
-					 phys_addr(entry), entry->pfn,
-					 entry->dev_addr, entry->size,
-					 dir2name[entry->direction],
-					 maperr2str[entry->map_err_type]);
-			}
-		}
-
-		spin_unlock_irqrestore(&bucket->lock, flags);
-	}
-}
-
-/*
- * For each mapping (initial cacheline in the case of
- * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
- * scatterlist, or the cacheline specified in dma_map_single) insert
- * into this tree using the cacheline as the key. At
- * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry.  If
- * the entry already exists at insertion time add a tag as a reference
- * count for the overlapping mappings.  For now, the overlap tracking
- * just ensures that 'unmaps' balance 'maps' before marking the
- * cacheline idle, but we should also be flagging overlaps as an API
- * violation.
- *
- * Memory usage is mostly constrained by the maximum number of available
- * dma-debug entries in that we need a free dma_debug_entry before
- * inserting into the tree.  In the case of dma_map_page and
- * dma_alloc_coherent there is only one dma_debug_entry and one
- * dma_active_cacheline entry to track per event.  dma_map_sg(), on the
- * other hand, consumes a single dma_debug_entry, but inserts 'nents'
- * entries into the tree.
- *
- * At any time debug_dma_assert_idle() can be called to trigger a
- * warning if any cachelines in the given page are in the active set.
- */
-static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
-static DEFINE_SPINLOCK(radix_lock);
-#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
-#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
-#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
-
-static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
-{
-	return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
-		(entry->offset >> L1_CACHE_SHIFT);
-}
-
-static int active_cacheline_read_overlap(phys_addr_t cln)
-{
-	int overlap = 0, i;
-
-	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-		if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
-			overlap |= 1 << i;
-	return overlap;
-}
-
-static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
-{
-	int i;
-
-	if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
-		return overlap;
-
-	for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-		if (overlap & 1 << i)
-			radix_tree_tag_set(&dma_active_cacheline, cln, i);
-		else
-			radix_tree_tag_clear(&dma_active_cacheline, cln, i);
-
-	return overlap;
-}
-
-static void active_cacheline_inc_overlap(phys_addr_t cln)
-{
-	int overlap = active_cacheline_read_overlap(cln);
-
-	overlap = active_cacheline_set_overlap(cln, ++overlap);
-
-	/* If we overflowed the overlap counter then we're potentially
-	 * leaking dma-mappings.  Otherwise, if maps and unmaps are
-	 * balanced then this overflow may cause false negatives in
-	 * debug_dma_assert_idle() as the cacheline may be marked idle
-	 * prematurely.
-	 */
-	WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
-		  "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
-		  ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
-}
-
-static int active_cacheline_dec_overlap(phys_addr_t cln)
-{
-	int overlap = active_cacheline_read_overlap(cln);
-
-	return active_cacheline_set_overlap(cln, --overlap);
-}
-
-static int active_cacheline_insert(struct dma_debug_entry *entry)
-{
-	phys_addr_t cln = to_cacheline_number(entry);
-	unsigned long flags;
-	int rc;
-
-	/* If the device is not writing memory then we don't have any
-	 * concerns about the cpu consuming stale data.  This mitigates
-	 * legitimate usages of overlapping mappings.
-	 */
-	if (entry->direction == DMA_TO_DEVICE)
-		return 0;
-
-	spin_lock_irqsave(&radix_lock, flags);
-	rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
-	if (rc == -EEXIST)
-		active_cacheline_inc_overlap(cln);
-	spin_unlock_irqrestore(&radix_lock, flags);
-
-	return rc;
-}
-
-static void active_cacheline_remove(struct dma_debug_entry *entry)
-{
-	phys_addr_t cln = to_cacheline_number(entry);
-	unsigned long flags;
-
-	/* ...mirror the insert case */
-	if (entry->direction == DMA_TO_DEVICE)
-		return;
-
-	spin_lock_irqsave(&radix_lock, flags);
-	/* since we are counting overlaps the final put of the
-	 * cacheline will occur when the overlap count is 0.
-	 * active_cacheline_dec_overlap() returns -1 in that case
-	 */
-	if (active_cacheline_dec_overlap(cln) < 0)
-		radix_tree_delete(&dma_active_cacheline, cln);
-	spin_unlock_irqrestore(&radix_lock, flags);
-}
-
-/**
- * debug_dma_assert_idle() - assert that a page is not undergoing dma
- * @page: page to lookup in the dma_active_cacheline tree
- *
- * Place a call to this routine in cases where the cpu touching the page
- * before the dma completes (page is dma_unmapped) will lead to data
- * corruption.
- */
-void debug_dma_assert_idle(struct page *page)
-{
-	static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
-	struct dma_debug_entry *entry = NULL;
-	void **results = (void **) &ents;
-	unsigned int nents, i;
-	unsigned long flags;
-	phys_addr_t cln;
-
-	if (dma_debug_disabled())
-		return;
-
-	if (!page)
-		return;
-
-	cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
-	spin_lock_irqsave(&radix_lock, flags);
-	nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
-				       CACHELINES_PER_PAGE);
-	for (i = 0; i < nents; i++) {
-		phys_addr_t ent_cln = to_cacheline_number(ents[i]);
-
-		if (ent_cln == cln) {
-			entry = ents[i];
-			break;
-		} else if (ent_cln >= cln + CACHELINES_PER_PAGE)
-			break;
-	}
-	spin_unlock_irqrestore(&radix_lock, flags);
-
-	if (!entry)
-		return;
-
-	cln = to_cacheline_number(entry);
-	err_printk(entry->dev, entry,
-		   "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
-		   &cln);
-}
-
-/*
- * Wrapper function for adding an entry to the hash.
- * This function takes care of locking itself.
- */
-static void add_dma_entry(struct dma_debug_entry *entry)
-{
-	struct hash_bucket *bucket;
-	unsigned long flags;
-	int rc;
-
-	bucket = get_hash_bucket(entry, &flags);
-	hash_bucket_add(bucket, entry);
-	put_hash_bucket(bucket, &flags);
-
-	rc = active_cacheline_insert(entry);
-	if (rc == -ENOMEM) {
-		pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
-		global_disable = true;
-	}
-
-	/* TODO: report -EEXIST errors here as overlapping mappings are
-	 * not supported by the DMA API
-	 */
-}
-
-static struct dma_debug_entry *__dma_entry_alloc(void)
-{
-	struct dma_debug_entry *entry;
-
-	entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-	list_del(&entry->list);
-	memset(entry, 0, sizeof(*entry));
-
-	num_free_entries -= 1;
-	if (num_free_entries < min_free_entries)
-		min_free_entries = num_free_entries;
-
-	return entry;
-}
-
-/* struct dma_entry allocator
- *
- * The next two functions implement the allocator for
- * struct dma_debug_entries.
- */
-static struct dma_debug_entry *dma_entry_alloc(void)
-{
-	struct dma_debug_entry *entry;
-	unsigned long flags;
-
-	spin_lock_irqsave(&free_entries_lock, flags);
-
-	if (list_empty(&free_entries)) {
-		global_disable = true;
-		spin_unlock_irqrestore(&free_entries_lock, flags);
-		pr_err("DMA-API: debugging out of memory - disabling\n");
-		return NULL;
-	}
-
-	entry = __dma_entry_alloc();
-
-	spin_unlock_irqrestore(&free_entries_lock, flags);
-
-#ifdef CONFIG_STACKTRACE
-	entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
-	entry->stacktrace.entries = entry->st_entries;
-	entry->stacktrace.skip = 2;
-	save_stack_trace(&entry->stacktrace);
-#endif
-
-	return entry;
-}
-
-static void dma_entry_free(struct dma_debug_entry *entry)
-{
-	unsigned long flags;
-
-	active_cacheline_remove(entry);
-
-	/*
-	 * add to beginning of the list - this way the entries are
-	 * more likely cache hot when they are reallocated.
-	 */
-	spin_lock_irqsave(&free_entries_lock, flags);
-	list_add(&entry->list, &free_entries);
-	num_free_entries += 1;
-	spin_unlock_irqrestore(&free_entries_lock, flags);
-}
-
-int dma_debug_resize_entries(u32 num_entries)
-{
-	int i, delta, ret = 0;
-	unsigned long flags;
-	struct dma_debug_entry *entry;
-	LIST_HEAD(tmp);
-
-	spin_lock_irqsave(&free_entries_lock, flags);
-
-	if (nr_total_entries < num_entries) {
-		delta = num_entries - nr_total_entries;
-
-		spin_unlock_irqrestore(&free_entries_lock, flags);
-
-		for (i = 0; i < delta; i++) {
-			entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-			if (!entry)
-				break;
-
-			list_add_tail(&entry->list, &tmp);
-		}
-
-		spin_lock_irqsave(&free_entries_lock, flags);
-
-		list_splice(&tmp, &free_entries);
-		nr_total_entries += i;
-		num_free_entries += i;
-	} else {
-		delta = nr_total_entries - num_entries;
-
-		for (i = 0; i < delta && !list_empty(&free_entries); i++) {
-			entry = __dma_entry_alloc();
-			kfree(entry);
-		}
-
-		nr_total_entries -= i;
-	}
-
-	if (nr_total_entries != num_entries)
-		ret = 1;
-
-	spin_unlock_irqrestore(&free_entries_lock, flags);
-
-	return ret;
-}
-
-/*
- * DMA-API debugging init code
- *
- * The init code does two things:
- *   1. Initialize core data structures
- *   2. Preallocate a given number of dma_debug_entry structs
- */
-
-static int prealloc_memory(u32 num_entries)
-{
-	struct dma_debug_entry *entry, *next_entry;
-	int i;
-
-	for (i = 0; i < num_entries; ++i) {
-		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-		if (!entry)
-			goto out_err;
-
-		list_add_tail(&entry->list, &free_entries);
-	}
-
-	num_free_entries = num_entries;
-	min_free_entries = num_entries;
-
-	pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
-
-	return 0;
-
-out_err:
-
-	list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
-		list_del(&entry->list);
-		kfree(entry);
-	}
-
-	return -ENOMEM;
-}
-
-static ssize_t filter_read(struct file *file, char __user *user_buf,
-			   size_t count, loff_t *ppos)
-{
-	char buf[NAME_MAX_LEN + 1];
-	unsigned long flags;
-	int len;
-
-	if (!current_driver_name[0])
-		return 0;
-
-	/*
-	 * We can't copy to userspace directly because current_driver_name can
-	 * only be read under the driver_name_lock with irqs disabled. So
-	 * create a temporary copy first.
-	 */
-	read_lock_irqsave(&driver_name_lock, flags);
-	len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
-	read_unlock_irqrestore(&driver_name_lock, flags);
-
-	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
-}
-
-static ssize_t filter_write(struct file *file, const char __user *userbuf,
-			    size_t count, loff_t *ppos)
-{
-	char buf[NAME_MAX_LEN];
-	unsigned long flags;
-	size_t len;
-	int i;
-
-	/*
-	 * We can't copy from userspace directly. Access to
-	 * current_driver_name is protected with a write_lock with irqs
-	 * disabled. Since copy_from_user can fault and may sleep we
-	 * need to copy to temporary buffer first
-	 */
-	len = min(count, (size_t)(NAME_MAX_LEN - 1));
-	if (copy_from_user(buf, userbuf, len))
-		return -EFAULT;
-
-	buf[len] = 0;
-
-	write_lock_irqsave(&driver_name_lock, flags);
-
-	/*
-	 * Now handle the string we got from userspace very carefully.
-	 * The rules are:
-	 *         - only use the first token we got
-	 *         - token delimiter is everything looking like a space
-	 *           character (' ', '\n', '\t' ...)
-	 *
-	 */
-	if (!isalnum(buf[0])) {
-		/*
-		 * If the first character userspace gave us is not
-		 * alphanumerical then assume the filter should be
-		 * switched off.
-		 */
-		if (current_driver_name[0])
-			pr_info("DMA-API: switching off dma-debug driver filter\n");
-		current_driver_name[0] = 0;
-		current_driver = NULL;
-		goto out_unlock;
-	}
-
-	/*
-	 * Now parse out the first token and use it as the name for the
-	 * driver to filter for.
-	 */
-	for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
-		current_driver_name[i] = buf[i];
-		if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
-			break;
-	}
-	current_driver_name[i] = 0;
-	current_driver = NULL;
-
-	pr_info("DMA-API: enable driver filter for driver [%s]\n",
-		current_driver_name);
-
-out_unlock:
-	write_unlock_irqrestore(&driver_name_lock, flags);
-
-	return count;
-}
-
-static const struct file_operations filter_fops = {
-	.read  = filter_read,
-	.write = filter_write,
-	.llseek = default_llseek,
-};
-
-static int dma_debug_fs_init(void)
-{
-	dma_debug_dent = debugfs_create_dir("dma-api", NULL);
-	if (!dma_debug_dent) {
-		pr_err("DMA-API: can not create debugfs directory\n");
-		return -ENOMEM;
-	}
-
-	global_disable_dent = debugfs_create_bool("disabled", 0444,
-			dma_debug_dent,
-			&global_disable);
-	if (!global_disable_dent)
-		goto out_err;
-
-	error_count_dent = debugfs_create_u32("error_count", 0444,
-			dma_debug_dent, &error_count);
-	if (!error_count_dent)
-		goto out_err;
-
-	show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
-			dma_debug_dent,
-			&show_all_errors);
-	if (!show_all_errors_dent)
-		goto out_err;
-
-	show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
-			dma_debug_dent,
-			&show_num_errors);
-	if (!show_num_errors_dent)
-		goto out_err;
-
-	num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
-			dma_debug_dent,
-			&num_free_entries);
-	if (!num_free_entries_dent)
-		goto out_err;
-
-	min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
-			dma_debug_dent,
-			&min_free_entries);
-	if (!min_free_entries_dent)
-		goto out_err;
-
-	filter_dent = debugfs_create_file("driver_filter", 0644,
-					  dma_debug_dent, NULL, &filter_fops);
-	if (!filter_dent)
-		goto out_err;
-
-	return 0;
-
-out_err:
-	debugfs_remove_recursive(dma_debug_dent);
-
-	return -ENOMEM;
-}
-
-static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
-{
-	struct dma_debug_entry *entry;
-	unsigned long flags;
-	int count = 0, i;
-
-	for (i = 0; i < HASH_SIZE; ++i) {
-		spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
-		list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
-			if (entry->dev == dev) {
-				count += 1;
-				*out_entry = entry;
-			}
-		}
-		spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
-	}
-
-	return count;
-}
-
-static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct dma_debug_entry *uninitialized_var(entry);
-	int count;
-
-	if (dma_debug_disabled())
-		return 0;
-
-	switch (action) {
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		count = device_dma_allocations(dev, &entry);
-		if (count == 0)
-			break;
-		err_printk(dev, entry, "DMA-API: device driver has pending "
-				"DMA allocations while released from device "
-				"[count=%d]\n"
-				"One of leaked entries details: "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [mapped as %s]\n",
-			count, entry->dev_addr, entry->size,
-			dir2name[entry->direction], type2name[entry->type]);
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-void dma_debug_add_bus(struct bus_type *bus)
-{
-	struct notifier_block *nb;
-
-	if (dma_debug_disabled())
-		return;
-
-	nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
-	if (nb == NULL) {
-		pr_err("dma_debug_add_bus: out of memory\n");
-		return;
-	}
-
-	nb->notifier_call = dma_debug_device_change;
-
-	bus_register_notifier(bus, nb);
-}
-
-static int dma_debug_init(void)
-{
-	int i;
-
-	/* Do not use dma_debug_initialized here, since we really want to be
-	 * called to set dma_debug_initialized
-	 */
-	if (global_disable)
-		return 0;
-
-	for (i = 0; i < HASH_SIZE; ++i) {
-		INIT_LIST_HEAD(&dma_entry_hash[i].list);
-		spin_lock_init(&dma_entry_hash[i].lock);
-	}
-
-	if (dma_debug_fs_init() != 0) {
-		pr_err("DMA-API: error creating debugfs entries - disabling\n");
-		global_disable = true;
-
-		return 0;
-	}
-
-	if (prealloc_memory(nr_prealloc_entries) != 0) {
-		pr_err("DMA-API: debugging out of memory error - disabled\n");
-		global_disable = true;
-
-		return 0;
-	}
-
-	nr_total_entries = num_free_entries;
-
-	dma_debug_initialized = true;
-
-	pr_info("DMA-API: debugging enabled by kernel config\n");
-	return 0;
-}
-core_initcall(dma_debug_init);
-
-static __init int dma_debug_cmdline(char *str)
-{
-	if (!str)
-		return -EINVAL;
-
-	if (strncmp(str, "off", 3) == 0) {
-		pr_info("DMA-API: debugging disabled on kernel command line\n");
-		global_disable = true;
-	}
-
-	return 0;
-}
-
-static __init int dma_debug_entries_cmdline(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!get_option(&str, &nr_prealloc_entries))
-		nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
-	return 0;
-}
-
-__setup("dma_debug=", dma_debug_cmdline);
-__setup("dma_debug_entries=", dma_debug_entries_cmdline);
-
-static void check_unmap(struct dma_debug_entry *ref)
-{
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-
-	bucket = get_hash_bucket(ref, &flags);
-	entry = bucket_find_exact(bucket, ref);
-
-	if (!entry) {
-		/* must drop lock before calling dma_mapping_error */
-		put_hash_bucket(bucket, &flags);
-
-		if (dma_mapping_error(ref->dev, ref->dev_addr)) {
-			err_printk(ref->dev, NULL,
-				   "DMA-API: device driver tries to free an "
-				   "invalid DMA memory address\n");
-		} else {
-			err_printk(ref->dev, NULL,
-				   "DMA-API: device driver tries to free DMA "
-				   "memory it has not allocated [device "
-				   "address=0x%016llx] [size=%llu bytes]\n",
-				   ref->dev_addr, ref->size);
-		}
-		return;
-	}
-
-	if (ref->size != entry->size) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with different size "
-			   "[device address=0x%016llx] [map size=%llu bytes] "
-			   "[unmap size=%llu bytes]\n",
-			   ref->dev_addr, entry->size, ref->size);
-	}
-
-	if (ref->type != entry->type) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with wrong function "
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[mapped as %s] [unmapped as %s]\n",
-			   ref->dev_addr, ref->size,
-			   type2name[entry->type], type2name[ref->type]);
-	} else if ((entry->type == dma_debug_coherent) &&
-		   (phys_addr(ref) != phys_addr(entry))) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with different CPU address "
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[cpu alloc address=0x%016llx] "
-			   "[cpu free address=0x%016llx]",
-			   ref->dev_addr, ref->size,
-			   phys_addr(entry),
-			   phys_addr(ref));
-	}
-
-	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
-	    ref->sg_call_ents != entry->sg_call_ents) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA sg list with different entry count "
-			   "[map count=%d] [unmap count=%d]\n",
-			   entry->sg_call_ents, ref->sg_call_ents);
-	}
-
-	/*
-	 * This may be no bug in reality - but most implementations of the
-	 * DMA API don't handle this properly, so check for it here
-	 */
-	if (ref->direction != entry->direction) {
-		err_printk(ref->dev, entry, "DMA-API: device driver frees "
-			   "DMA memory with different direction "
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[mapped with %s] [unmapped with %s]\n",
-			   ref->dev_addr, ref->size,
-			   dir2name[entry->direction],
-			   dir2name[ref->direction]);
-	}
-
-	/*
-	 * Drivers should use dma_mapping_error() to check the returned
-	 * addresses of dma_map_single() and dma_map_page().
-	 * If not, print this warning message. See Documentation/DMA-API.txt.
-	 */
-	if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
-		err_printk(ref->dev, entry,
-			   "DMA-API: device driver failed to check map error"
-			   "[device address=0x%016llx] [size=%llu bytes] "
-			   "[mapped as %s]",
-			   ref->dev_addr, ref->size,
-			   type2name[entry->type]);
-	}
-
-	hash_bucket_del(entry);
-	dma_entry_free(entry);
-
-	put_hash_bucket(bucket, &flags);
-}
-
-static void check_for_stack(struct device *dev,
-			    struct page *page, size_t offset)
-{
-	void *addr;
-	struct vm_struct *stack_vm_area = task_stack_vm_area(current);
-
-	if (!stack_vm_area) {
-		/* Stack is direct-mapped. */
-		if (PageHighMem(page))
-			return;
-		addr = page_address(page) + offset;
-		if (object_is_on_stack(addr))
-			err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
-	} else {
-		/* Stack is vmalloced. */
-		int i;
-
-		for (i = 0; i < stack_vm_area->nr_pages; i++) {
-			if (page != stack_vm_area->pages[i])
-				continue;
-
-			addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
-			err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
-			break;
-		}
-	}
-}
-
-static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
-{
-	unsigned long a1 = (unsigned long)addr;
-	unsigned long b1 = a1 + len;
-	unsigned long a2 = (unsigned long)start;
-	unsigned long b2 = (unsigned long)end;
-
-	return !(b1 <= a2 || a1 >= b2);
-}
-
-static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
-{
-	if (overlap(addr, len, _stext, _etext) ||
-	    overlap(addr, len, __start_rodata, __end_rodata))
-		err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
-}
-
-static void check_sync(struct device *dev,
-		       struct dma_debug_entry *ref,
-		       bool to_cpu)
-{
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-
-	bucket = get_hash_bucket(ref, &flags);
-
-	entry = bucket_find_contain(&bucket, ref, &flags);
-
-	if (!entry) {
-		err_printk(dev, NULL, "DMA-API: device driver tries "
-				"to sync DMA memory it has not allocated "
-				"[device address=0x%016llx] [size=%llu bytes]\n",
-				(unsigned long long)ref->dev_addr, ref->size);
-		goto out;
-	}
-
-	if (ref->size > entry->size) {
-		err_printk(dev, entry, "DMA-API: device driver syncs"
-				" DMA memory outside allocated range "
-				"[device address=0x%016llx] "
-				"[allocation size=%llu bytes] "
-				"[sync offset+size=%llu]\n",
-				entry->dev_addr, entry->size,
-				ref->size);
-	}
-
-	if (entry->direction == DMA_BIDIRECTIONAL)
-		goto out;
-
-	if (ref->direction != entry->direction) {
-		err_printk(dev, entry, "DMA-API: device driver syncs "
-				"DMA memory with different direction "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)ref->dev_addr, entry->size,
-				dir2name[entry->direction],
-				dir2name[ref->direction]);
-	}
-
-	if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
-		      !(ref->direction == DMA_TO_DEVICE))
-		err_printk(dev, entry, "DMA-API: device driver syncs "
-				"device read-only DMA memory for cpu "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)ref->dev_addr, entry->size,
-				dir2name[entry->direction],
-				dir2name[ref->direction]);
-
-	if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
-		       !(ref->direction == DMA_FROM_DEVICE))
-		err_printk(dev, entry, "DMA-API: device driver syncs "
-				"device write-only DMA memory to device "
-				"[device address=0x%016llx] [size=%llu bytes] "
-				"[mapped with %s] [synced with %s]\n",
-				(unsigned long long)ref->dev_addr, entry->size,
-				dir2name[entry->direction],
-				dir2name[ref->direction]);
-
-	if (ref->sg_call_ents && ref->type == dma_debug_sg &&
-	    ref->sg_call_ents != entry->sg_call_ents) {
-		err_printk(ref->dev, entry, "DMA-API: device driver syncs "
-			   "DMA sg list with different entry count "
-			   "[map count=%d] [sync count=%d]\n",
-			   entry->sg_call_ents, ref->sg_call_ents);
-	}
-
-out:
-	put_hash_bucket(bucket, &flags);
-}
-
-static void check_sg_segment(struct device *dev, struct scatterlist *sg)
-{
-#ifdef CONFIG_DMA_API_DEBUG_SG
-	unsigned int max_seg = dma_get_max_seg_size(dev);
-	u64 start, end, boundary = dma_get_seg_boundary(dev);
-
-	/*
-	 * Either the driver forgot to set dma_parms appropriately, or
-	 * whoever generated the list forgot to check them.
-	 */
-	if (sg->length > max_seg)
-		err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
-			   sg->length, max_seg);
-	/*
-	 * In some cases this could potentially be the DMA API
-	 * implementation's fault, but it would usually imply that
-	 * the scatterlist was built inappropriately to begin with.
-	 */
-	start = sg_dma_address(sg);
-	end = start + sg_dma_len(sg) - 1;
-	if ((start ^ end) & ~boundary)
-		err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
-			   start, end, boundary);
-#endif
-}
-
-void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
-			size_t size, int direction, dma_addr_t dma_addr,
-			bool map_single)
-{
-	struct dma_debug_entry *entry;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	if (dma_mapping_error(dev, dma_addr))
-		return;
-
-	entry = dma_entry_alloc();
-	if (!entry)
-		return;
-
-	entry->dev       = dev;
-	entry->type      = dma_debug_page;
-	entry->pfn	 = page_to_pfn(page);
-	entry->offset	 = offset,
-	entry->dev_addr  = dma_addr;
-	entry->size      = size;
-	entry->direction = direction;
-	entry->map_err_type = MAP_ERR_NOT_CHECKED;
-
-	if (map_single)
-		entry->type = dma_debug_single;
-
-	check_for_stack(dev, page, offset);
-
-	if (!PageHighMem(page)) {
-		void *addr = page_address(page) + offset;
-
-		check_for_illegal_area(dev, addr, size);
-	}
-
-	add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_map_page);
-
-void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	struct dma_debug_entry ref;
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.dev = dev;
-	ref.dev_addr = dma_addr;
-	bucket = get_hash_bucket(&ref, &flags);
-
-	list_for_each_entry(entry, &bucket->list, list) {
-		if (!exact_match(&ref, entry))
-			continue;
-
-		/*
-		 * The same physical address can be mapped multiple
-		 * times. Without a hardware IOMMU this results in the
-		 * same device addresses being put into the dma-debug
-		 * hash multiple times too. This can result in false
-		 * positives being reported. Therefore we implement a
-		 * best-fit algorithm here which updates the first entry
-		 * from the hash which fits the reference value and is
-		 * not currently listed as being checked.
-		 */
-		if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
-			entry->map_err_type = MAP_ERR_CHECKED;
-			break;
-		}
-	}
-
-	put_hash_bucket(bucket, &flags);
-}
-EXPORT_SYMBOL(debug_dma_mapping_error);
-
-void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
-			  size_t size, int direction, bool map_single)
-{
-	struct dma_debug_entry ref = {
-		.type           = dma_debug_page,
-		.dev            = dev,
-		.dev_addr       = addr,
-		.size           = size,
-		.direction      = direction,
-	};
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	if (map_single)
-		ref.type = dma_debug_single;
-
-	check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_unmap_page);
-
-void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
-		      int nents, int mapped_ents, int direction)
-{
-	struct dma_debug_entry *entry;
-	struct scatterlist *s;
-	int i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sg, s, mapped_ents, i) {
-		entry = dma_entry_alloc();
-		if (!entry)
-			return;
-
-		entry->type           = dma_debug_sg;
-		entry->dev            = dev;
-		entry->pfn	      = page_to_pfn(sg_page(s));
-		entry->offset	      = s->offset,
-		entry->size           = sg_dma_len(s);
-		entry->dev_addr       = sg_dma_address(s);
-		entry->direction      = direction;
-		entry->sg_call_ents   = nents;
-		entry->sg_mapped_ents = mapped_ents;
-
-		check_for_stack(dev, sg_page(s), s->offset);
-
-		if (!PageHighMem(sg_page(s))) {
-			check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
-		}
-
-		check_sg_segment(dev, s);
-
-		add_dma_entry(entry);
-	}
-}
-EXPORT_SYMBOL(debug_dma_map_sg);
-
-static int get_nr_mapped_entries(struct device *dev,
-				 struct dma_debug_entry *ref)
-{
-	struct dma_debug_entry *entry;
-	struct hash_bucket *bucket;
-	unsigned long flags;
-	int mapped_ents;
-
-	bucket       = get_hash_bucket(ref, &flags);
-	entry        = bucket_find_exact(bucket, ref);
-	mapped_ents  = 0;
-
-	if (entry)
-		mapped_ents = entry->sg_mapped_ents;
-	put_hash_bucket(bucket, &flags);
-
-	return mapped_ents;
-}
-
-void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
-			int nelems, int dir)
-{
-	struct scatterlist *s;
-	int mapped_ents = 0, i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sglist, s, nelems, i) {
-
-		struct dma_debug_entry ref = {
-			.type           = dma_debug_sg,
-			.dev            = dev,
-			.pfn		= page_to_pfn(sg_page(s)),
-			.offset		= s->offset,
-			.dev_addr       = sg_dma_address(s),
-			.size           = sg_dma_len(s),
-			.direction      = dir,
-			.sg_call_ents   = nelems,
-		};
-
-		if (mapped_ents && i >= mapped_ents)
-			break;
-
-		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-		check_unmap(&ref);
-	}
-}
-EXPORT_SYMBOL(debug_dma_unmap_sg);
-
-void debug_dma_alloc_coherent(struct device *dev, size_t size,
-			      dma_addr_t dma_addr, void *virt)
-{
-	struct dma_debug_entry *entry;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	if (unlikely(virt == NULL))
-		return;
-
-	/* handle vmalloc and linear addresses */
-	if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
-		return;
-
-	entry = dma_entry_alloc();
-	if (!entry)
-		return;
-
-	entry->type      = dma_debug_coherent;
-	entry->dev       = dev;
-	entry->offset	 = offset_in_page(virt);
-	entry->size      = size;
-	entry->dev_addr  = dma_addr;
-	entry->direction = DMA_BIDIRECTIONAL;
-
-	if (is_vmalloc_addr(virt))
-		entry->pfn = vmalloc_to_pfn(virt);
-	else
-		entry->pfn = page_to_pfn(virt_to_page(virt));
-
-	add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_alloc_coherent);
-
-void debug_dma_free_coherent(struct device *dev, size_t size,
-			 void *virt, dma_addr_t addr)
-{
-	struct dma_debug_entry ref = {
-		.type           = dma_debug_coherent,
-		.dev            = dev,
-		.offset		= offset_in_page(virt),
-		.dev_addr       = addr,
-		.size           = size,
-		.direction      = DMA_BIDIRECTIONAL,
-	};
-
-	/* handle vmalloc and linear addresses */
-	if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
-		return;
-
-	if (is_vmalloc_addr(virt))
-		ref.pfn = vmalloc_to_pfn(virt);
-	else
-		ref.pfn = page_to_pfn(virt_to_page(virt));
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_free_coherent);
-
-void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
-			    int direction, dma_addr_t dma_addr)
-{
-	struct dma_debug_entry *entry;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	entry = dma_entry_alloc();
-	if (!entry)
-		return;
-
-	entry->type		= dma_debug_resource;
-	entry->dev		= dev;
-	entry->pfn		= PHYS_PFN(addr);
-	entry->offset		= offset_in_page(addr);
-	entry->size		= size;
-	entry->dev_addr		= dma_addr;
-	entry->direction	= direction;
-	entry->map_err_type	= MAP_ERR_NOT_CHECKED;
-
-	add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_map_resource);
-
-void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
-			      size_t size, int direction)
-{
-	struct dma_debug_entry ref = {
-		.type           = dma_debug_resource,
-		.dev            = dev,
-		.dev_addr       = dma_addr,
-		.size           = size,
-		.direction      = direction,
-	};
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_unmap_resource);
-
-void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
-				   size_t size, int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, true);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
-
-void debug_dma_sync_single_for_device(struct device *dev,
-				      dma_addr_t dma_handle, size_t size,
-				      int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, false);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_for_device);
-
-void debug_dma_sync_single_range_for_cpu(struct device *dev,
-					 dma_addr_t dma_handle,
-					 unsigned long offset, size_t size,
-					 int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = offset + size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, true);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
-
-void debug_dma_sync_single_range_for_device(struct device *dev,
-					    dma_addr_t dma_handle,
-					    unsigned long offset,
-					    size_t size, int direction)
-{
-	struct dma_debug_entry ref;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	ref.type         = dma_debug_single;
-	ref.dev          = dev;
-	ref.dev_addr     = dma_handle;
-	ref.size         = offset + size;
-	ref.direction    = direction;
-	ref.sg_call_ents = 0;
-
-	check_sync(dev, &ref, false);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
-
-void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-			       int nelems, int direction)
-{
-	struct scatterlist *s;
-	int mapped_ents = 0, i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sg, s, nelems, i) {
-
-		struct dma_debug_entry ref = {
-			.type           = dma_debug_sg,
-			.dev            = dev,
-			.pfn		= page_to_pfn(sg_page(s)),
-			.offset		= s->offset,
-			.dev_addr       = sg_dma_address(s),
-			.size           = sg_dma_len(s),
-			.direction      = direction,
-			.sg_call_ents   = nelems,
-		};
-
-		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-		if (i >= mapped_ents)
-			break;
-
-		check_sync(dev, &ref, true);
-	}
-}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
-
-void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-				  int nelems, int direction)
-{
-	struct scatterlist *s;
-	int mapped_ents = 0, i;
-
-	if (unlikely(dma_debug_disabled()))
-		return;
-
-	for_each_sg(sg, s, nelems, i) {
-
-		struct dma_debug_entry ref = {
-			.type           = dma_debug_sg,
-			.dev            = dev,
-			.pfn		= page_to_pfn(sg_page(s)),
-			.offset		= s->offset,
-			.dev_addr       = sg_dma_address(s),
-			.size           = sg_dma_len(s),
-			.direction      = direction,
-			.sg_call_ents   = nelems,
-		};
-		if (!i)
-			mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-		if (i >= mapped_ents)
-			break;
-
-		check_sync(dev, &ref, false);
-	}
-}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
-
-static int __init dma_debug_driver_setup(char *str)
-{
-	int i;
-
-	for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
-		current_driver_name[i] = *str;
-		if (*str == 0)
-			break;
-	}
-
-	if (current_driver_name[0])
-		pr_info("DMA-API: enable driver filter for driver [%s]\n",
-			current_driver_name);
-
-
-	return 1;
-}
-__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
deleted file mode 100644
index 8be8106270c2..000000000000
--- a/lib/dma-direct.c
+++ /dev/null
@@ -1,204 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DMA operations that map physical memory directly without using an IOMMU or
- * flushing caches.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-direct.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-contiguous.h>
-#include <linux/pfn.h>
-#include <linux/set_memory.h>
-
-#define DIRECT_MAPPING_ERROR		0
-
-/*
- * Most architectures use ZONE_DMA for the first 16 Megabytes, but
- * some use it for entirely different regions:
- */
-#ifndef ARCH_ZONE_DMA_BITS
-#define ARCH_ZONE_DMA_BITS 24
-#endif
-
-/*
- * For AMD SEV all DMA must be to unencrypted addresses.
- */
-static inline bool force_dma_unencrypted(void)
-{
-	return sev_active();
-}
-
-static bool
-check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
-		const char *caller)
-{
-	if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
-		if (!dev->dma_mask) {
-			dev_err(dev,
-				"%s: call on device without dma_mask\n",
-				caller);
-			return false;
-		}
-
-		if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
-			dev_err(dev,
-				"%s: overflow %pad+%zu of device mask %llx\n",
-				caller, &dma_addr, size, *dev->dma_mask);
-		}
-		return false;
-	}
-	return true;
-}
-
-static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
-{
-	dma_addr_t addr = force_dma_unencrypted() ?
-		__phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
-	return addr + size - 1 <= dev->coherent_dma_mask;
-}
-
-void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, unsigned long attrs)
-{
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	int page_order = get_order(size);
-	struct page *page = NULL;
-	void *ret;
-
-	/* we always manually zero the memory once we are done: */
-	gfp &= ~__GFP_ZERO;
-
-	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
-		gfp |= GFP_DMA;
-	if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
-		gfp |= GFP_DMA32;
-
-again:
-	/* CMA can be used only in the context which permits sleeping */
-	if (gfpflags_allow_blocking(gfp)) {
-		page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
-		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-			dma_release_from_contiguous(dev, page, count);
-			page = NULL;
-		}
-	}
-	if (!page)
-		page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
-
-	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-		__free_pages(page, page_order);
-		page = NULL;
-
-		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
-		    dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
-		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
-			gfp |= GFP_DMA32;
-			goto again;
-		}
-
-		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
-		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
-		    !(gfp & GFP_DMA)) {
-			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
-			goto again;
-		}
-	}
-
-	if (!page)
-		return NULL;
-	ret = page_address(page);
-	if (force_dma_unencrypted()) {
-		set_memory_decrypted((unsigned long)ret, 1 << page_order);
-		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
-	} else {
-		*dma_handle = phys_to_dma(dev, page_to_phys(page));
-	}
-	memset(ret, 0, size);
-	return ret;
-}
-
-/*
- * NOTE: this function must never look at the dma_addr argument, because we want
- * to be able to use it as a helper for iommu implementations as well.
- */
-void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
-		dma_addr_t dma_addr, unsigned long attrs)
-{
-	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned int page_order = get_order(size);
-
-	if (force_dma_unencrypted())
-		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
-	if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
-		free_pages((unsigned long)cpu_addr, page_order);
-}
-
-dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
-
-	if (!check_addr(dev, dma_addr, size, __func__))
-		return DIRECT_MAPPING_ERROR;
-	return dma_addr;
-}
-
-int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
-		enum dma_data_direction dir, unsigned long attrs)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sgl, sg, nents, i) {
-		BUG_ON(!sg_page(sg));
-
-		sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
-		if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
-			return 0;
-		sg_dma_len(sg) = sg->length;
-	}
-
-	return nents;
-}
-
-int dma_direct_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_ZONE_DMA
-	if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
-		return 0;
-#else
-	/*
-	 * Because 32-bit DMA masks are so common we expect every architecture
-	 * to be able to satisfy them - either by not supporting more physical
-	 * memory, or by providing a ZONE_DMA32.  If neither is the case, the
-	 * architecture needs to use an IOMMU instead of the direct mapping.
-	 */
-	if (mask < DMA_BIT_MASK(32))
-		return 0;
-#endif
-	/*
-	 * Various PCI/PCIe bridges have broken support for > 32bit DMA even
-	 * if the device itself might support it.
-	 */
-	if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
-		return 0;
-	return 1;
-}
-
-int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return dma_addr == DIRECT_MAPPING_ERROR;
-}
-
-const struct dma_map_ops dma_direct_ops = {
-	.alloc			= dma_direct_alloc,
-	.free			= dma_direct_free,
-	.map_page		= dma_direct_map_page,
-	.map_sg			= dma_direct_map_sg,
-	.dma_supported		= dma_direct_supported,
-	.mapping_error		= dma_direct_mapping_error,
-};
-EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c
deleted file mode 100644
index 79e9a757387f..000000000000
--- a/lib/dma-noncoherent.c
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018 Christoph Hellwig.
- *
- * DMA operations that map physical memory directly without providing cache
- * coherence.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
-#include <linux/scatterlist.h>
-
-static void dma_noncoherent_sync_single_for_device(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-	arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
-}
-
-static void dma_noncoherent_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
-}
-
-static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	dma_addr_t addr;
-
-	addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
-	if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
-				size, dir);
-	return addr;
-}
-
-static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
-	if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
-	return nents;
-}
-
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
-static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-	arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
-}
-
-static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
-}
-
-static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
-}
-#endif
-
-const struct dma_map_ops dma_noncoherent_ops = {
-	.alloc			= arch_dma_alloc,
-	.free			= arch_dma_free,
-	.mmap			= arch_dma_mmap,
-	.sync_single_for_device	= dma_noncoherent_sync_single_for_device,
-	.sync_sg_for_device	= dma_noncoherent_sync_sg_for_device,
-	.map_page		= dma_noncoherent_map_page,
-	.map_sg			= dma_noncoherent_map_sg,
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
-	.sync_single_for_cpu	= dma_noncoherent_sync_single_for_cpu,
-	.sync_sg_for_cpu	= dma_noncoherent_sync_sg_for_cpu,
-	.unmap_page		= dma_noncoherent_unmap_page,
-	.unmap_sg		= dma_noncoherent_unmap_sg,
-#endif
-	.dma_supported		= dma_direct_supported,
-	.mapping_error		= dma_direct_mapping_error,
-	.cache_sync		= arch_dma_cache_sync,
-};
-EXPORT_SYMBOL(dma_noncoherent_ops);
diff --git a/lib/dma-virt.c b/lib/dma-virt.c
deleted file mode 100644
index 8e61a02ef9ca..000000000000
--- a/lib/dma-virt.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *	lib/dma-virt.c
- *
- * DMA operations that map to virtual addresses without flushing memory.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-#include <linux/scatterlist.h>
-
-static void *dma_virt_alloc(struct device *dev, size_t size,
-			    dma_addr_t *dma_handle, gfp_t gfp,
-			    unsigned long attrs)
-{
-	void *ret;
-
-	ret = (void *)__get_free_pages(gfp, get_order(size));
-	if (ret)
-		*dma_handle = (uintptr_t)ret;
-	return ret;
-}
-
-static void dma_virt_free(struct device *dev, size_t size,
-			  void *cpu_addr, dma_addr_t dma_addr,
-			  unsigned long attrs)
-{
-	free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
-				    unsigned long offset, size_t size,
-				    enum dma_data_direction dir,
-				    unsigned long attrs)
-{
-	return (uintptr_t)(page_address(page) + offset);
-}
-
-static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
-			   int nents, enum dma_data_direction dir,
-			   unsigned long attrs)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sgl, sg, nents, i) {
-		BUG_ON(!sg_page(sg));
-		sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
-		sg_dma_len(sg) = sg->length;
-	}
-
-	return nents;
-}
-
-const struct dma_map_ops dma_virt_ops = {
-	.alloc			= dma_virt_alloc,
-	.free			= dma_virt_free,
-	.map_page		= dma_virt_map_page,
-	.map_sg			= dma_virt_map_sg,
-};
-EXPORT_SYMBOL(dma_virt_ops);
diff --git a/lib/idr.c b/lib/idr.c
index 823b813f08f8..ed9c169c12bd 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -4,9 +4,9 @@
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/xarray.h>
 
 DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
-static DEFINE_SPINLOCK(simple_ida_lock);
 
 /**
  * idr_alloc_u32() - Allocate an ID.
@@ -581,7 +581,7 @@ again:
 	if (!ida_pre_get(ida, gfp_mask))
 		return -ENOMEM;
 
-	spin_lock_irqsave(&simple_ida_lock, flags);
+	xa_lock_irqsave(&ida->ida_rt, flags);
 	ret = ida_get_new_above(ida, start, &id);
 	if (!ret) {
 		if (id > max) {
@@ -591,7 +591,7 @@ again:
 			ret = id;
 		}
 	}
-	spin_unlock_irqrestore(&simple_ida_lock, flags);
+	xa_unlock_irqrestore(&ida->ida_rt, flags);
 
 	if (unlikely(ret == -EAGAIN))
 		goto again;
@@ -615,8 +615,8 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
 	unsigned long flags;
 
 	BUG_ON((int)id < 0);
-	spin_lock_irqsave(&simple_ida_lock, flags);
+	xa_lock_irqsave(&ida->ida_rt, flags);
 	ida_remove(ida, id);
-	spin_unlock_irqrestore(&simple_ida_lock, flags);
+	xa_unlock_irqrestore(&ida->ida_rt, flags);
 }
 EXPORT_SYMBOL(ida_simple_remove);
diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index 835242e74aaa..75509a1511a3 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -64,11 +64,12 @@ static int interval_tree_test_init(void)
 	unsigned long results;
 	cycles_t time1, time2, time;
 
-	nodes = kmalloc(nnodes * sizeof(struct interval_tree_node), GFP_KERNEL);
+	nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
+			      GFP_KERNEL);
 	if (!nodes)
 		return -ENOMEM;
 
-	queries = kmalloc(nsearches * sizeof(int), GFP_KERNEL);
+	queries = kmalloc_array(nsearches, sizeof(int), GFP_KERNEL);
 	if (!queries) {
 		kfree(nodes);
 		return -ENOMEM;
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 54e5bbaa3200..517f5853ffed 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 		if (ioremap_pmd_enabled() &&
 		    ((next - addr) == PMD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
-		    pmd_free_pte_page(pmd)) {
+		    pmd_free_pte_page(pmd, addr)) {
 			if (pmd_set_huge(pmd, phys_addr + addr, prot))
 				continue;
 		}
@@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 		if (ioremap_pud_enabled() &&
 		    ((next - addr) == PUD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
-		    pud_free_pmd_page(pud)) {
+		    pud_free_pmd_page(pud, addr)) {
 			if (pud_set_huge(pud, phys_addr + addr, prot))
 				continue;
 		}
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7e43cd54c84c..8be175df3075 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -596,15 +596,70 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
 	return ret;
 }
 
+static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
+				struct iov_iter *i)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	size_t n, off, xfer = 0;
+	int idx;
+
+	if (!sanity(i))
+		return 0;
+
+	bytes = n = push_pipe(i, bytes, &idx, &off);
+	if (unlikely(!n))
+		return 0;
+	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
+		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
+		unsigned long rem;
+
+		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
+				chunk);
+		i->idx = idx;
+		i->iov_offset = off + chunk - rem;
+		xfer += chunk - rem;
+		if (rem)
+			break;
+		n -= chunk;
+		addr += chunk;
+	}
+	i->count -= xfer;
+	return xfer;
+}
+
+/**
+ * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
+ * @addr: source kernel address
+ * @bytes: total transfer length
+ * @iter: destination iterator
+ *
+ * The pmem driver arranges for filesystem-dax to use this facility via
+ * dax_copy_to_iter() for protecting read/write to persistent memory.
+ * Unless / until an architecture can guarantee identical performance
+ * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
+ * performance regression to switch more users to the mcsafe version.
+ *
+ * Otherwise, the main differences between this and typical _copy_to_iter().
+ *
+ * * Typical tail/residue handling after a fault retries the copy
+ *   byte-by-byte until the fault happens again. Re-triggering machine
+ *   checks is potentially fatal so the implementation uses source
+ *   alignment and poison alignment assumptions to avoid re-triggering
+ *   hardware exceptions.
+ *
+ * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
+ *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
+ *   a short copy.
+ *
+ * See MCSAFE_TEST for self-test.
+ */
 size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 
-	if (unlikely(i->type & ITER_PIPE)) {
-		WARN_ON(1);
-		return 0;
-	}
+	if (unlikely(i->type & ITER_PIPE))
+		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
 	if (iter_is_iovec(i))
 		might_fault();
 	iterate_and_advance(i, bytes, v,
@@ -701,6 +756,20 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 EXPORT_SYMBOL(_copy_from_iter_nocache);
 
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+/**
+ * _copy_from_iter_flushcache - write destination through cpu cache
+ * @addr: destination kernel address
+ * @bytes: total transfer length
+ * @iter: source iterator
+ *
+ * The pmem driver arranges for filesystem-dax to use this facility via
+ * dax_copy_from_iter() for ensuring that writes to persistent memory
+ * are flushed through the CPU cache. It is differentiated from
+ * _copy_from_iter_nocache() in that guarantees all data is flushed for
+ * all iterator types. The _copy_from_iter_nocache() only attempts to
+ * bypass the cache for the ITER_IOVEC case, and on some archs may use
+ * instructions that strand dirty-data in the cache.
+ */
 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
diff --git a/lib/kfifo.c b/lib/kfifo.c
index b0f757bf7213..015656aa8182 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -54,7 +54,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
 		return -EINVAL;
 	}
 
-	fifo->data = kmalloc(size * esize, gfp_mask);
+	fifo->data = kmalloc_array(esize, size, gfp_mask);
 
 	if (!fifo->data) {
 		fifo->mask = 0;
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 15ea216a67ce..63d0816ab23b 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -22,6 +22,7 @@
 #include <linux/socket.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/uidgid.h>
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
@@ -231,30 +232,6 @@ out:
 	return r;
 }
 
-#ifdef CONFIG_NET
-static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
-{
-	struct kobject *kobj = data, *ksobj;
-	const struct kobj_ns_type_operations *ops;
-
-	ops = kobj_ns_ops(kobj);
-	if (!ops && kobj->kset) {
-		ksobj = &kobj->kset->kobj;
-		if (ksobj->parent != NULL)
-			ops = kobj_ns_ops(ksobj->parent);
-	}
-
-	if (ops && ops->netlink_ns && kobj->ktype->namespace) {
-		const void *sock_ns, *ns;
-		ns = kobj->ktype->namespace(kobj);
-		sock_ns = ops->netlink_ns(dsk);
-		return sock_ns != ns;
-	}
-
-	return 0;
-}
-#endif
-
 #ifdef CONFIG_UEVENT_HELPER
 static int kobj_usermode_filter(struct kobject *kobj)
 {
@@ -296,15 +273,44 @@ static void cleanup_uevent_env(struct subprocess_info *info)
 }
 #endif
 
-static int kobject_uevent_net_broadcast(struct kobject *kobj,
-					struct kobj_uevent_env *env,
+#ifdef CONFIG_NET
+static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env,
 					const char *action_string,
 					const char *devpath)
 {
-	int retval = 0;
-#if defined(CONFIG_NET)
+	struct netlink_skb_parms *parms;
+	struct sk_buff *skb = NULL;
+	char *scratch;
+	size_t len;
+
+	/* allocate message with maximum possible size */
+	len = strlen(action_string) + strlen(devpath) + 2;
+	skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+	if (!skb)
+		return NULL;
+
+	/* add header */
+	scratch = skb_put(skb, len);
+	sprintf(scratch, "%s@%s", action_string, devpath);
+
+	skb_put_data(skb, env->buf, env->buflen);
+
+	parms = &NETLINK_CB(skb);
+	parms->creds.uid = GLOBAL_ROOT_UID;
+	parms->creds.gid = GLOBAL_ROOT_GID;
+	parms->dst_group = 1;
+	parms->portid = 0;
+
+	return skb;
+}
+
+static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env,
+					 const char *action_string,
+					 const char *devpath)
+{
 	struct sk_buff *skb = NULL;
 	struct uevent_sock *ue_sk;
+	int retval = 0;
 
 	/* send netlink message */
 	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
@@ -314,37 +320,99 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
 			continue;
 
 		if (!skb) {
-			/* allocate message with the maximum possible size */
-			size_t len = strlen(action_string) + strlen(devpath) + 2;
-			char *scratch;
-
 			retval = -ENOMEM;
-			skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+			skb = alloc_uevent_skb(env, action_string, devpath);
 			if (!skb)
 				continue;
-
-			/* add header */
-			scratch = skb_put(skb, len);
-			sprintf(scratch, "%s@%s", action_string, devpath);
-
-			skb_put_data(skb, env->buf, env->buflen);
-
-			NETLINK_CB(skb).dst_group = 1;
 		}
 
-		retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb),
-						    0, 1, GFP_KERNEL,
-						    kobj_bcast_filter,
-						    kobj);
+		retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1,
+					   GFP_KERNEL);
 		/* ENOBUFS should be handled in userspace */
 		if (retval == -ENOBUFS || retval == -ESRCH)
 			retval = 0;
 	}
 	consume_skb(skb);
-#endif
+
 	return retval;
 }
 
+static int uevent_net_broadcast_tagged(struct sock *usk,
+				       struct kobj_uevent_env *env,
+				       const char *action_string,
+				       const char *devpath)
+{
+	struct user_namespace *owning_user_ns = sock_net(usk)->user_ns;
+	struct sk_buff *skb = NULL;
+	int ret = 0;
+
+	skb = alloc_uevent_skb(env, action_string, devpath);
+	if (!skb)
+		return -ENOMEM;
+
+	/* fix credentials */
+	if (owning_user_ns != &init_user_ns) {
+		struct netlink_skb_parms *parms = &NETLINK_CB(skb);
+		kuid_t root_uid;
+		kgid_t root_gid;
+
+		/* fix uid */
+		root_uid = make_kuid(owning_user_ns, 0);
+		if (uid_valid(root_uid))
+			parms->creds.uid = root_uid;
+
+		/* fix gid */
+		root_gid = make_kgid(owning_user_ns, 0);
+		if (gid_valid(root_gid))
+			parms->creds.gid = root_gid;
+	}
+
+	ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL);
+	/* ENOBUFS should be handled in userspace */
+	if (ret == -ENOBUFS || ret == -ESRCH)
+		ret = 0;
+
+	return ret;
+}
+#endif
+
+static int kobject_uevent_net_broadcast(struct kobject *kobj,
+					struct kobj_uevent_env *env,
+					const char *action_string,
+					const char *devpath)
+{
+	int ret = 0;
+
+#ifdef CONFIG_NET
+	const struct kobj_ns_type_operations *ops;
+	const struct net *net = NULL;
+
+	ops = kobj_ns_ops(kobj);
+	if (!ops && kobj->kset) {
+		struct kobject *ksobj = &kobj->kset->kobj;
+		if (ksobj->parent != NULL)
+			ops = kobj_ns_ops(ksobj->parent);
+	}
+
+	/* kobjects currently only carry network namespace tags and they
+	 * are the only tag relevant here since we want to decide which
+	 * network namespaces to broadcast the uevent into.
+	 */
+	if (ops && ops->netlink_ns && kobj->ktype->namespace)
+		if (ops->type == KOBJ_NS_TYPE_NET)
+			net = kobj->ktype->namespace(kobj);
+
+	if (!net)
+		ret = uevent_net_broadcast_untagged(env, action_string,
+						    devpath);
+	else
+		ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env,
+						  action_string, devpath);
+#endif
+
+	return ret;
+}
+
 static void zap_modalias_env(struct kobj_uevent_env *env)
 {
 	static const char modalias_prefix[] = "MODALIAS=";
@@ -703,9 +771,13 @@ static int uevent_net_init(struct net *net)
 
 	net->uevent_sock = ue_sk;
 
-	mutex_lock(&uevent_sock_mutex);
-	list_add_tail(&ue_sk->list, &uevent_sock_list);
-	mutex_unlock(&uevent_sock_mutex);
+	/* Restrict uevents to initial user namespace. */
+	if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+		mutex_lock(&uevent_sock_mutex);
+		list_add_tail(&ue_sk->list, &uevent_sock_list);
+		mutex_unlock(&uevent_sock_mutex);
+	}
+
 	return 0;
 }
 
@@ -713,9 +785,11 @@ static void uevent_net_exit(struct net *net)
 {
 	struct uevent_sock *ue_sk = net->uevent_sock;
 
-	mutex_lock(&uevent_sock_mutex);
-	list_del(&ue_sk->list);
-	mutex_unlock(&uevent_sock_mutex);
+	if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) {
+		mutex_lock(&uevent_sock_mutex);
+		list_del(&ue_sk->list);
+		mutex_unlock(&uevent_sock_mutex);
+	}
 
 	netlink_kernel_release(ue_sk->sk);
 	kfree(ue_sk);
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 28ba40b99337..2b10a4024c35 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -119,7 +119,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 	slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL);
 	if (!slot)
 		goto out_fail;
-	element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
+	element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL);
 	if (!element)
 		goto out_fail;
 
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
index 7eceeddb3fb8..c2d6f4efcfbc 100644
--- a/lib/mpi/mpi-internal.h
+++ b/lib/mpi/mpi-internal.h
@@ -65,13 +65,6 @@
 typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */
 typedef int mpi_size_t;		/* (must be a signed type) */
 
-static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
-{
-	if (a->alloced < b)
-		return mpi_resize(a, b);
-	return 0;
-}
-
 /* Copy N limbs from S to D.  */
 #define MPN_COPY(d, s, n) \
 	do {					\
@@ -80,13 +73,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
 			(d)[_i] = (s)[_i];	\
 	} while (0)
 
-#define MPN_COPY_INCR(d, s, n) \
-	do {					\
-		mpi_size_t _i;			\
-		for (_i = 0; _i < (n); _i++)	\
-			(d)[_i] = (s)[_i];	\
-	} while (0)
-
 #define MPN_COPY_DECR(d, s, n) \
 	do {					\
 		mpi_size_t _i;			\
@@ -111,15 +97,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
 		}				\
 	} while (0)
 
-#define MPN_NORMALIZE_NOT_ZERO(d, n) \
-	do {				\
-		for (;;) {		\
-			if ((d)[(n)-1])	\
-				break;	\
-			(n)--;		\
-		}			\
-	} while (0)
-
 #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
 	do {							\
 		if ((size) < KARATSUBA_THRESHOLD)		\
@@ -128,46 +105,11 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
 			mul_n(prodp, up, vp, size, tspace);	\
 	} while (0);
 
-/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
- * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
- * If this would yield overflow, DI should be the largest possible number
- * (i.e., only ones).  For correct operation, the most significant bit of D
- * has to be set.  Put the quotient in Q and the remainder in R.
- */
-#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
-	do {								\
-		mpi_limb_t _q, _ql, _r;					\
-		mpi_limb_t _xh, _xl;					\
-		umul_ppmm(_q, _ql, (nh), (di));				\
-		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \
-		umul_ppmm(_xh, _xl, _q, (d));				\
-		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\
-		if (_xh) {						\
-			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\
-			_q++;						\
-			if (_xh) {					\
-				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\
-				_q++;					\
-			}						\
-		}							\
-		if (_r >= (d)) {					\
-			_r -= (d);					\
-			_q++;						\
-		}							\
-		(r) = _r;						\
-		(q) = _q;						\
-	} while (0)
-
 /*-- mpiutil.c --*/
 mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
 void mpi_free_limb_space(mpi_ptr_t a);
 void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
 
-/*-- mpi-bit.c --*/
-void mpi_rshift_limbs(MPI a, unsigned int count);
-int mpi_lshift_limbs(MPI a, unsigned int count);
-
-/*-- mpihelp-add.c --*/
 static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb);
 mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
@@ -175,7 +117,6 @@ mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
 
-/*-- mpihelp-sub.c --*/
 static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb);
 mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
@@ -183,10 +124,10 @@ mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
 		       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
 
-/*-- mpihelp-cmp.c --*/
+/*-- mpih-cmp.c --*/
 int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
 
-/*-- mpihelp-mul.c --*/
+/*-- mpih-mul.c --*/
 
 struct karatsuba_ctx {
 	struct karatsuba_ctx *next;
@@ -202,7 +143,6 @@ mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			    mpi_size_t s1_size, mpi_limb_t s2_limb);
 mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			    mpi_size_t s1_size, mpi_limb_t s2_limb);
-int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
 int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
 		mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
 void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
@@ -214,21 +154,16 @@ int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
 			       mpi_ptr_t vp, mpi_size_t vsize,
 			       struct karatsuba_ctx *ctx);
 
-/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+/*-- generic_mpih-mul1.c --*/
 mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb);
 
-/*-- mpihelp-div.c --*/
-mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
-			 mpi_limb_t divisor_limb);
+/*-- mpih-div.c --*/
 mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
 			  mpi_ptr_t np, mpi_size_t nsize,
 			  mpi_ptr_t dp, mpi_size_t dsize);
-mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
-			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
-			    mpi_limb_t divisor_limb);
 
-/*-- mpihelp-shift.c --*/
+/*-- generic_mpih-[lr]shift.c --*/
 mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
 			  unsigned cnt);
 mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 314f4dfa603e..20ed0f766787 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -91,14 +91,14 @@ int mpi_resize(MPI a, unsigned nlimbs)
 		return 0;	/* no need to do it */
 
 	if (a->d) {
-		p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+		p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
 		if (!p)
 			return -ENOMEM;
 		memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
 		kzfree(a->d);
 		a->d = p;
 	} else {
-		a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+		a->d = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL);
 		if (!a->d)
 			return -ENOMEM;
 	}
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index 6016f1deb1f5..beb14839b41a 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -112,18 +112,6 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
 		  min(pool->nr_free, pool->percpu_batch_size));
 }
 
-static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
-{
-	int tag = -ENOSPC;
-
-	spin_lock(&tags->lock);
-	if (tags->nr_free)
-		tag = tags->freelist[--tags->nr_free];
-	spin_unlock(&tags->lock);
-
-	return tag;
-}
-
 /**
  * percpu_ida_alloc - allocate a tag
  * @pool: pool to allocate from
@@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 	DEFINE_WAIT(wait);
 	struct percpu_ida_cpu *tags;
 	unsigned long flags;
-	int tag;
+	int tag = -ENOSPC;
 
-	local_irq_save(flags);
-	tags = this_cpu_ptr(pool->tag_cpu);
+	tags = raw_cpu_ptr(pool->tag_cpu);
+	spin_lock_irqsave(&tags->lock, flags);
 
 	/* Fastpath */
-	tag = alloc_local_tag(tags);
-	if (likely(tag >= 0)) {
-		local_irq_restore(flags);
+	if (likely(tags->nr_free)) {
+		tag = tags->freelist[--tags->nr_free];
+		spin_unlock_irqrestore(&tags->lock, flags);
 		return tag;
 	}
+	spin_unlock_irqrestore(&tags->lock, flags);
 
 	while (1) {
-		spin_lock(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
+		tags = this_cpu_ptr(pool->tag_cpu);
 
 		/*
 		 * prepare_to_wait() must come before steal_tags(), in case
@@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 						&pool->cpus_have_tags);
 		}
 
-		spin_unlock(&pool->lock);
-		local_irq_restore(flags);
+		spin_unlock_irqrestore(&pool->lock, flags);
 
 		if (tag >= 0 || state == TASK_RUNNING)
 			break;
@@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state)
 		}
 
 		schedule();
-
-		local_irq_save(flags);
-		tags = this_cpu_ptr(pool->tag_cpu);
 	}
 	if (state != TASK_RUNNING)
 		finish_wait(&pool->wait, &wait);
@@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 
 	BUG_ON(tag >= pool->nr_tags);
 
-	local_irq_save(flags);
-	tags = this_cpu_ptr(pool->tag_cpu);
+	tags = raw_cpu_ptr(pool->tag_cpu);
 
-	spin_lock(&tags->lock);
+	spin_lock_irqsave(&tags->lock, flags);
 	tags->freelist[tags->nr_free++] = tag;
 
 	nr_free = tags->nr_free;
-	spin_unlock(&tags->lock);
 
 	if (nr_free == 1) {
 		cpumask_set_cpu(smp_processor_id(),
 				&pool->cpus_have_tags);
 		wake_up(&pool->wait);
 	}
+	spin_unlock_irqrestore(&tags->lock, flags);
 
 	if (nr_free == pool->percpu_max_size) {
-		spin_lock(&pool->lock);
+		spin_lock_irqsave(&pool->lock, flags);
+		spin_lock(&tags->lock);
 
-		/*
-		 * Global lock held and irqs disabled, don't need percpu
-		 * lock
-		 */
 		if (tags->nr_free == pool->percpu_max_size) {
 			move_tags(pool->freelist, &pool->nr_free,
 				  tags->freelist, &tags->nr_free,
@@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
 
 			wake_up(&pool->wait);
 		}
-		spin_unlock(&pool->lock);
+		spin_unlock(&tags->lock);
+		spin_unlock_irqrestore(&pool->lock, flags);
 	}
-
-	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ida_free);
 
@@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
 	struct percpu_ida_cpu *remote;
 	unsigned cpu, i, err = 0;
 
-	local_irq_save(flags);
 	for_each_possible_cpu(cpu) {
 		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-		spin_lock(&remote->lock);
+		spin_lock_irqsave(&remote->lock, flags);
 		for (i = 0; i < remote->nr_free; i++) {
 			err = fn(remote->freelist[i], data);
 			if (err)
 				break;
 		}
-		spin_unlock(&remote->lock);
+		spin_unlock_irqrestore(&remote->lock, flags);
 		if (err)
 			goto out;
 	}
 
-	spin_lock(&pool->lock);
+	spin_lock_irqsave(&pool->lock, flags);
 	for (i = 0; i < pool->nr_free; i++) {
 		err = fn(pool->freelist[i], data);
 		if (err)
 			break;
 	}
-	spin_unlock(&pool->lock);
+	spin_unlock_irqrestore(&pool->lock, flags);
 out:
-	local_irq_restore(flags);
 	return err;
 }
 EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
index 140fa8bb5c23..914ebe98fc21 100644
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -55,22 +55,24 @@ static inline void XOR(int x, int y, int z)
 	asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
 }
 
-static inline void LOAD_DATA(int x, int n, u8 *ptr)
+static inline void LOAD_DATA(int x, u8 *ptr)
 {
-	typedef struct { u8 _[16*n]; } addrtype;
+	typedef struct { u8 _[16 * $#]; } addrtype;
 	register addrtype *__ptr asm("1") = (addrtype *) ptr;
 
 	asm volatile ("VLM %2,%3,0,%r1"
-		      : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
+		      : : "m" (*__ptr), "a" (__ptr), "i" (x),
+			  "i" (x + $# - 1));
 }
 
-static inline void STORE_DATA(int x, int n, u8 *ptr)
+static inline void STORE_DATA(int x, u8 *ptr)
 {
-	typedef struct { u8 _[16*n]; } addrtype;
+	typedef struct { u8 _[16 * $#]; } addrtype;
 	register addrtype *__ptr asm("1") = (addrtype *) ptr;
 
 	asm volatile ("VSTM %2,%3,0,1"
-		      : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
+		      : "=m" (*__ptr) : "a" (__ptr), "i" (x),
+			"i" (x + $# - 1));
 }
 
 static inline void COPY_VEC(int x, int y)
@@ -93,19 +95,19 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	q = dptr[z0 + 2];	/* RS syndrome */
 
 	for (d = 0; d < bytes; d += $#*NSIZE) {
-		LOAD_DATA(0,$#,&dptr[z0][d]);
+		LOAD_DATA(0,&dptr[z0][d]);
 		COPY_VEC(8+$$,0+$$);
 		for (z = z0 - 1; z >= 0; z--) {
 			MASK(16+$$,8+$$);
 			AND(16+$$,16+$$,25);
 			SHLBYTE(8+$$,8+$$);
 			XOR(8+$$,8+$$,16+$$);
-			LOAD_DATA(16,$#,&dptr[z][d]);
+			LOAD_DATA(16,&dptr[z][d]);
 			XOR(0+$$,0+$$,16+$$);
 			XOR(8+$$,8+$$,16+$$);
 		}
-		STORE_DATA(0,$#,&p[d]);
-		STORE_DATA(8,$#,&q[d]);
+		STORE_DATA(0,&p[d]);
+		STORE_DATA(8,&q[d]);
 	}
 	kernel_fpu_end(&vxstate, KERNEL_VXR);
 }
@@ -127,14 +129,14 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
 
 	for (d = 0; d < bytes; d += $#*NSIZE) {
 		/* P/Q data pages */
-		LOAD_DATA(0,$#,&dptr[z0][d]);
+		LOAD_DATA(0,&dptr[z0][d]);
 		COPY_VEC(8+$$,0+$$);
 		for (z = z0 - 1; z >= start; z--) {
 			MASK(16+$$,8+$$);
 			AND(16+$$,16+$$,25);
 			SHLBYTE(8+$$,8+$$);
 			XOR(8+$$,8+$$,16+$$);
-			LOAD_DATA(16,$#,&dptr[z][d]);
+			LOAD_DATA(16,&dptr[z][d]);
 			XOR(0+$$,0+$$,16+$$);
 			XOR(8+$$,8+$$,16+$$);
 		}
@@ -145,12 +147,12 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
 			SHLBYTE(8+$$,8+$$);
 			XOR(8+$$,8+$$,16+$$);
 		}
-		LOAD_DATA(16,$#,&p[d]);
+		LOAD_DATA(16,&p[d]);
 		XOR(16+$$,16+$$,0+$$);
-		STORE_DATA(16,$#,&p[d]);
-		LOAD_DATA(16,$#,&q[d]);
+		STORE_DATA(16,&p[d]);
+		LOAD_DATA(16,&q[d]);
 		XOR(16+$$,16+$$,8+$$);
-		STORE_DATA(16,$#,&q[d]);
+		STORE_DATA(16,&q[d]);
 	}
 	kernel_fpu_end(&vxstate, KERNEL_VXR);
 }
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index 7d36c1e27ff6..b7055b2a07d3 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -247,7 +247,7 @@ static int __init rbtree_test_init(void)
 	cycles_t time1, time2, time;
 	struct rb_node *node;
 
-	nodes = kmalloc(nnodes * sizeof(*nodes), GFP_KERNEL);
+	nodes = kmalloc_array(nnodes, sizeof(*nodes), GFP_KERNEL);
 	if (!nodes)
 		return -ENOMEM;
 
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
index dfcf54242fb9..e5fdc8b9e856 100644
--- a/lib/reed_solomon/reed_solomon.c
+++ b/lib/reed_solomon/reed_solomon.c
@@ -88,15 +88,15 @@ static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int),
 	rs->gffunc = gffunc;
 
 	/* Allocate the arrays */
-	rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp);
+	rs->alpha_to = kmalloc_array(rs->nn + 1, sizeof(uint16_t), gfp);
 	if (rs->alpha_to == NULL)
 		goto err;
 
-	rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp);
+	rs->index_of = kmalloc_array(rs->nn + 1, sizeof(uint16_t), gfp);
 	if (rs->index_of == NULL)
 		goto err;
 
-	rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), gfp);
+	rs->genpoly = kmalloc_array(rs->nroots + 1, sizeof(uint16_t), gfp);
 	if(rs->genpoly == NULL)
 		goto err;
 
@@ -283,7 +283,7 @@ out:
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
- *  @gfp:	GFP_ flags for allocations
+ *  @gfp:	Memory allocation flags.
  */
 struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim,
 			       int nroots, gfp_t gfp)
diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..ebcf8cd49e05 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -35,13 +35,13 @@
  *
  */
 
+#include <linux/mutex.h>
 #include <linux/refcount.h>
+#include <linux/spinlock.h>
 #include <linux/bug.h>
 
-#ifdef CONFIG_REFCOUNT_FULL
-
 /**
- * refcount_add_not_zero - add a value to a refcount unless it is 0
+ * refcount_add_not_zero_checked - add a value to a refcount unless it is 0
  * @i: the value to add to the refcount
  * @r: the refcount
  *
@@ -58,7 +58,7 @@
  *
  * Return: false if the passed refcount is 0, true otherwise
  */
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -79,10 +79,10 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 
 	return true;
 }
-EXPORT_SYMBOL(refcount_add_not_zero);
+EXPORT_SYMBOL(refcount_add_not_zero_checked);
 
 /**
- * refcount_add - add a value to a refcount
+ * refcount_add_checked - add a value to a refcount
  * @i: the value to add to the refcount
  * @r: the refcount
  *
@@ -97,14 +97,14 @@ EXPORT_SYMBOL(refcount_add_not_zero);
  * cases, refcount_inc(), or one of its variants, should instead be used to
  * increment a reference count.
  */
-void refcount_add(unsigned int i, refcount_t *r)
+void refcount_add_checked(unsigned int i, refcount_t *r)
 {
-	WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n");
 }
-EXPORT_SYMBOL(refcount_add);
+EXPORT_SYMBOL(refcount_add_checked);
 
 /**
- * refcount_inc_not_zero - increment a refcount unless it is 0
+ * refcount_inc_not_zero_checked - increment a refcount unless it is 0
  * @r: the refcount to increment
  *
  * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN.
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(refcount_add);
  *
  * Return: true if the increment was successful, false otherwise
  */
-bool refcount_inc_not_zero(refcount_t *r)
+bool refcount_inc_not_zero_checked(refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -134,10 +134,10 @@ bool refcount_inc_not_zero(refcount_t *r)
 
 	return true;
 }
-EXPORT_SYMBOL(refcount_inc_not_zero);
+EXPORT_SYMBOL(refcount_inc_not_zero_checked);
 
 /**
- * refcount_inc - increment a refcount
+ * refcount_inc_checked - increment a refcount
  * @r: the refcount to increment
  *
  * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN.
@@ -148,14 +148,14 @@ EXPORT_SYMBOL(refcount_inc_not_zero);
  * Will WARN if the refcount is 0, as this represents a possible use-after-free
  * condition.
  */
-void refcount_inc(refcount_t *r)
+void refcount_inc_checked(refcount_t *r)
 {
-	WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n");
 }
-EXPORT_SYMBOL(refcount_inc);
+EXPORT_SYMBOL(refcount_inc_checked);
 
 /**
- * refcount_sub_and_test - subtract from a refcount and test if it is 0
+ * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0
  * @i: amount to subtract from the refcount
  * @r: the refcount
  *
@@ -174,7 +174,7 @@ EXPORT_SYMBOL(refcount_inc);
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -192,10 +192,10 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 
 	return !new;
 }
-EXPORT_SYMBOL(refcount_sub_and_test);
+EXPORT_SYMBOL(refcount_sub_and_test_checked);
 
 /**
- * refcount_dec_and_test - decrement a refcount and test if it is 0
+ * refcount_dec_and_test_checked - decrement a refcount and test if it is 0
  * @r: the refcount
  *
  * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
@@ -207,14 +207,14 @@ EXPORT_SYMBOL(refcount_sub_and_test);
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-bool refcount_dec_and_test(refcount_t *r)
+bool refcount_dec_and_test_checked(refcount_t *r)
 {
-	return refcount_sub_and_test(1, r);
+	return refcount_sub_and_test_checked(1, r);
 }
-EXPORT_SYMBOL(refcount_dec_and_test);
+EXPORT_SYMBOL(refcount_dec_and_test_checked);
 
 /**
- * refcount_dec - decrement a refcount
+ * refcount_dec_checked - decrement a refcount
  * @r: the refcount
  *
  * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
@@ -223,12 +223,11 @@ EXPORT_SYMBOL(refcount_dec_and_test);
  * Provides release memory ordering, such that prior loads and stores are done
  * before.
  */
-void refcount_dec(refcount_t *r)
+void refcount_dec_checked(refcount_t *r)
 {
-	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+	WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n");
 }
-EXPORT_SYMBOL(refcount_dec);
-#endif /* CONFIG_REFCOUNT_FULL */
+EXPORT_SYMBOL(refcount_dec_checked);
 
 /**
  * refcount_dec_if_one - decrement a refcount if it is 1
@@ -350,3 +349,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
 }
 EXPORT_SYMBOL(refcount_dec_and_lock);
 
+/**
+ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
+ *                                 interrupts if able to decrement refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ * @flags: saved IRQ-flags if the is acquired
+ *
+ * Same as refcount_dec_and_lock() above except that the spinlock is acquired
+ * with disabled interupts.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ *         otherwise
+ */
+bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
+				   unsigned long *flags)
+{
+	if (refcount_dec_not_one(r))
+		return false;
+
+	spin_lock_irqsave(lock, *flags);
+	if (!refcount_dec_and_test(r)) {
+		spin_unlock_irqrestore(lock, *flags);
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 2b2b79974b61..e5c8586cf717 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -668,8 +668,9 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
  * For a completely stable walk you should construct your own data
  * structure outside the hash table.
  *
- * This function may sleep so you must not call it from interrupt
- * context or with spin locks held.
+ * This function may be called from any process context, including
+ * non-preemptable context, but cannot be called from softirq or
+ * hardirq context.
  *
  * You must call rhashtable_walk_exit after this function returns.
  */
@@ -726,6 +727,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 	__acquires(RCU)
 {
 	struct rhashtable *ht = iter->ht;
+	bool rhlist = ht->rhlist;
 
 	rcu_read_lock();
 
@@ -734,11 +736,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter)
 		list_del(&iter->walker.list);
 	spin_unlock(&ht->lock);
 
-	if (!iter->walker.tbl && !iter->end_of_table) {
+	if (iter->end_of_table)
+		return 0;
+	if (!iter->walker.tbl) {
 		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
+		iter->slot = 0;
+		iter->skip = 0;
 		return -EAGAIN;
 	}
 
+	if (iter->p && !rhlist) {
+		/*
+		 * We need to validate that 'p' is still in the table, and
+		 * if so, update 'skip'
+		 */
+		struct rhash_head *p;
+		int skip = 0;
+		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+			skip++;
+			if (p == iter->p) {
+				iter->skip = skip;
+				goto found;
+			}
+		}
+		iter->p = NULL;
+	} else if (iter->p && rhlist) {
+		/* Need to validate that 'list' is still in the table, and
+		 * if so, update 'skip' and 'p'.
+		 */
+		struct rhash_head *p;
+		struct rhlist_head *list;
+		int skip = 0;
+		rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
+			for (list = container_of(p, struct rhlist_head, rhead);
+			     list;
+			     list = rcu_dereference(list->next)) {
+				skip++;
+				if (list == iter->list) {
+					iter->p = p;
+					iter->skip = skip;
+					goto found;
+				}
+			}
+		}
+		iter->p = NULL;
+	}
+found:
 	return 0;
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
@@ -914,8 +957,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 		iter->walker.tbl = NULL;
 	spin_unlock(&ht->lock);
 
-	iter->p = NULL;
-
 out:
 	rcu_read_unlock();
 }
@@ -923,8 +964,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
 
 static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 {
-	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
-		   (unsigned long)params->min_size);
+	size_t retsize;
+
+	if (params->nelem_hint)
+		retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
+			      (unsigned long)params->min_size);
+	else
+		retsize = max(HASH_DEFAULT_SIZE,
+			      (unsigned long)params->min_size);
+
+	return retsize;
 }
 
 static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
@@ -981,8 +1030,6 @@ int rhashtable_init(struct rhashtable *ht,
 	struct bucket_table *tbl;
 	size_t size;
 
-	size = HASH_DEFAULT_SIZE;
-
 	if ((!params->key_len && !params->obj_hashfn) ||
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
@@ -1009,8 +1056,7 @@ int rhashtable_init(struct rhashtable *ht,
 
 	ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
 
-	if (params->nelem_hint)
-		size = rounded_hashtable_size(&ht->p);
+	size = rounded_hashtable_size(&ht->p);
 
 	if (params->locks_mul)
 		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
@@ -1102,13 +1148,14 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 				 void (*free_fn)(void *ptr, void *arg),
 				 void *arg)
 {
-	struct bucket_table *tbl;
+	struct bucket_table *tbl, *next_tbl;
 	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
 	tbl = rht_dereference(ht->tbl, ht);
+restart:
 	if (free_fn) {
 		for (i = 0; i < tbl->size; i++) {
 			struct rhash_head *pos, *next;
@@ -1125,7 +1172,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 		}
 	}
 
+	next_tbl = rht_dereference(tbl->future_tbl, ht);
 	bucket_table_free(tbl);
+	if (next_tbl) {
+		tbl = next_tbl;
+		goto restart;
+	}
 	mutex_unlock(&ht->mutex);
 }
 EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 6fdc6267f4a8..fdd1b8aa8ac6 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -52,7 +52,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		return 0;
 	}
 
-	sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
+	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
 	if (!sb->map)
 		return -ENOMEM;
 
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 06dad7a072fd..7c6096a71704 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -24,9 +24,6 @@
  **/
 struct scatterlist *sg_next(struct scatterlist *sg)
 {
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sg->sg_magic != SG_MAGIC);
-#endif
 	if (sg_is_last(sg))
 		return NULL;
 
@@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
 	for_each_sg(sgl, sg, nents, i)
 		ret = sg;
 
-#ifdef CONFIG_DEBUG_SG
-	BUG_ON(sgl[0].sg_magic != SG_MAGIC);
 	BUG_ON(!sg_is_last(ret));
-#endif
 	return ret;
 }
 EXPORT_SYMBOL(sg_last);
@@ -170,7 +164,8 @@ static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
 		kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
 		return ptr;
 	} else
-		return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
+		return kmalloc_array(nents, sizeof(struct scatterlist),
+				     gfp_mask);
 }
 
 static void sg_kfree(struct scatterlist *sg, unsigned int nents)
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
deleted file mode 100644
index 04b68d9dffac..000000000000
--- a/lib/swiotlb.c
+++ /dev/null
@@ -1,1087 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is a fallback for platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
- * Copyright (C) 2000, 2003 Hewlett-Packard Co
- *	David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 03/05/07 davidm	Switch from PCI-DMA to generic device DMA API.
- * 00/12/13 davidm	Rename to swiotlb.c and add mark_clean() to avoid
- *			unnecessary i-cache flushing.
- * 04/07/.. ak		Better overflow handling. Assorted fixes.
- * 05/09/10 linville	Add support for syncing ranges, support syncing for
- *			DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
- * 08/12/11 beckyb	Add highmem support
- */
-
-#include <linux/cache.h>
-#include <linux/dma-direct.h>
-#include <linux/mm.h>
-#include <linux/export.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/swiotlb.h>
-#include <linux/pfn.h>
-#include <linux/types.h>
-#include <linux/ctype.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/scatterlist.h>
-#include <linux/mem_encrypt.h>
-#include <linux/set_memory.h>
-
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/iommu-helper.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/swiotlb.h>
-
-#define OFFSET(val,align) ((unsigned long)	\
-	                   ( (val) & ( (align) - 1)))
-
-#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-
-/*
- * Minimum IO TLB size to bother booting with.  Systems with mainly
- * 64bit capable cards will only lightly use the swiotlb.  If we can't
- * allocate a contiguous 1MB, we're probably in trouble anyway.
- */
-#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-
-enum swiotlb_force swiotlb_force;
-
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
-
-/*
- * Max segment that we can provide which (if pages are contingous) will
- * not be bounced (unless SWIOTLB_FORCE is set).
- */
-unsigned int max_segment;
-
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
-static int late_alloc;
-
-static int __init
-setup_io_tlb_npages(char *str)
-{
-	if (isdigit(*str)) {
-		io_tlb_nslabs = simple_strtoul(str, &str, 0);
-		/* avoid tail segment of size < IO_TLB_SEGSIZE */
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-	if (*str == ',')
-		++str;
-	if (!strcmp(str, "force")) {
-		swiotlb_force = SWIOTLB_FORCE;
-	} else if (!strcmp(str, "noforce")) {
-		swiotlb_force = SWIOTLB_NO_FORCE;
-		io_tlb_nslabs = 1;
-	}
-
-	return 0;
-}
-early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
-
-unsigned long swiotlb_nr_tbl(void)
-{
-	return io_tlb_nslabs;
-}
-EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
-
-unsigned int swiotlb_max_segment(void)
-{
-	return max_segment;
-}
-EXPORT_SYMBOL_GPL(swiotlb_max_segment);
-
-void swiotlb_set_max_segment(unsigned int val)
-{
-	if (swiotlb_force == SWIOTLB_FORCE)
-		max_segment = 1;
-	else
-		max_segment = rounddown(val, PAGE_SIZE);
-}
-
-/* default to 64MB */
-#define IO_TLB_DEFAULT_SIZE (64UL<<20)
-unsigned long swiotlb_size_or_default(void)
-{
-	unsigned long size;
-
-	size = io_tlb_nslabs << IO_TLB_SHIFT;
-
-	return size ? size : (IO_TLB_DEFAULT_SIZE);
-}
-
-static bool no_iotlb_memory;
-
-void swiotlb_print_info(void)
-{
-	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-	unsigned char *vstart, *vend;
-
-	if (no_iotlb_memory) {
-		pr_warn("software IO TLB: No low mem\n");
-		return;
-	}
-
-	vstart = phys_to_virt(io_tlb_start);
-	vend = phys_to_virt(io_tlb_end);
-
-	printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
-	       (unsigned long long)io_tlb_start,
-	       (unsigned long long)io_tlb_end,
-	       bytes >> 20, vstart, vend - 1);
-}
-
-/*
- * Early SWIOTLB allocation may be too early to allow an architecture to
- * perform the desired operations.  This function allows the architecture to
- * call SWIOTLB when the operations are possible.  It needs to be called
- * before the SWIOTLB memory is used.
- */
-void __init swiotlb_update_mem_attributes(void)
-{
-	void *vaddr;
-	unsigned long bytes;
-
-	if (no_iotlb_memory || late_alloc)
-		return;
-
-	vaddr = phys_to_virt(io_tlb_start);
-	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
-
-	vaddr = phys_to_virt(io_tlb_overflow_buffer);
-	bytes = PAGE_ALIGN(io_tlb_overflow);
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
-}
-
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
-{
-	void *v_overflow_buffer;
-	unsigned long i, bytes;
-
-	bytes = nslabs << IO_TLB_SHIFT;
-
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = __pa(tlb);
-	io_tlb_end = io_tlb_start + bytes;
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
-						PAGE_ALIGN(io_tlb_overflow),
-						PAGE_SIZE);
-	if (!v_overflow_buffer)
-		return -ENOMEM;
-
-	io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = memblock_virt_alloc(
-				PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
-				PAGE_SIZE);
-	io_tlb_orig_addr = memblock_virt_alloc(
-				PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
-				PAGE_SIZE);
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-	}
-	io_tlb_index = 0;
-
-	if (verbose)
-		swiotlb_print_info();
-
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
-	return 0;
-}
-
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void  __init
-swiotlb_init(int verbose)
-{
-	size_t default_size = IO_TLB_DEFAULT_SIZE;
-	unsigned char *vstart;
-	unsigned long bytes;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-	/* Get IO TLB memory from the low pages */
-	vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
-	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
-		return;
-
-	if (io_tlb_start)
-		memblock_free_early(io_tlb_start,
-				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-	pr_warn("Cannot allocate SWIOTLB buffer");
-	no_iotlb_memory = true;
-}
-
-/*
- * Systems with larger DMA zones (those that don't support ISA) can
- * initialize the swiotlb later using the slab allocator if needed.
- * This should be just like above, but with some error catching.
- */
-int
-swiotlb_late_init_with_default_size(size_t default_size)
-{
-	unsigned long bytes, req_nslabs = io_tlb_nslabs;
-	unsigned char *vstart = NULL;
-	unsigned int order;
-	int rc = 0;
-
-	if (!io_tlb_nslabs) {
-		io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-		io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-	}
-
-	/*
-	 * Get IO TLB memory from the low pages
-	 */
-	order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-	io_tlb_nslabs = SLABS_PER_PAGE << order;
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-	while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-		vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-						  order);
-		if (vstart)
-			break;
-		order--;
-	}
-
-	if (!vstart) {
-		io_tlb_nslabs = req_nslabs;
-		return -ENOMEM;
-	}
-	if (order != get_order(bytes)) {
-		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
-		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
-		io_tlb_nslabs = SLABS_PER_PAGE << order;
-	}
-	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
-	if (rc)
-		free_pages((unsigned long)vstart, order);
-
-	return rc;
-}
-
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
-{
-	unsigned long i, bytes;
-	unsigned char *v_overflow_buffer;
-
-	bytes = nslabs << IO_TLB_SHIFT;
-
-	io_tlb_nslabs = nslabs;
-	io_tlb_start = virt_to_phys(tlb);
-	io_tlb_end = io_tlb_start + bytes;
-
-	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-	memset(tlb, 0, bytes);
-
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-						     get_order(io_tlb_overflow));
-	if (!v_overflow_buffer)
-		goto cleanup2;
-
-	set_memory_decrypted((unsigned long)v_overflow_buffer,
-			io_tlb_overflow >> PAGE_SHIFT);
-	memset(v_overflow_buffer, 0, io_tlb_overflow);
-	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
-	/*
-	 * Allocate and initialize the free list array.  This array is used
-	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-	 * between io_tlb_start and io_tlb_end.
-	 */
-	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
-	if (!io_tlb_list)
-		goto cleanup3;
-
-	io_tlb_orig_addr = (phys_addr_t *)
-		__get_free_pages(GFP_KERNEL,
-				 get_order(io_tlb_nslabs *
-					   sizeof(phys_addr_t)));
-	if (!io_tlb_orig_addr)
-		goto cleanup4;
-
-	for (i = 0; i < io_tlb_nslabs; i++) {
-		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-	}
-	io_tlb_index = 0;
-
-	swiotlb_print_info();
-
-	late_alloc = 1;
-
-	swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
-
-	return 0;
-
-cleanup4:
-	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-	                                                 sizeof(int)));
-	io_tlb_list = NULL;
-cleanup3:
-	free_pages((unsigned long)v_overflow_buffer,
-		   get_order(io_tlb_overflow));
-	io_tlb_overflow_buffer = 0;
-cleanup2:
-	io_tlb_end = 0;
-	io_tlb_start = 0;
-	io_tlb_nslabs = 0;
-	max_segment = 0;
-	return -ENOMEM;
-}
-
-void __init swiotlb_exit(void)
-{
-	if (!io_tlb_orig_addr)
-		return;
-
-	if (late_alloc) {
-		free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
-			   get_order(io_tlb_overflow));
-		free_pages((unsigned long)io_tlb_orig_addr,
-			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-								 sizeof(int)));
-		free_pages((unsigned long)phys_to_virt(io_tlb_start),
-			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
-	} else {
-		memblock_free_late(io_tlb_overflow_buffer,
-				   PAGE_ALIGN(io_tlb_overflow));
-		memblock_free_late(__pa(io_tlb_orig_addr),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-		memblock_free_late(__pa(io_tlb_list),
-				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-		memblock_free_late(io_tlb_start,
-				   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-	}
-	io_tlb_nslabs = 0;
-	max_segment = 0;
-}
-
-int is_swiotlb_buffer(phys_addr_t paddr)
-{
-	return paddr >= io_tlb_start && paddr < io_tlb_end;
-}
-
-/*
- * Bounce: copy the swiotlb buffer back to the original dma location
- */
-static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
-			   size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(orig_addr);
-	unsigned char *vaddr = phys_to_virt(tlb_addr);
-
-	if (PageHighMem(pfn_to_page(pfn))) {
-		/* The buffer does not have a mapping.  Map it in and copy */
-		unsigned int offset = orig_addr & ~PAGE_MASK;
-		char *buffer;
-		unsigned int sz = 0;
-		unsigned long flags;
-
-		while (size) {
-			sz = min_t(size_t, PAGE_SIZE - offset, size);
-
-			local_irq_save(flags);
-			buffer = kmap_atomic(pfn_to_page(pfn));
-			if (dir == DMA_TO_DEVICE)
-				memcpy(vaddr, buffer + offset, sz);
-			else
-				memcpy(buffer + offset, vaddr, sz);
-			kunmap_atomic(buffer);
-			local_irq_restore(flags);
-
-			size -= sz;
-			pfn++;
-			vaddr += sz;
-			offset = 0;
-		}
-	} else if (dir == DMA_TO_DEVICE) {
-		memcpy(vaddr, phys_to_virt(orig_addr), size);
-	} else {
-		memcpy(phys_to_virt(orig_addr), vaddr, size);
-	}
-}
-
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
-				   dma_addr_t tbl_dma_addr,
-				   phys_addr_t orig_addr, size_t size,
-				   enum dma_data_direction dir,
-				   unsigned long attrs)
-{
-	unsigned long flags;
-	phys_addr_t tlb_addr;
-	unsigned int nslots, stride, index, wrap;
-	int i;
-	unsigned long mask;
-	unsigned long offset_slots;
-	unsigned long max_slots;
-
-	if (no_iotlb_memory)
-		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
-	if (mem_encrypt_active())
-		pr_warn_once("%s is active and system is using DMA bounce buffers\n",
-			     sme_active() ? "SME" : "SEV");
-
-	mask = dma_get_seg_boundary(hwdev);
-
-	tbl_dma_addr &= mask;
-
-	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-
-	/*
- 	 * Carefully handle integer overflow which can occur when mask == ~0UL.
- 	 */
-	max_slots = mask + 1
-		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
-		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
-
-	/*
-	 * For mappings greater than or equal to a page, we limit the stride
-	 * (and hence alignment) to a page size.
-	 */
-	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	if (size >= PAGE_SIZE)
-		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-	else
-		stride = 1;
-
-	BUG_ON(!nslots);
-
-	/*
-	 * Find suitable number of IO TLB entries size that will fit this
-	 * request and allocate a buffer from that IO TLB pool.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	index = ALIGN(io_tlb_index, stride);
-	if (index >= io_tlb_nslabs)
-		index = 0;
-	wrap = index;
-
-	do {
-		while (iommu_is_span_boundary(index, nslots, offset_slots,
-					      max_slots)) {
-			index += stride;
-			if (index >= io_tlb_nslabs)
-				index = 0;
-			if (index == wrap)
-				goto not_found;
-		}
-
-		/*
-		 * If we find a slot that indicates we have 'nslots' number of
-		 * contiguous buffers, we allocate the buffers from that slot
-		 * and mark the entries as '0' indicating unavailable.
-		 */
-		if (io_tlb_list[index] >= nslots) {
-			int count = 0;
-
-			for (i = index; i < (int) (index + nslots); i++)
-				io_tlb_list[i] = 0;
-			for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-				io_tlb_list[i] = ++count;
-			tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-			/*
-			 * Update the indices to avoid searching in the next
-			 * round.
-			 */
-			io_tlb_index = ((index + nslots) < io_tlb_nslabs
-					? (index + nslots) : 0);
-
-			goto found;
-		}
-		index += stride;
-		if (index >= io_tlb_nslabs)
-			index = 0;
-	} while (index != wrap);
-
-not_found:
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
-	return SWIOTLB_MAP_ERROR;
-found:
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-
-	/*
-	 * Save away the mapping from the original address to the DMA address.
-	 * This is needed when we sync the memory.  Then we sync the buffer if
-	 * needed.
-	 */
-	for (i = 0; i < nslots; i++)
-		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
-
-	return tlb_addr;
-}
-
-/*
- * Allocates bounce buffer and returns its physical address.
- */
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-	   enum dma_data_direction dir, unsigned long attrs)
-{
-	dma_addr_t start_dma_addr;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE) {
-		dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
-				     &phys);
-		return SWIOTLB_MAP_ERROR;
-	}
-
-	start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
-	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
-				      dir, attrs);
-}
-
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-			      size_t size, enum dma_data_direction dir,
-			      unsigned long attrs)
-{
-	unsigned long flags;
-	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
-	/*
-	 * First, sync the memory before unmapping the entry
-	 */
-	if (orig_addr != INVALID_PHYS_ADDR &&
-	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
-
-	/*
-	 * Return the buffer to the free list by setting the corresponding
-	 * entries to indicate the number of contiguous entries available.
-	 * While returning the entries to the free list, we merge the entries
-	 * with slots below and above the pool being returned.
-	 */
-	spin_lock_irqsave(&io_tlb_lock, flags);
-	{
-		count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-			 io_tlb_list[index + nslots] : 0);
-		/*
-		 * Step 1: return the slots to the free list, merging the
-		 * slots with superceeding slots
-		 */
-		for (i = index + nslots - 1; i >= index; i--) {
-			io_tlb_list[i] = ++count;
-			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-		}
-		/*
-		 * Step 2: merge the returned slots with the preceding slots,
-		 * if available (non zero)
-		 */
-		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-			io_tlb_list[i] = ++count;
-	}
-	spin_unlock_irqrestore(&io_tlb_lock, flags);
-}
-
-void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
-			     size_t size, enum dma_data_direction dir,
-			     enum dma_sync_target target)
-{
-	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-	phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
-	if (orig_addr == INVALID_PHYS_ADDR)
-		return;
-	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
-
-	switch (target) {
-	case SYNC_FOR_CPU:
-		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, tlb_addr,
-				       size, DMA_FROM_DEVICE);
-		else
-			BUG_ON(dir != DMA_TO_DEVICE);
-		break;
-	case SYNC_FOR_DEVICE:
-		if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-			swiotlb_bounce(orig_addr, tlb_addr,
-				       size, DMA_TO_DEVICE);
-		else
-			BUG_ON(dir != DMA_FROM_DEVICE);
-		break;
-	default:
-		BUG();
-	}
-}
-
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
-		size_t size)
-{
-	u64 mask = DMA_BIT_MASK(32);
-
-	if (dev && dev->coherent_dma_mask)
-		mask = dev->coherent_dma_mask;
-	return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		unsigned long attrs)
-{
-	phys_addr_t phys_addr;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		goto out_warn;
-
-	phys_addr = swiotlb_tbl_map_single(dev,
-			__phys_to_dma(dev, io_tlb_start),
-			0, size, DMA_FROM_DEVICE, attrs);
-	if (phys_addr == SWIOTLB_MAP_ERROR)
-		goto out_warn;
-
-	*dma_handle = __phys_to_dma(dev, phys_addr);
-	if (!dma_coherent_ok(dev, *dma_handle, size))
-		goto out_unmap;
-
-	memset(phys_to_virt(phys_addr), 0, size);
-	return phys_to_virt(phys_addr);
-
-out_unmap:
-	dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		(unsigned long long)dev->coherent_dma_mask,
-		(unsigned long long)*dma_handle);
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-			DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
-		dev_warn(dev,
-			"swiotlb: coherent allocation failed, size=%zu\n",
-			size);
-		dump_stack();
-	}
-	return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
-		dma_addr_t dma_addr)
-{
-	phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
-
-	WARN_ON_ONCE(irqs_disabled());
-
-	if (!is_swiotlb_buffer(phys_addr))
-		return false;
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-				 DMA_ATTR_SKIP_CPU_SYNC);
-	return true;
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
-	     int do_panic)
-{
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		return;
-
-	/*
-	 * Ran out of IOMMU space for this operation. This is very bad.
-	 * Unfortunately the drivers cannot handle this operation properly.
-	 * unless they check for dma_mapping_error (most don't)
-	 * When the mapping is small enough return a static buffer to limit
-	 * the damage, or panic when the transfer is too big.
-	 */
-	dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
-			    size);
-
-	if (size <= io_tlb_overflow || !do_panic)
-		return;
-
-	if (dir == DMA_BIDIRECTIONAL)
-		panic("DMA: Random memory could be DMA accessed\n");
-	if (dir == DMA_FROM_DEVICE)
-		panic("DMA: Random memory could be DMA written\n");
-	if (dir == DMA_TO_DEVICE)
-		panic("DMA: Random memory could be DMA read\n");
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode.  The
- * physical address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
-			    unsigned long offset, size_t size,
-			    enum dma_data_direction dir,
-			    unsigned long attrs)
-{
-	phys_addr_t map, phys = page_to_phys(page) + offset;
-	dma_addr_t dev_addr = phys_to_dma(dev, phys);
-
-	BUG_ON(dir == DMA_NONE);
-	/*
-	 * If the address happens to be in the device's DMA window,
-	 * we can safely return the device addr and not worry about bounce
-	 * buffering it.
-	 */
-	if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
-		return dev_addr;
-
-	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
-	/* Oh well, have to allocate and map a bounce buffer. */
-	map = map_single(dev, phys, size, dir, attrs);
-	if (map == SWIOTLB_MAP_ERROR) {
-		swiotlb_full(dev, size, dir, 1);
-		return __phys_to_dma(dev, io_tlb_overflow_buffer);
-	}
-
-	dev_addr = __phys_to_dma(dev, map);
-
-	/* Ensure that the address returned is DMA'ble */
-	if (dma_capable(dev, dev_addr, size))
-		return dev_addr;
-
-	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-	return __phys_to_dma(dev, io_tlb_overflow_buffer);
-}
-
-/*
- * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_page call.  All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-			 size_t size, enum dma_data_direction dir,
-			 unsigned long attrs)
-{
-	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-
-	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
-		return;
-	}
-
-	if (dir != DMA_FROM_DEVICE)
-		return;
-
-	/*
-	 * phys_to_virt doesn't work with hihgmem page but we could
-	 * call dma_mark_clean() with hihgmem page here. However, we
-	 * are fine since dma_mark_clean() is null on POWERPC. We can
-	 * make dma_mark_clean() take a physical address if necessary.
-	 */
-	dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-			size_t size, enum dma_data_direction dir,
-			unsigned long attrs)
-{
-	unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a swiotlb_map_page() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the dma mapping, you must
- * call this function before doing so.  At the next point you give the dma
- * address back to the card, you must first perform a
- * swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
-static void
-swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-		    size_t size, enum dma_data_direction dir,
-		    enum dma_sync_target target)
-{
-	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-
-	if (is_swiotlb_buffer(paddr)) {
-		swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-		return;
-	}
-
-	if (dir != DMA_FROM_DEVICE)
-		return;
-
-	dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-			    size_t size, enum dma_data_direction dir)
-{
-	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-			       size_t size, enum dma_data_direction dir)
-{
-	swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_page
- * interface.  Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length.  They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for swiotlb_map_page are the
- * same here.
- */
-int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		     enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i) {
-		phys_addr_t paddr = sg_phys(sg);
-		dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
-		if (swiotlb_force == SWIOTLB_FORCE ||
-		    !dma_capable(hwdev, dev_addr, sg->length)) {
-			phys_addr_t map = map_single(hwdev, sg_phys(sg),
-						     sg->length, dir, attrs);
-			if (map == SWIOTLB_MAP_ERROR) {
-				/* Don't panic here, we expect map_sg users
-				   to do proper error handling. */
-				swiotlb_full(hwdev, sg->length, dir, 0);
-				attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-						       attrs);
-				sg_dma_len(sgl) = 0;
-				return 0;
-			}
-			sg->dma_address = __phys_to_dma(hwdev, map);
-		} else
-			sg->dma_address = dev_addr;
-		sg_dma_len(sg) = sg->length;
-	}
-	return nelems;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_page() above.
- */
-void
-swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-		       int nelems, enum dma_data_direction dir,
-		       unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i)
-		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
-			     attrs);
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
-static void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-		int nelems, enum dma_data_direction dir,
-		enum dma_sync_target target)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nelems, i)
-		swiotlb_sync_single(hwdev, sg->dma_address,
-				    sg_dma_len(sg), dir, target);
-}
-
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-			int nelems, enum dma_data_direction dir)
-{
-	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-			   int nelems, enum dma_data_direction dir)
-{
-	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
-}
-
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
-	return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
-/*
- * Return whether the given device DMA address mask can be supported
- * properly.  For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
-	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
-}
-
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, unsigned long attrs)
-{
-	void *vaddr;
-
-	/* temporary workaround: */
-	if (gfp & __GFP_NOWARN)
-		attrs |= DMA_ATTR_NO_WARN;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA memory
-	 * allocation ultimately failed.
-	 */
-	gfp |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-	if (!vaddr)
-		vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
-	return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_addr, unsigned long attrs)
-{
-	if (!swiotlb_free_buffer(dev, size, dma_addr))
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-const struct dma_map_ops swiotlb_dma_ops = {
-	.mapping_error		= swiotlb_dma_mapping_error,
-	.alloc			= swiotlb_alloc,
-	.free			= swiotlb_free,
-	.sync_single_for_cpu	= swiotlb_sync_single_for_cpu,
-	.sync_single_for_device	= swiotlb_sync_single_for_device,
-	.sync_sg_for_cpu	= swiotlb_sync_sg_for_cpu,
-	.sync_sg_for_device	= swiotlb_sync_sg_for_device,
-	.map_sg			= swiotlb_map_sg_attrs,
-	.unmap_sg		= swiotlb_unmap_sg_attrs,
-	.map_page		= swiotlb_map_page,
-	.unmap_page		= swiotlb_unmap_page,
-	.dma_supported		= dma_direct_supported,
-};
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 8e157806df7a..08d3d59dca17 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -356,29 +356,22 @@ static int bpf_fill_maxinsns11(struct bpf_test *self)
 	return __bpf_fill_ja(self, BPF_MAXINSNS, 68);
 }
 
-static int bpf_fill_ja(struct bpf_test *self)
-{
-	/* Hits exactly 11 passes on x86_64 JIT. */
-	return __bpf_fill_ja(self, 12, 9);
-}
-
-static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
+static int bpf_fill_maxinsns12(struct bpf_test *self)
 {
 	unsigned int len = BPF_MAXINSNS;
 	struct sock_filter *insn;
-	int i;
+	int i = 0;
 
 	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
 	if (!insn)
 		return -ENOMEM;
 
-	for (i = 0; i < len - 1; i += 2) {
-		insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0);
-		insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
-					 SKF_AD_OFF + SKF_AD_CPU);
-	}
+	insn[0] = __BPF_JUMP(BPF_JMP | BPF_JA, len - 2, 0, 0);
 
-	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee);
+	for (i = 1; i < len - 1; i++)
+		insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0);
+
+	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xabababab);
 
 	self->u.ptr.insns = insn;
 	self->u.ptr.len = len;
@@ -386,50 +379,22 @@ static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
 	return 0;
 }
 
-#define PUSH_CNT 68
-/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
-static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+static int bpf_fill_maxinsns13(struct bpf_test *self)
 {
 	unsigned int len = BPF_MAXINSNS;
-	struct bpf_insn *insn;
-	int i = 0, j, k = 0;
+	struct sock_filter *insn;
+	int i = 0;
 
 	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
 	if (!insn)
 		return -ENOMEM;
 
-	insn[i++] = BPF_MOV64_REG(R6, R1);
-loop:
-	for (j = 0; j < PUSH_CNT; j++) {
-		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
-		i++;
-		insn[i++] = BPF_MOV64_REG(R1, R6);
-		insn[i++] = BPF_MOV64_IMM(R2, 1);
-		insn[i++] = BPF_MOV64_IMM(R3, 2);
-		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-					 bpf_skb_vlan_push_proto.func - __bpf_call_base);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
-		i++;
-	}
+	for (i = 0; i < len - 3; i++)
+		insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0);
 
-	for (j = 0; j < PUSH_CNT; j++) {
-		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2);
-		i++;
-		insn[i++] = BPF_MOV64_REG(R1, R6);
-		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-					 bpf_skb_vlan_pop_proto.func - __bpf_call_base);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2);
-		i++;
-	}
-	if (++k < 5)
-		goto loop;
-
-	for (; i < len - 1; i++)
-		insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef);
-
-	insn[len - 1] = BPF_EXIT_INSN();
+	insn[len - 3] = __BPF_STMT(BPF_LD | BPF_IMM, 0xabababab);
+	insn[len - 2] = __BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0);
+	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0);
 
 	self->u.ptr.insns = insn;
 	self->u.ptr.len = len;
@@ -437,58 +402,29 @@ loop:
 	return 0;
 }
 
-static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
+static int bpf_fill_ja(struct bpf_test *self)
 {
-	struct bpf_insn *insn;
-
-	insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
-	if (!insn)
-		return -ENOMEM;
-
-	/* Due to func address being non-const, we need to
-	 * assemble this here.
-	 */
-	insn[0] = BPF_MOV64_REG(R6, R1);
-	insn[1] = BPF_LD_ABS(BPF_B, 0);
-	insn[2] = BPF_LD_ABS(BPF_H, 0);
-	insn[3] = BPF_LD_ABS(BPF_W, 0);
-	insn[4] = BPF_MOV64_REG(R7, R6);
-	insn[5] = BPF_MOV64_IMM(R6, 0);
-	insn[6] = BPF_MOV64_REG(R1, R7);
-	insn[7] = BPF_MOV64_IMM(R2, 1);
-	insn[8] = BPF_MOV64_IMM(R3, 2);
-	insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-			       bpf_skb_vlan_push_proto.func - __bpf_call_base);
-	insn[10] = BPF_MOV64_REG(R6, R7);
-	insn[11] = BPF_LD_ABS(BPF_B, 0);
-	insn[12] = BPF_LD_ABS(BPF_H, 0);
-	insn[13] = BPF_LD_ABS(BPF_W, 0);
-	insn[14] = BPF_MOV64_IMM(R0, 42);
-	insn[15] = BPF_EXIT_INSN();
-
-	self->u.ptr.insns = insn;
-	self->u.ptr.len = 16;
-
-	return 0;
+	/* Hits exactly 11 passes on x86_64 JIT. */
+	return __bpf_fill_ja(self, 12, 9);
 }
 
-static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self)
 {
 	unsigned int len = BPF_MAXINSNS;
-	struct bpf_insn *insn;
-	int i = 0;
+	struct sock_filter *insn;
+	int i;
 
 	insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
 	if (!insn)
 		return -ENOMEM;
 
-	insn[i++] = BPF_MOV64_REG(R6, R1);
-	insn[i++] = BPF_LD_ABS(BPF_B, 0);
-	insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2);
-	i++;
-	while (i < len - 1)
-		insn[i++] = BPF_LD_ABS(BPF_B, 1);
-	insn[i] = BPF_EXIT_INSN();
+	for (i = 0; i < len - 1; i += 2) {
+		insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0);
+		insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+					 SKF_AD_OFF + SKF_AD_CPU);
+	}
+
+	insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee);
 
 	self->u.ptr.insns = insn;
 	self->u.ptr.len = len;
@@ -1988,40 +1924,6 @@ static struct bpf_test tests[] = {
 		{ { 0, -1 } }
 	},
 	{
-		"INT: DIV + ABS",
-		.u.insns_int = {
-			BPF_ALU64_REG(BPF_MOV, R6, R1),
-			BPF_LD_ABS(BPF_B, 3),
-			BPF_ALU64_IMM(BPF_MOV, R2, 2),
-			BPF_ALU32_REG(BPF_DIV, R0, R2),
-			BPF_ALU64_REG(BPF_MOV, R8, R0),
-			BPF_LD_ABS(BPF_B, 4),
-			BPF_ALU64_REG(BPF_ADD, R8, R0),
-			BPF_LD_IND(BPF_B, R8, -70),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ 10, 20, 30, 40, 50 },
-		{ { 4, 0 }, { 5, 10 } }
-	},
-	{
-		/* This one doesn't go through verifier, but is just raw insn
-		 * as opposed to cBPF tests from here. Thus div by 0 tests are
-		 * done in test_verifier in BPF kselftests.
-		 */
-		"INT: DIV by -1",
-		.u.insns_int = {
-			BPF_ALU64_REG(BPF_MOV, R6, R1),
-			BPF_ALU64_IMM(BPF_MOV, R7, -1),
-			BPF_LD_ABS(BPF_B, 3),
-			BPF_ALU32_REG(BPF_DIV, R0, R7),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ 10, 20, 30, 40, 50 },
-		{ { 3, 0 }, { 4, 0 } }
-	},
-	{
 		"check: missing ret",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_IMM, 1),
@@ -2383,50 +2285,6 @@ static struct bpf_test tests[] = {
 		{ },
 		{ { 0, 1 } }
 	},
-	{
-		"nmap reduced",
-		.u.insns_int = {
-			BPF_MOV64_REG(R6, R1),
-			BPF_LD_ABS(BPF_H, 12),
-			BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28),
-			BPF_LD_ABS(BPF_H, 12),
-			BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26),
-			BPF_MOV32_IMM(R0, 18),
-			BPF_STX_MEM(BPF_W, R10, R0, -64),
-			BPF_LDX_MEM(BPF_W, R7, R10, -64),
-			BPF_LD_IND(BPF_W, R7, 14),
-			BPF_STX_MEM(BPF_W, R10, R0, -60),
-			BPF_MOV32_IMM(R0, 280971478),
-			BPF_STX_MEM(BPF_W, R10, R0, -56),
-			BPF_LDX_MEM(BPF_W, R7, R10, -56),
-			BPF_LDX_MEM(BPF_W, R0, R10, -60),
-			BPF_ALU32_REG(BPF_SUB, R0, R7),
-			BPF_JMP_IMM(BPF_JNE, R0, 0, 15),
-			BPF_LD_ABS(BPF_H, 12),
-			BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13),
-			BPF_MOV32_IMM(R0, 22),
-			BPF_STX_MEM(BPF_W, R10, R0, -56),
-			BPF_LDX_MEM(BPF_W, R7, R10, -56),
-			BPF_LD_IND(BPF_H, R7, 14),
-			BPF_STX_MEM(BPF_W, R10, R0, -52),
-			BPF_MOV32_IMM(R0, 17366),
-			BPF_STX_MEM(BPF_W, R10, R0, -48),
-			BPF_LDX_MEM(BPF_W, R7, R10, -48),
-			BPF_LDX_MEM(BPF_W, R0, R10, -52),
-			BPF_ALU32_REG(BPF_SUB, R0, R7),
-			BPF_JMP_IMM(BPF_JNE, R0, 0, 2),
-			BPF_MOV32_IMM(R0, 256),
-			BPF_EXIT_INSN(),
-			BPF_MOV32_IMM(R0, 0),
-			BPF_EXIT_INSN(),
-		},
-		INTERNAL,
-		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0,
-		  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		  0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6},
-		{ { 38, 256 } },
-		.stack_depth = 64,
-	},
 	/* BPF_ALU | BPF_MOV | BPF_X */
 	{
 		"ALU_MOV_X: dst = 2",
@@ -5424,21 +5282,31 @@ static struct bpf_test tests[] = {
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Ctx heavy transformations",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC,
+#endif
 		{ },
 		{
 			{  1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) },
 			{ 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }
 		},
 		.fill_helper = bpf_fill_maxinsns6,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Call heavy transformations",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC | FLAG_NO_DATA,
+#endif
 		{ },
 		{ { 1, 0 }, { 10, 0 } },
 		.fill_helper = bpf_fill_maxinsns7,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Jump heavy test",
@@ -5478,28 +5346,39 @@ static struct bpf_test tests[] = {
 		.expected_errcode = -ENOTSUPP,
 	},
 	{
-		"BPF_MAXINSNS: ld_abs+get_processor_id",
-		{ },
-		CLASSIC,
+		"BPF_MAXINSNS: jump over MSH",
 		{ },
-		{ { 1, 0xbee } },
-		.fill_helper = bpf_fill_ld_abs_get_processor_id,
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		{ 0xfa, 0xfb, 0xfc, 0xfd, },
+		{ { 4, 0xabababab } },
+		.fill_helper = bpf_fill_maxinsns12,
+		.expected_errcode = -EINVAL,
 	},
 	{
-		"BPF_MAXINSNS: ld_abs+vlan_push/pop",
+		"BPF_MAXINSNS: exec all MSH",
 		{ },
-		INTERNAL,
-		{ 0x34 },
-		{ { ETH_HLEN, 0xbef } },
-		.fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
+		CLASSIC,
+#endif
+		{ 0xfa, 0xfb, 0xfc, 0xfd, },
+		{ { 4, 0xababab83 } },
+		.fill_helper = bpf_fill_maxinsns13,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{
-		"BPF_MAXINSNS: jump around ld_abs",
+		"BPF_MAXINSNS: ld_abs+get_processor_id",
 		{ },
-		INTERNAL,
-		{ 10, 11 },
-		{ { 2, 10 } },
-		.fill_helper = bpf_fill_jump_around_ld_abs,
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
+		CLASSIC,
+#endif
+		{ },
+		{ { 1, 0xbee } },
+		.fill_helper = bpf_fill_ld_abs_get_processor_id,
+		.expected_errcode = -ENOTSUPP,
 	},
 	/*
 	 * LD_IND / LD_ABS on fragmented SKBs
@@ -5683,6 +5562,53 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x05 } },
 	},
 	{
+		"LD_IND byte positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xff } },
+	},
+	{
+		"LD_IND byte positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_IND byte negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, -0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 } },
+	},
+	{
+		"LD_IND byte negative offset, multiple calls",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3b),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 1),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 2),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 3),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 4),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x82 }, },
+	},
+	{
 		"LD_IND halfword positive offset",
 		.u.insns = {
 			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
@@ -5731,6 +5657,39 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x66cc } },
 	},
 	{
+		"LD_IND halfword positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3d),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffff } },
+	},
+	{
+		"LD_IND halfword positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_IND halfword negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 } },
+	},
+	{
 		"LD_IND word positive offset",
 		.u.insns = {
 			BPF_STMT(BPF_LDX | BPF_IMM, 0x20),
@@ -5821,6 +5780,39 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x66cc77dd } },
 	},
 	{
+		"LD_IND word positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3b),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffffffff } },
+	},
+	{
+		"LD_IND word positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_IND word negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LDX | BPF_IMM, 0x3e),
+			BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 } },
+	},
+	{
 		"LD_ABS byte",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20),
@@ -5838,6 +5830,68 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0xcc } },
 	},
 	{
+		"LD_ABS byte positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xff } },
+	},
+	{
+		"LD_ABS byte positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS byte negative offset, out of bounds load",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, -1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		.expected_errcode = -EINVAL,
+	},
+	{
+		"LD_ABS byte negative offset, in bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x82 }, },
+	},
+	{
+		"LD_ABS byte negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS byte negative offset, multiple calls",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3c),
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3d),
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x82 }, },
+	},
+	{
 		"LD_ABS halfword",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22),
@@ -5872,6 +5926,55 @@ static struct bpf_test tests[] = {
 		{ {0x40, 0x99ff } },
 	},
 	{
+		"LD_ABS halfword positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3e),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffff } },
+	},
+	{
+		"LD_ABS halfword positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS halfword negative offset, out of bounds load",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, -1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		.expected_errcode = -EINVAL,
+	},
+	{
+		"LD_ABS halfword negative offset, in bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x1982 }, },
+	},
+	{
+		"LD_ABS halfword negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
 		"LD_ABS word",
 		.u.insns = {
 			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c),
@@ -5939,6 +6042,140 @@ static struct bpf_test tests[] = {
 		},
 		{ {0x40, 0x88ee99ff } },
 	},
+	{
+		"LD_ABS word positive offset, all ff",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0xff, [0x3d] = 0xff,  [0x3e] = 0xff, [0x3f] = 0xff },
+		{ {0x40, 0xffffffff } },
+	},
+	{
+		"LD_ABS word positive offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LD_ABS word negative offset, out of bounds load",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, -1),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC | FLAG_EXPECTED_FAIL,
+		.expected_errcode = -EINVAL,
+	},
+	{
+		"LD_ABS word negative offset, in bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x25051982 }, },
+	},
+	{
+		"LD_ABS word negative offset, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x3f, 0 }, },
+	},
+	{
+		"LDX_MSH standalone, preserved A",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0xffeebbaa }, },
+	},
+	{
+		"LDX_MSH standalone, preserved A 2",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0x175e9d63),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3d),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3f),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x175e9d63 }, },
+	},
+	{
+		"LDX_MSH standalone, test result 1",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x14 }, },
+	},
+	{
+		"LDX_MSH standalone, test result 2",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x24 }, },
+	},
+	{
+		"LDX_MSH standalone, negative offset",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, -1),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0 }, },
+	},
+	{
+		"LDX_MSH standalone, negative offset 2",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, SKF_LL_OFF + 0x3e),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0x24 }, },
+	},
+	{
+		"LDX_MSH standalone, out of bounds",
+		.u.insns = {
+			BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa),
+			BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x40),
+			BPF_STMT(BPF_MISC | BPF_TXA, 0),
+			BPF_STMT(BPF_RET | BPF_A, 0x0),
+		},
+		CLASSIC,
+		{ [0x3c] = 0x25, [0x3d] = 0x05,  [0x3e] = 0x19, [0x3f] = 0x82 },
+		{ {0x40, 0 }, },
+	},
 	/*
 	 * verify that the interpreter or JIT correctly sets A and X
 	 * to 0.
@@ -6127,14 +6364,6 @@ static struct bpf_test tests[] = {
 		{},
 		{ {0x1, 0x42 } },
 	},
-	{
-		"LD_ABS with helper changing skb data",
-		{ },
-		INTERNAL,
-		{ 0x34 },
-		{ { ETH_HLEN, 42 } },
-		.fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
-	},
 	/* Checking interpreter vs JIT wrt signed extended imms. */
 	{
 		"JNE signed compare, test 1",
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index cee000ac54d8..b984806d7d7b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -618,8 +618,9 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
 
 	mutex_lock(&test_fw_mutex);
 
-	test_fw_config->reqs = vzalloc(sizeof(struct test_batched_req) *
-				       test_fw_config->num_requests * 2);
+	test_fw_config->reqs =
+		vzalloc(array3_size(sizeof(struct test_batched_req),
+				    test_fw_config->num_requests, 2));
 	if (!test_fw_config->reqs) {
 		rc = -ENOMEM;
 		goto out_unlock;
@@ -720,8 +721,9 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
 
 	mutex_lock(&test_fw_mutex);
 
-	test_fw_config->reqs = vzalloc(sizeof(struct test_batched_req) *
-				       test_fw_config->num_requests * 2);
+	test_fw_config->reqs =
+		vzalloc(array3_size(sizeof(struct test_batched_req),
+				    test_fw_config->num_requests, 2));
 	if (!test_fw_config->reqs) {
 		rc = -ENOMEM;
 		goto out;
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index 0e5b7a61460b..e3ddd836491f 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -779,8 +779,9 @@ static int kmod_config_sync_info(struct kmod_test_device *test_dev)
 	struct test_config *config = &test_dev->config;
 
 	free_test_dev_info(test_dev);
-	test_dev->info = vzalloc(config->num_threads *
-				 sizeof(struct kmod_test_device_info));
+	test_dev->info =
+		vzalloc(array_size(sizeof(struct kmod_test_device_info),
+				   config->num_threads));
 	if (!test_dev->info)
 		return -ENOMEM;
 
diff --git a/lib/test_overflow.c b/lib/test_overflow.c
new file mode 100644
index 000000000000..2278fe05a1b0
--- /dev/null
+++ b/lib/test_overflow.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Test cases for arithmetic overflow checks.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#define DEFINE_TEST_ARRAY(t)			\
+	static const struct test_ ## t {	\
+		t a, b;				\
+		t sum, diff, prod;		\
+		bool s_of, d_of, p_of;		\
+	} t ## _tests[] __initconst
+
+DEFINE_TEST_ARRAY(u8) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U8_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U8_MAX, U8_MAX, 1, 0, false, true, false},
+	{U8_MAX, 0, U8_MAX, U8_MAX, 0, false, false, false},
+	{1, U8_MAX, 0, 2, U8_MAX, true, true, false},
+	{U8_MAX, 1, 0, U8_MAX-1, U8_MAX, true, false, false},
+	{U8_MAX, U8_MAX, U8_MAX-1, 0, 1, true, false, true},
+
+	{U8_MAX, U8_MAX-1, U8_MAX-2, 1, 2, true, false, true},
+	{U8_MAX-1, U8_MAX, U8_MAX-2, U8_MAX, 2, true, true, true},
+
+	{1U << 3, 1U << 3, 1U << 4, 0, 1U << 6, false, false, false},
+	{1U << 4, 1U << 4, 1U << 5, 0, 0, false, false, true},
+	{1U << 4, 1U << 3, 3*(1U << 3), 1U << 3, 1U << 7, false, false, false},
+	{1U << 7, 1U << 7, 0, 0, 0, true, false, true},
+
+	{48, 32, 80, 16, 0, false, false, true},
+	{128, 128, 0, 0, 0, true, false, true},
+	{123, 234, 101, 145, 110, true, true, true},
+};
+DEFINE_TEST_ARRAY(u16) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U16_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U16_MAX, U16_MAX, 1, 0, false, true, false},
+	{U16_MAX, 0, U16_MAX, U16_MAX, 0, false, false, false},
+	{1, U16_MAX, 0, 2, U16_MAX, true, true, false},
+	{U16_MAX, 1, 0, U16_MAX-1, U16_MAX, true, false, false},
+	{U16_MAX, U16_MAX, U16_MAX-1, 0, 1, true, false, true},
+
+	{U16_MAX, U16_MAX-1, U16_MAX-2, 1, 2, true, false, true},
+	{U16_MAX-1, U16_MAX, U16_MAX-2, U16_MAX, 2, true, true, true},
+
+	{1U << 7, 1U << 7, 1U << 8, 0, 1U << 14, false, false, false},
+	{1U << 8, 1U << 8, 1U << 9, 0, 0, false, false, true},
+	{1U << 8, 1U << 7, 3*(1U << 7), 1U << 7, 1U << 15, false, false, false},
+	{1U << 15, 1U << 15, 0, 0, 0, true, false, true},
+
+	{123, 234, 357, 65425, 28782, false, true, false},
+	{1234, 2345, 3579, 64425, 10146, false, true, true},
+};
+DEFINE_TEST_ARRAY(u32) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U32_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U32_MAX, U32_MAX, 1, 0, false, true, false},
+	{U32_MAX, 0, U32_MAX, U32_MAX, 0, false, false, false},
+	{1, U32_MAX, 0, 2, U32_MAX, true, true, false},
+	{U32_MAX, 1, 0, U32_MAX-1, U32_MAX, true, false, false},
+	{U32_MAX, U32_MAX, U32_MAX-1, 0, 1, true, false, true},
+
+	{U32_MAX, U32_MAX-1, U32_MAX-2, 1, 2, true, false, true},
+	{U32_MAX-1, U32_MAX, U32_MAX-2, U32_MAX, 2, true, true, true},
+
+	{1U << 15, 1U << 15, 1U << 16, 0, 1U << 30, false, false, false},
+	{1U << 16, 1U << 16, 1U << 17, 0, 0, false, false, true},
+	{1U << 16, 1U << 15, 3*(1U << 15), 1U << 15, 1U << 31, false, false, false},
+	{1U << 31, 1U << 31, 0, 0, 0, true, false, true},
+
+	{-2U, 1U, -1U, -3U, -2U, false, false, false},
+	{-4U, 5U, 1U, -9U, -20U, true, false, true},
+};
+
+DEFINE_TEST_ARRAY(u64) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U64_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U64_MAX, U64_MAX, 1, 0, false, true, false},
+	{U64_MAX, 0, U64_MAX, U64_MAX, 0, false, false, false},
+	{1, U64_MAX, 0, 2, U64_MAX, true, true, false},
+	{U64_MAX, 1, 0, U64_MAX-1, U64_MAX, true, false, false},
+	{U64_MAX, U64_MAX, U64_MAX-1, 0, 1, true, false, true},
+
+	{U64_MAX, U64_MAX-1, U64_MAX-2, 1, 2, true, false, true},
+	{U64_MAX-1, U64_MAX, U64_MAX-2, U64_MAX, 2, true, true, true},
+
+	{1ULL << 31, 1ULL << 31, 1ULL << 32, 0, 1ULL << 62, false, false, false},
+	{1ULL << 32, 1ULL << 32, 1ULL << 33, 0, 0, false, false, true},
+	{1ULL << 32, 1ULL << 31, 3*(1ULL << 31), 1ULL << 31, 1ULL << 63, false, false, false},
+	{1ULL << 63, 1ULL << 63, 0, 0, 0, true, false, true},
+	{1000000000ULL /* 10^9 */, 10000000000ULL /* 10^10 */,
+	 11000000000ULL, 18446744064709551616ULL, 10000000000000000000ULL,
+	 false, true, false},
+	{-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true},
+};
+
+DEFINE_TEST_ARRAY(s8) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S8_MAX, S8_MAX, -S8_MAX, 0, false, false, false},
+	{S8_MAX, 0, S8_MAX, S8_MAX, 0, false, false, false},
+	{0, S8_MIN, S8_MIN, S8_MIN, 0, false, true, false},
+	{S8_MIN, 0, S8_MIN, S8_MIN, 0, false, false, false},
+
+	{-1, S8_MIN, S8_MAX, S8_MAX, S8_MIN, true, false, true},
+	{S8_MIN, -1, S8_MAX, -S8_MAX, S8_MIN, true, false, true},
+	{-1, S8_MAX, S8_MAX-1, S8_MIN, -S8_MAX, false, false, false},
+	{S8_MAX, -1, S8_MAX-1, S8_MIN, -S8_MAX, false, true, false},
+	{-1, -S8_MAX, S8_MIN, S8_MAX-1, S8_MAX, false, false, false},
+	{-S8_MAX, -1, S8_MIN, S8_MIN+2, S8_MAX, false, false, false},
+
+	{1, S8_MIN, -S8_MAX, -S8_MAX, S8_MIN, false, true, false},
+	{S8_MIN, 1, -S8_MAX, S8_MAX, S8_MIN, false, true, false},
+	{1, S8_MAX, S8_MIN, S8_MIN+2, S8_MAX, true, false, false},
+	{S8_MAX, 1, S8_MIN, S8_MAX-1, S8_MAX, true, false, false},
+
+	{S8_MIN, S8_MIN, 0, 0, 0, true, false, true},
+	{S8_MAX, S8_MAX, -2, 0, 1, true, false, true},
+
+	{-4, -32, -36, 28, -128, false, false, true},
+	{-4, 32, 28, -36, -128, false, false, false},
+};
+
+DEFINE_TEST_ARRAY(s16) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S16_MAX, S16_MAX, -S16_MAX, 0, false, false, false},
+	{S16_MAX, 0, S16_MAX, S16_MAX, 0, false, false, false},
+	{0, S16_MIN, S16_MIN, S16_MIN, 0, false, true, false},
+	{S16_MIN, 0, S16_MIN, S16_MIN, 0, false, false, false},
+
+	{-1, S16_MIN, S16_MAX, S16_MAX, S16_MIN, true, false, true},
+	{S16_MIN, -1, S16_MAX, -S16_MAX, S16_MIN, true, false, true},
+	{-1, S16_MAX, S16_MAX-1, S16_MIN, -S16_MAX, false, false, false},
+	{S16_MAX, -1, S16_MAX-1, S16_MIN, -S16_MAX, false, true, false},
+	{-1, -S16_MAX, S16_MIN, S16_MAX-1, S16_MAX, false, false, false},
+	{-S16_MAX, -1, S16_MIN, S16_MIN+2, S16_MAX, false, false, false},
+
+	{1, S16_MIN, -S16_MAX, -S16_MAX, S16_MIN, false, true, false},
+	{S16_MIN, 1, -S16_MAX, S16_MAX, S16_MIN, false, true, false},
+	{1, S16_MAX, S16_MIN, S16_MIN+2, S16_MAX, true, false, false},
+	{S16_MAX, 1, S16_MIN, S16_MAX-1, S16_MAX, true, false, false},
+
+	{S16_MIN, S16_MIN, 0, 0, 0, true, false, true},
+	{S16_MAX, S16_MAX, -2, 0, 1, true, false, true},
+};
+DEFINE_TEST_ARRAY(s32) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S32_MAX, S32_MAX, -S32_MAX, 0, false, false, false},
+	{S32_MAX, 0, S32_MAX, S32_MAX, 0, false, false, false},
+	{0, S32_MIN, S32_MIN, S32_MIN, 0, false, true, false},
+	{S32_MIN, 0, S32_MIN, S32_MIN, 0, false, false, false},
+
+	{-1, S32_MIN, S32_MAX, S32_MAX, S32_MIN, true, false, true},
+	{S32_MIN, -1, S32_MAX, -S32_MAX, S32_MIN, true, false, true},
+	{-1, S32_MAX, S32_MAX-1, S32_MIN, -S32_MAX, false, false, false},
+	{S32_MAX, -1, S32_MAX-1, S32_MIN, -S32_MAX, false, true, false},
+	{-1, -S32_MAX, S32_MIN, S32_MAX-1, S32_MAX, false, false, false},
+	{-S32_MAX, -1, S32_MIN, S32_MIN+2, S32_MAX, false, false, false},
+
+	{1, S32_MIN, -S32_MAX, -S32_MAX, S32_MIN, false, true, false},
+	{S32_MIN, 1, -S32_MAX, S32_MAX, S32_MIN, false, true, false},
+	{1, S32_MAX, S32_MIN, S32_MIN+2, S32_MAX, true, false, false},
+	{S32_MAX, 1, S32_MIN, S32_MAX-1, S32_MAX, true, false, false},
+
+	{S32_MIN, S32_MIN, 0, 0, 0, true, false, true},
+	{S32_MAX, S32_MAX, -2, 0, 1, true, false, true},
+};
+DEFINE_TEST_ARRAY(s64) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S64_MAX, S64_MAX, -S64_MAX, 0, false, false, false},
+	{S64_MAX, 0, S64_MAX, S64_MAX, 0, false, false, false},
+	{0, S64_MIN, S64_MIN, S64_MIN, 0, false, true, false},
+	{S64_MIN, 0, S64_MIN, S64_MIN, 0, false, false, false},
+
+	{-1, S64_MIN, S64_MAX, S64_MAX, S64_MIN, true, false, true},
+	{S64_MIN, -1, S64_MAX, -S64_MAX, S64_MIN, true, false, true},
+	{-1, S64_MAX, S64_MAX-1, S64_MIN, -S64_MAX, false, false, false},
+	{S64_MAX, -1, S64_MAX-1, S64_MIN, -S64_MAX, false, true, false},
+	{-1, -S64_MAX, S64_MIN, S64_MAX-1, S64_MAX, false, false, false},
+	{-S64_MAX, -1, S64_MIN, S64_MIN+2, S64_MAX, false, false, false},
+
+	{1, S64_MIN, -S64_MAX, -S64_MAX, S64_MIN, false, true, false},
+	{S64_MIN, 1, -S64_MAX, S64_MAX, S64_MIN, false, true, false},
+	{1, S64_MAX, S64_MIN, S64_MIN+2, S64_MAX, true, false, false},
+	{S64_MAX, 1, S64_MIN, S64_MAX-1, S64_MAX, true, false, false},
+
+	{S64_MIN, S64_MIN, 0, 0, 0, true, false, true},
+	{S64_MAX, S64_MAX, -2, 0, 1, true, false, true},
+
+	{-1, -1, -2, 0, 1, false, false, false},
+	{-1, -128, -129, 127, 128, false, false, false},
+	{-128, -1, -129, -127, 128, false, false, false},
+	{0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false},
+};
+
+#define check_one_op(t, fmt, op, sym, a, b, r, of) do {		\
+	t _r;							\
+	bool _of;						\
+								\
+	_of = check_ ## op ## _overflow(a, b, &_r);		\
+	if (_of != of) {					\
+		pr_warn("expected "fmt" "sym" "fmt		\
+			" to%s overflow (type %s)\n",		\
+			a, b, of ? "" : " not", #t);		\
+		err = 1;					\
+	}							\
+	if (_r != r) {						\
+		pr_warn("expected "fmt" "sym" "fmt" == "	\
+			fmt", got "fmt" (type %s)\n",		\
+			a, b, r, _r, #t);			\
+		err = 1;					\
+	}							\
+} while (0)
+
+#define DEFINE_TEST_FUNC(t, fmt)					\
+static int __init do_test_ ## t(const struct test_ ## t *p)		\
+{							   		\
+	int err = 0;							\
+									\
+	check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of);	\
+	check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of);	\
+	check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of);	\
+	check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of);	\
+	check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of);	\
+									\
+	return err;							\
+}									\
+									\
+static int __init test_ ## t ## _overflow(void) {			\
+	int err = 0;							\
+	unsigned i;							\
+									\
+	pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests));	\
+	for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i)			\
+		err |= do_test_ ## t(&t ## _tests[i]);			\
+	return err;							\
+}
+
+DEFINE_TEST_FUNC(u8, "%d");
+DEFINE_TEST_FUNC(s8, "%d");
+DEFINE_TEST_FUNC(u16, "%d");
+DEFINE_TEST_FUNC(s16, "%d");
+DEFINE_TEST_FUNC(u32, "%u");
+DEFINE_TEST_FUNC(s32, "%d");
+#if BITS_PER_LONG == 64
+DEFINE_TEST_FUNC(u64, "%llu");
+DEFINE_TEST_FUNC(s64, "%lld");
+#endif
+
+static int __init test_overflow_calculation(void)
+{
+	int err = 0;
+
+	err |= test_u8_overflow();
+	err |= test_s8_overflow();
+	err |= test_u16_overflow();
+	err |= test_s16_overflow();
+	err |= test_u32_overflow();
+	err |= test_s32_overflow();
+#if BITS_PER_LONG == 64
+	err |= test_u64_overflow();
+	err |= test_s64_overflow();
+#endif
+
+	return err;
+}
+
+/*
+ * Deal with the various forms of allocator arguments. See comments above
+ * the DEFINE_TEST_ALLOC() instances for mapping of the "bits".
+ */
+#define alloc010(alloc, arg, sz) alloc(sz, GFP_KERNEL)
+#define alloc011(alloc, arg, sz) alloc(sz, GFP_KERNEL, NUMA_NO_NODE)
+#define alloc000(alloc, arg, sz) alloc(sz)
+#define alloc001(alloc, arg, sz) alloc(sz, NUMA_NO_NODE)
+#define alloc110(alloc, arg, sz) alloc(arg, sz, GFP_KERNEL)
+#define free0(free, arg, ptr)	 free(ptr)
+#define free1(free, arg, ptr)	 free(arg, ptr)
+
+/* Wrap around to 8K */
+#define TEST_SIZE		(9 << PAGE_SHIFT)
+
+#define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\
+static int __init test_ ## func (void *arg)				\
+{									\
+	volatile size_t a = TEST_SIZE;					\
+	volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1;			\
+	void *ptr;							\
+									\
+	/* Tiny allocation test. */					\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\
+	if (!ptr) {							\
+		pr_warn(#func " failed regular allocation?!\n");	\
+		return 1;						\
+	}								\
+	free ## want_arg (free_func, arg, ptr);				\
+									\
+	/* Wrapped allocation test. */					\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
+							  a * b);	\
+	if (!ptr) {							\
+		pr_warn(#func " unexpectedly failed bad wrapping?!\n");	\
+		return 1;						\
+	}								\
+	free ## want_arg (free_func, arg, ptr);				\
+									\
+	/* Saturated allocation test. */				\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
+						   array_size(a, b));	\
+	if (ptr) {							\
+		pr_warn(#func " missed saturation!\n");			\
+		free ## want_arg (free_func, arg, ptr);			\
+		return 1;						\
+	}								\
+	pr_info(#func " detected saturation\n");			\
+	return 0;							\
+}
+
+/*
+ * Allocator uses a trailing node argument --------+  (e.g. kmalloc_node())
+ * Allocator uses the gfp_t argument -----------+  |  (e.g. kmalloc())
+ * Allocator uses a special leading argument +  |  |  (e.g. devm_kmalloc())
+ *                                           |  |  |
+ */
+DEFINE_TEST_ALLOC(kmalloc,	 kfree,	     0, 1, 0);
+DEFINE_TEST_ALLOC(kmalloc_node,	 kfree,	     0, 1, 1);
+DEFINE_TEST_ALLOC(kzalloc,	 kfree,	     0, 1, 0);
+DEFINE_TEST_ALLOC(kzalloc_node,  kfree,	     0, 1, 1);
+DEFINE_TEST_ALLOC(vmalloc,	 vfree,	     0, 0, 0);
+DEFINE_TEST_ALLOC(vmalloc_node,  vfree,	     0, 0, 1);
+DEFINE_TEST_ALLOC(vzalloc,	 vfree,	     0, 0, 0);
+DEFINE_TEST_ALLOC(vzalloc_node,  vfree,	     0, 0, 1);
+DEFINE_TEST_ALLOC(kvmalloc,	 kvfree,     0, 1, 0);
+DEFINE_TEST_ALLOC(kvmalloc_node, kvfree,     0, 1, 1);
+DEFINE_TEST_ALLOC(kvzalloc,	 kvfree,     0, 1, 0);
+DEFINE_TEST_ALLOC(kvzalloc_node, kvfree,     0, 1, 1);
+DEFINE_TEST_ALLOC(devm_kmalloc,  devm_kfree, 1, 1, 0);
+DEFINE_TEST_ALLOC(devm_kzalloc,  devm_kfree, 1, 1, 0);
+
+static int __init test_overflow_allocation(void)
+{
+	const char device_name[] = "overflow-test";
+	struct device *dev;
+	int err = 0;
+
+	/* Create dummy device for devm_kmalloc()-family tests. */
+	dev = root_device_register(device_name);
+	if (IS_ERR(dev)) {
+		pr_warn("Cannot register test device\n");
+		return 1;
+	}
+
+	err |= test_kmalloc(NULL);
+	err |= test_kmalloc_node(NULL);
+	err |= test_kzalloc(NULL);
+	err |= test_kzalloc_node(NULL);
+	err |= test_kvmalloc(NULL);
+	err |= test_kvmalloc_node(NULL);
+	err |= test_kvzalloc(NULL);
+	err |= test_kvzalloc_node(NULL);
+	err |= test_vmalloc(NULL);
+	err |= test_vmalloc_node(NULL);
+	err |= test_vzalloc(NULL);
+	err |= test_vzalloc_node(NULL);
+	err |= test_devm_kmalloc(dev);
+	err |= test_devm_kzalloc(dev);
+
+	device_unregister(dev);
+
+	return err;
+}
+
+static int __init test_module_init(void)
+{
+	int err = 0;
+
+	err |= test_overflow_calculation();
+	err |= test_overflow_allocation();
+
+	if (err) {
+		pr_warn("FAIL!\n");
+		err = -EINVAL;
+	} else {
+		pr_info("all tests passed\n");
+	}
+
+	return err;
+}
+
+static void __exit test_module_exit(void)
+{ }
+
+module_init(test_module_init);
+module_exit(test_module_exit);
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index f4000c137dbe..fb6968109113 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -285,12 +285,14 @@ static int __init test_rhltable(unsigned int entries)
 	if (entries == 0)
 		entries = 1;
 
-	rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries);
+	rhl_test_objects = vzalloc(array_size(entries,
+					      sizeof(*rhl_test_objects)));
 	if (!rhl_test_objects)
 		return -ENOMEM;
 
 	ret = -ENOMEM;
-	obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long));
+	obj_in_table = vzalloc(array_size(sizeof(unsigned long),
+					  BITS_TO_LONGS(entries)));
 	if (!obj_in_table)
 		goto out_free;
 
@@ -706,7 +708,8 @@ static int __init test_rht_init(void)
 	test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries);
 	test_rht_params.nelem_hint = size;
 
-	objs = vzalloc((test_rht_params.max_size + 1) * sizeof(struct test_obj));
+	objs = vzalloc(array_size(sizeof(struct test_obj),
+				  test_rht_params.max_size + 1));
 	if (!objs)
 		return -ENOMEM;
 
@@ -753,10 +756,10 @@ static int __init test_rht_init(void)
 	pr_info("Testing concurrent rhashtable access from %d threads\n",
 	        tcount);
 	sema_init(&prestart_sem, 1 - tcount);
-	tdata = vzalloc(tcount * sizeof(struct thread_data));
+	tdata = vzalloc(array_size(tcount, sizeof(struct thread_data)));
 	if (!tdata)
 		return -ENOMEM;
-	objs  = vzalloc(tcount * entries * sizeof(struct test_obj));
+	objs  = vzalloc(array3_size(sizeof(struct test_obj), tcount, entries));
 	if (!objs) {
 		vfree(tdata);
 		return -ENOMEM;
diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c
index 25ca2d4c1e19..597998169a96 100644
--- a/lib/ucmpdi2.c
+++ b/lib/ucmpdi2.c
@@ -17,7 +17,7 @@
 #include <linux/module.h>
 #include <linux/libgcc.h>
 
-word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b)
 {
 	const DWunion au = {.ll = a};
 	const DWunion bu = {.ll = b};
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index d7e06b28de38..0a559a42359b 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -112,3 +112,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength)
 	return j;
 }
 EXPORT_SYMBOL(ucs2_as_utf8);
+
+MODULE_LICENSE("GPL v2");