diff options
Diffstat (limited to 'lib')
41 files changed, 1341 insertions, 3815 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 7a913937888b..706836ec314d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -405,7 +405,7 @@ config ASSOCIATIVE_ARRAY See: - Documentation/assoc_array.txt + Documentation/core-api/assoc_array.rst for more information. @@ -420,60 +420,15 @@ config HAS_IOPORT_MAP depends on HAS_IOMEM && !NO_IOPORT_MAP default y -config HAS_DMA - bool - depends on !NO_DMA - default y +source "kernel/dma/Kconfig" config SGL_ALLOC bool default n -config NEED_SG_DMA_LENGTH - bool - -config NEED_DMA_MAP_STATE - bool - -config ARCH_DMA_ADDR_T_64BIT - def_bool 64BIT || PHYS_ADDR_T_64BIT - config IOMMU_HELPER bool -config ARCH_HAS_SYNC_DMA_FOR_DEVICE - bool - -config ARCH_HAS_SYNC_DMA_FOR_CPU - bool - select NEED_DMA_MAP_STATE - -config DMA_DIRECT_OPS - bool - depends on HAS_DMA - -config DMA_NONCOHERENT_OPS - bool - depends on HAS_DMA - select DMA_DIRECT_OPS - -config DMA_NONCOHERENT_MMAP - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_NONCOHERENT_CACHE_SYNC - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_VIRT_OPS - bool - depends on HAS_DMA - -config SWIOTLB - bool - select DMA_DIRECT_OPS - select NEED_DMA_MAP_STATE - config CHECK_SIGNATURE bool @@ -621,6 +576,9 @@ config ARCH_HAS_PMEM_API config ARCH_HAS_UACCESS_FLUSHCACHE bool +config ARCH_HAS_UACCESS_MCSAFE + bool + config STACKDEPOT bool select STACKTRACE @@ -639,20 +597,20 @@ config STRING_SELFTEST endmenu -config GENERIC_ASHLDI3 +config GENERIC_LIB_ASHLDI3 bool -config GENERIC_ASHRDI3 +config GENERIC_LIB_ASHRDI3 bool -config GENERIC_LSHRDI3 +config GENERIC_LIB_LSHRDI3 bool -config GENERIC_MULDI3 +config GENERIC_LIB_MULDI3 bool -config GENERIC_CMPDI2 +config GENERIC_LIB_CMPDI2 bool -config GENERIC_UCMPDI2 +config GENERIC_LIB_UCMPDI2 bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a7ef03009e9e..ce5a45e46885 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -747,12 +747,15 @@ config ARCH_HAS_KCOV only for x86_64. KCOV requires testing on other archs, and most likely disabling of instrumentation for some early boot code. +config CC_HAS_SANCOV_TRACE_PC + def_bool $(cc-option,-fsanitize-coverage=trace-pc) + config KCOV bool "Code coverage for fuzzing" depends on ARCH_HAS_KCOV + depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS select DEBUG_FS - select GCC_PLUGINS if !COMPILE_TEST - select GCC_PLUGIN_SANCOV if !COMPILE_TEST + select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). @@ -766,7 +769,7 @@ config KCOV config KCOV_ENABLE_COMPARISONS bool "Enable comparison operands collection by KCOV" depends on KCOV - default n + depends on $(cc-option,-fsanitize-coverage=trace-cmp) help KCOV also exposes operands of every comparison in the instrumented code along with operand sizes and PCs of the comparison instructions. @@ -776,7 +779,7 @@ config KCOV_ENABLE_COMPARISONS config KCOV_INSTRUMENT_ALL bool "Instrument all code by default" depends on KCOV - default y if KCOV + default y help If you are doing generic system call fuzzing (like e.g. syzkaller), then you will want to instrument the whole kernel and you should @@ -1514,6 +1517,10 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. +config FUNCTION_ERROR_INJECTION + def_bool y + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + config FAULT_INJECTION bool "Fault-injection framework" depends on DEBUG_KERNEL @@ -1521,10 +1528,6 @@ config FAULT_INJECTION Provide fault-injection framework. For more details, see Documentation/fault-injection/. -config FUNCTION_ERROR_INJECTION - def_bool y - depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES - config FAILSLAB bool "Fault-injection capability for kmalloc" depends on FAULT_INJECTION @@ -1555,16 +1558,6 @@ config FAIL_IO_TIMEOUT Only works with drivers that use the generic timeout handling, for others it wont do anything. -config FAIL_MMC_REQUEST - bool "Fault-injection capability for MMC IO" - depends on FAULT_INJECTION_DEBUG_FS && MMC - help - Provide fault-injection capability for MMC IO. - This will make the mmc core return data errors. This is - useful to test the error handling in the mmc block device - and to test how the mmc host driver handles retries from - the block device. - config FAIL_FUTEX bool "Fault-injection capability for futexes" select DEBUG_FS @@ -1572,6 +1565,12 @@ config FAIL_FUTEX help Provide fault-injection capability for futexes. +config FAULT_INJECTION_DEBUG_FS + bool "Debugfs entries for fault-injection capabilities" + depends on FAULT_INJECTION && SYSFS && DEBUG_FS + help + Enable configuration of fault-injection capabilities via debugfs. + config FAIL_FUNCTION bool "Fault-injection capability for functions" depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION @@ -1582,11 +1581,15 @@ config FAIL_FUNCTION an error value and have to handle it. This is useful to test the error handling in various subsystems. -config FAULT_INJECTION_DEBUG_FS - bool "Debugfs entries for fault-injection capabilities" - depends on FAULT_INJECTION && SYSFS && DEBUG_FS +config FAIL_MMC_REQUEST + bool "Fault-injection capability for MMC IO" + depends on FAULT_INJECTION_DEBUG_FS && MMC help - Enable configuration of fault-injection capabilities via debugfs. + Provide fault-injection capability for MMC IO. + This will make the mmc core return data errors. This is + useful to test the error handling in the mmc block device + and to test how the mmc host driver handles retries from + the block device. config FAULT_INJECTION_STACKTRACE_FILTER bool "stacktrace filter for fault-injection capabilities" @@ -1726,7 +1729,7 @@ config KPROBES_SANITY_TEST default n help This option provides for testing basic kprobes functionality on - boot. A sample kprobe, jprobe and kretprobe are inserted and + boot. Samples of kprobe and kretprobe are inserted and verified for functionality. Say N if you are unsure. @@ -1813,6 +1816,9 @@ config TEST_BITMAP config TEST_UUID tristate "Test functions located in the uuid module at runtime" +config TEST_OVERFLOW + tristate "Test check_*_overflow() functions at runtime" + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" default n diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 3d35d062970d..befb127507c0 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,7 +5,8 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" - depends on SLUB || (SLAB && !DEBUG_SLAB) + depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) + select SLUB_DEBUG if SLUB select CONSTRUCTORS select STACKDEPOT help diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 19d42ea75ec2..98fa559ebd80 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -1,9 +1,6 @@ config ARCH_HAS_UBSAN_SANITIZE_ALL bool -config ARCH_WANTS_UBSAN_NO_NULL - def_bool n - config UBSAN bool "Undefined behaviour sanity checker" help @@ -39,14 +36,6 @@ config UBSAN_ALIGNMENT Enabling this option on architectures that support unaligned accesses may produce a lot of false positives. -config UBSAN_NULL - bool "Enable checking of null pointers" - depends on UBSAN - default y if !ARCH_WANTS_UBSAN_NO_NULL - help - This option enables detection of memory accesses via a - null pointer. - config TEST_UBSAN tristate "Module for testing for undefined behavior detection" depends on m && UBSAN diff --git a/lib/Makefile b/lib/Makefile index 9f18c8152281..d4a3773ce4a6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,15 +23,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o siphash.o \ + earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o -lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o -lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -60,6 +57,7 @@ UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o obj-$(CONFIG_TEST_LKM) += test_module.o +obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_SORT) += test_sort.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o @@ -97,10 +95,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o -ifneq ($(CONFIG_HAVE_DEC_LOCK),y) - lib-y += dec_and_lock.o -endif - obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o @@ -122,6 +116,7 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o +CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500) obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ @@ -147,7 +142,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o -obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o @@ -168,8 +162,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o obj-$(CONFIG_LRU_CACHE) += lru_cache.o -obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o - obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o @@ -260,9 +252,9 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o obj-$(CONFIG_PARMAN) += parman.o # GCC library routines -obj-$(CONFIG_GENERIC_ASHLDI3) += ashldi3.o -obj-$(CONFIG_GENERIC_ASHRDI3) += ashrdi3.o -obj-$(CONFIG_GENERIC_LSHRDI3) += lshrdi3.o -obj-$(CONFIG_GENERIC_MULDI3) += muldi3.o -obj-$(CONFIG_GENERIC_CMPDI2) += cmpdi2.o -obj-$(CONFIG_GENERIC_UCMPDI2) += ucmpdi2.o +obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o +obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o +obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o +obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o +obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o +obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o diff --git a/lib/argv_split.c b/lib/argv_split.c index 5c35752a9414..1a19a0a93dc1 100644 --- a/lib/argv_split.c +++ b/lib/argv_split.c @@ -69,7 +69,7 @@ char **argv_split(gfp_t gfp, const char *str, int *argcp) return NULL; argc = count_argc(argv_str); - argv = kmalloc(sizeof(*argv) * (argc + 2), gfp); + argv = kmalloc_array(argc + 2, sizeof(*argv), gfp); if (!argv) { kfree(argv_str); return NULL; diff --git a/lib/atomic64.c b/lib/atomic64.c index 53c2d5edc826..1d91e31eceec 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -178,18 +178,18 @@ long long atomic64_xchg(atomic64_t *v, long long new) } EXPORT_SYMBOL(atomic64_xchg); -int atomic64_add_unless(atomic64_t *v, long long a, long long u) +long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; raw_spinlock_t *lock = lock_addr(v); - int ret = 0; + long long val; raw_spin_lock_irqsave(lock, flags); - if (v->counter != u) { + val = v->counter; + if (val != u) v->counter += a; - ret = 1; - } raw_spin_unlock_irqrestore(lock, flags); - return ret; + + return val; } -EXPORT_SYMBOL(atomic64_add_unless); +EXPORT_SYMBOL(atomic64_fetch_add_unless); diff --git a/lib/bch.c b/lib/bch.c index bc89dfe4d1b3..7b0f2006698b 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -78,15 +78,22 @@ #define GF_M(_p) (CONFIG_BCH_CONST_M) #define GF_T(_p) (CONFIG_BCH_CONST_T) #define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) +#define BCH_MAX_M (CONFIG_BCH_CONST_M) #else #define GF_M(_p) ((_p)->m) #define GF_T(_p) ((_p)->t) #define GF_N(_p) ((_p)->n) +#define BCH_MAX_M 15 #endif +#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M) + #define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) #define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) +#define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32) +#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8) + #ifndef dbg #define dbg(_fmt, args...) do {} while (0) #endif @@ -187,7 +194,8 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, const unsigned int l = BCH_ECC_WORDS(bch)-1; unsigned int i, mlen; unsigned long m; - uint32_t w, r[l+1]; + uint32_t w, r[BCH_ECC_MAX_WORDS]; + const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r); const uint32_t * const tab0 = bch->mod8_tab; const uint32_t * const tab1 = tab0 + 256*(l+1); const uint32_t * const tab2 = tab1 + 256*(l+1); @@ -198,7 +206,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, /* load ecc parity bytes into internal 32-bit buffer */ load_ecc8(bch, bch->ecc_buf, ecc); } else { - memset(bch->ecc_buf, 0, sizeof(r)); + memset(bch->ecc_buf, 0, r_bytes); } /* process first unaligned data bytes */ @@ -215,7 +223,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, mlen = len/4; data += 4*mlen; len -= 4*mlen; - memcpy(r, bch->ecc_buf, sizeof(r)); + memcpy(r, bch->ecc_buf, r_bytes); /* * split each 32-bit word into 4 polynomials of weight 8 as follows: @@ -241,7 +249,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, r[l] = p0[l]^p1[l]^p2[l]^p3[l]; } - memcpy(bch->ecc_buf, r, sizeof(r)); + memcpy(bch->ecc_buf, r, r_bytes); /* process last unaligned bytes */ if (len) @@ -434,7 +442,7 @@ static int solve_linear_system(struct bch_control *bch, unsigned int *rows, { const int m = GF_M(bch); unsigned int tmp, mask; - int rem, c, r, p, k, param[m]; + int rem, c, r, p, k, param[BCH_MAX_M]; k = 0; mask = 1 << m; @@ -1114,7 +1122,7 @@ static int build_deg2_base(struct bch_control *bch) { const int m = GF_M(bch); int i, j, r; - unsigned int sum, x, y, remaining, ak = 0, xi[m]; + unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M]; /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */ for (i = 0; i < m; i++) { @@ -1254,7 +1262,6 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) struct bch_control *bch = NULL; const int min_m = 5; - const int max_m = 15; /* default primitive polynomials */ static const unsigned int prim_poly_tab[] = { @@ -1270,7 +1277,7 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) goto fail; } #endif - if ((m < min_m) || (m > max_m)) + if ((m < min_m) || (m > BCH_MAX_M)) /* * values of m greater than 15 are not currently supported; * supporting m > 15 would require changing table base type diff --git a/lib/bitmap.c b/lib/bitmap.c index a42eff7e8c48..58f9750e49c6 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -64,12 +64,9 @@ EXPORT_SYMBOL(__bitmap_equal); void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits) { - unsigned int k, lim = bits/BITS_PER_LONG; + unsigned int k, lim = BITS_TO_LONGS(bits); for (k = 0; k < lim; ++k) dst[k] = ~src[k]; - - if (bits % BITS_PER_LONG) - dst[k] = ~src[k]; } EXPORT_SYMBOL(__bitmap_complement); diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c index 266a97c5708b..ade3ce6c4af6 100644 --- a/lib/bucket_locks.c +++ b/lib/bucket_locks.c @@ -30,10 +30,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, } if (sizeof(spinlock_t) != 0) { - if (gfpflags_allow_blocking(gfp)) - tlocks = kvmalloc(size * sizeof(spinlock_t), gfp); - else - tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp); + tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp); if (!tlocks) return -ENOMEM; for (i = 0; i < size; i++) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 994be4805cec..70935ed91125 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -360,9 +360,12 @@ static void debug_object_is_on_stack(void *addr, int onstack) limit++; if (is_on_stack) - pr_warn("object is on stack, but not annotated\n"); + pr_warn("object %p is on stack %p, but NOT annotated.\n", addr, + task_stack_page(current)); else - pr_warn("object is not on stack, but annotated\n"); + pr_warn("object %p is NOT on stack %p, but annotated.\n", addr, + task_stack_page(current)); + WARN_ON(1); } @@ -1185,8 +1188,7 @@ void __init debug_objects_mem_init(void) if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; - if (obj_cache) - kmem_cache_destroy(obj_cache); + kmem_cache_destroy(obj_cache); pr_warn("out of memory.\n"); } else debug_objects_selftest(); diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 347fa7ac2e8a..9555b68bb774 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) } EXPORT_SYMBOL(_atomic_dec_and_lock); + +int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock_irqrestore(lock, *flags); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); diff --git a/lib/dma-debug.c b/lib/dma-debug.c deleted file mode 100644 index c007d25bee09..000000000000 --- a/lib/dma-debug.c +++ /dev/null @@ -1,1773 +0,0 @@ -/* - * Copyright (C) 2008 Advanced Micro Devices, Inc. - * - * Author: Joerg Roedel <joerg.roedel@amd.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/sched/task_stack.h> -#include <linux/scatterlist.h> -#include <linux/dma-mapping.h> -#include <linux/sched/task.h> -#include <linux/stacktrace.h> -#include <linux/dma-debug.h> -#include <linux/spinlock.h> -#include <linux/vmalloc.h> -#include <linux/debugfs.h> -#include <linux/uaccess.h> -#include <linux/export.h> -#include <linux/device.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/ctype.h> -#include <linux/list.h> -#include <linux/slab.h> - -#include <asm/sections.h> - -#define HASH_SIZE 1024ULL -#define HASH_FN_SHIFT 13 -#define HASH_FN_MASK (HASH_SIZE - 1) - -/* allow architectures to override this if absolutely required */ -#ifndef PREALLOC_DMA_DEBUG_ENTRIES -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -#endif - -enum { - dma_debug_single, - dma_debug_page, - dma_debug_sg, - dma_debug_coherent, - dma_debug_resource, -}; - -enum map_err_types { - MAP_ERR_CHECK_NOT_APPLICABLE, - MAP_ERR_NOT_CHECKED, - MAP_ERR_CHECKED, -}; - -#define DMA_DEBUG_STACKTRACE_ENTRIES 5 - -/** - * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping - * @list: node on pre-allocated free_entries list - * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent - * @type: single, page, sg, coherent - * @pfn: page frame of the start address - * @offset: offset of mapping relative to pfn - * @size: length of the mapping - * @direction: enum dma_data_direction - * @sg_call_ents: 'nents' from dma_map_sg - * @sg_mapped_ents: 'mapped_ents' from dma_map_sg - * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected - */ -struct dma_debug_entry { - struct list_head list; - struct device *dev; - int type; - unsigned long pfn; - size_t offset; - u64 dev_addr; - u64 size; - int direction; - int sg_call_ents; - int sg_mapped_ents; - enum map_err_types map_err_type; -#ifdef CONFIG_STACKTRACE - struct stack_trace stacktrace; - unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; -#endif -}; - -typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *); - -struct hash_bucket { - struct list_head list; - spinlock_t lock; -} ____cacheline_aligned_in_smp; - -/* Hash list to save the allocated dma addresses */ -static struct hash_bucket dma_entry_hash[HASH_SIZE]; -/* List of pre-allocated dma_debug_entry's */ -static LIST_HEAD(free_entries); -/* Lock for the list above */ -static DEFINE_SPINLOCK(free_entries_lock); - -/* Global disable flag - will be set in case of an error */ -static bool global_disable __read_mostly; - -/* Early initialization disable flag, set at the end of dma_debug_init */ -static bool dma_debug_initialized __read_mostly; - -static inline bool dma_debug_disabled(void) -{ - return global_disable || !dma_debug_initialized; -} - -/* Global error count */ -static u32 error_count; - -/* Global error show enable*/ -static u32 show_all_errors __read_mostly; -/* Number of errors to show */ -static u32 show_num_errors = 1; - -static u32 num_free_entries; -static u32 min_free_entries; -static u32 nr_total_entries; - -/* number of preallocated entries requested by kernel cmdline */ -static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - -/* debugfs dentry's for the stuff above */ -static struct dentry *dma_debug_dent __read_mostly; -static struct dentry *global_disable_dent __read_mostly; -static struct dentry *error_count_dent __read_mostly; -static struct dentry *show_all_errors_dent __read_mostly; -static struct dentry *show_num_errors_dent __read_mostly; -static struct dentry *num_free_entries_dent __read_mostly; -static struct dentry *min_free_entries_dent __read_mostly; -static struct dentry *filter_dent __read_mostly; - -/* per-driver filter related state */ - -#define NAME_MAX_LEN 64 - -static char current_driver_name[NAME_MAX_LEN] __read_mostly; -static struct device_driver *current_driver __read_mostly; - -static DEFINE_RWLOCK(driver_name_lock); - -static const char *const maperr2str[] = { - [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", - [MAP_ERR_NOT_CHECKED] = "dma map error not checked", - [MAP_ERR_CHECKED] = "dma map error checked", -}; - -static const char *type2name[5] = { "single", "page", - "scather-gather", "coherent", - "resource" }; - -static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", - "DMA_FROM_DEVICE", "DMA_NONE" }; - -/* - * The access to some variables in this macro is racy. We can't use atomic_t - * here because all these variables are exported to debugfs. Some of them even - * writeable. This is also the reason why a lock won't help much. But anyway, - * the races are no big deal. Here is why: - * - * error_count: the addition is racy, but the worst thing that can happen is - * that we don't count some errors - * show_num_errors: the subtraction is racy. Also no big deal because in - * worst case this will result in one warning more in the - * system log than the user configured. This variable is - * writeable via debugfs. - */ -static inline void dump_entry_trace(struct dma_debug_entry *entry) -{ -#ifdef CONFIG_STACKTRACE - if (entry) { - pr_warning("Mapped at:\n"); - print_stack_trace(&entry->stacktrace, 0); - } -#endif -} - -static bool driver_filter(struct device *dev) -{ - struct device_driver *drv; - unsigned long flags; - bool ret; - - /* driver filter off */ - if (likely(!current_driver_name[0])) - return true; - - /* driver filter on and initialized */ - if (current_driver && dev && dev->driver == current_driver) - return true; - - /* driver filter on, but we can't filter on a NULL device... */ - if (!dev) - return false; - - if (current_driver || !current_driver_name[0]) - return false; - - /* driver filter on but not yet initialized */ - drv = dev->driver; - if (!drv) - return false; - - /* lock to protect against change of current_driver_name */ - read_lock_irqsave(&driver_name_lock, flags); - - ret = false; - if (drv->name && - strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { - current_driver = drv; - ret = true; - } - - read_unlock_irqrestore(&driver_name_lock, flags); - - return ret; -} - -#define err_printk(dev, entry, format, arg...) do { \ - error_count += 1; \ - if (driver_filter(dev) && \ - (show_all_errors || show_num_errors > 0)) { \ - WARN(1, "%s %s: " format, \ - dev ? dev_driver_string(dev) : "NULL", \ - dev ? dev_name(dev) : "NULL", ## arg); \ - dump_entry_trace(entry); \ - } \ - if (!show_all_errors && show_num_errors > 0) \ - show_num_errors -= 1; \ - } while (0); - -/* - * Hash related functions - * - * Every DMA-API request is saved into a struct dma_debug_entry. To - * have quick access to these structs they are stored into a hash. - */ -static int hash_fn(struct dma_debug_entry *entry) -{ - /* - * Hash function is based on the dma address. - * We use bits 20-27 here as the index into the hash - */ - return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; -} - -/* - * Request exclusive access to a hash bucket for a given dma_debug_entry. - */ -static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, - unsigned long *flags) - __acquires(&dma_entry_hash[idx].lock) -{ - int idx = hash_fn(entry); - unsigned long __flags; - - spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); - *flags = __flags; - return &dma_entry_hash[idx]; -} - -/* - * Give up exclusive access to the hash bucket - */ -static void put_hash_bucket(struct hash_bucket *bucket, - unsigned long *flags) - __releases(&bucket->lock) -{ - unsigned long __flags = *flags; - - spin_unlock_irqrestore(&bucket->lock, __flags); -} - -static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b) -{ - return ((a->dev_addr == b->dev_addr) && - (a->dev == b->dev)) ? true : false; -} - -static bool containing_match(struct dma_debug_entry *a, - struct dma_debug_entry *b) -{ - if (a->dev != b->dev) - return false; - - if ((b->dev_addr <= a->dev_addr) && - ((b->dev_addr + b->size) >= (a->dev_addr + a->size))) - return true; - - return false; -} - -/* - * Search a given entry in the hash bucket list - */ -static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket, - struct dma_debug_entry *ref, - match_fn match) -{ - struct dma_debug_entry *entry, *ret = NULL; - int matches = 0, match_lvl, last_lvl = -1; - - list_for_each_entry(entry, &bucket->list, list) { - if (!match(ref, entry)) - continue; - - /* - * Some drivers map the same physical address multiple - * times. Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which returns the entry from - * the hash which fits best to the reference value - * instead of the first-fit. - */ - matches += 1; - match_lvl = 0; - entry->size == ref->size ? ++match_lvl : 0; - entry->type == ref->type ? ++match_lvl : 0; - entry->direction == ref->direction ? ++match_lvl : 0; - entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0; - - if (match_lvl == 4) { - /* perfect-fit - return the result */ - return entry; - } else if (match_lvl > last_lvl) { - /* - * We found an entry that fits better then the - * previous one or it is the 1st match. - */ - last_lvl = match_lvl; - ret = entry; - } - } - - /* - * If we have multiple matches but no perfect-fit, just return - * NULL. - */ - ret = (matches == 1) ? ret : NULL; - - return ret; -} - -static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket, - struct dma_debug_entry *ref) -{ - return __hash_bucket_find(bucket, ref, exact_match); -} - -static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, - struct dma_debug_entry *ref, - unsigned long *flags) -{ - - unsigned int max_range = dma_get_max_seg_size(ref->dev); - struct dma_debug_entry *entry, index = *ref; - unsigned int range = 0; - - while (range <= max_range) { - entry = __hash_bucket_find(*bucket, ref, containing_match); - - if (entry) - return entry; - - /* - * Nothing found, go back a hash bucket - */ - put_hash_bucket(*bucket, flags); - range += (1 << HASH_FN_SHIFT); - index.dev_addr -= (1 << HASH_FN_SHIFT); - *bucket = get_hash_bucket(&index, flags); - } - - return NULL; -} - -/* - * Add an entry to a hash bucket - */ -static void hash_bucket_add(struct hash_bucket *bucket, - struct dma_debug_entry *entry) -{ - list_add_tail(&entry->list, &bucket->list); -} - -/* - * Remove entry from a hash bucket list - */ -static void hash_bucket_del(struct dma_debug_entry *entry) -{ - list_del(&entry->list); -} - -static unsigned long long phys_addr(struct dma_debug_entry *entry) -{ - if (entry->type == dma_debug_resource) - return __pfn_to_phys(entry->pfn) + entry->offset; - - return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; -} - -/* - * Dump mapping entries for debugging purposes - */ -void debug_dma_dump_mappings(struct device *dev) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!dev || dev == entry->dev) { - dev_info(entry->dev, - "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - } - - spin_unlock_irqrestore(&bucket->lock, flags); - } -} - -/* - * For each mapping (initial cacheline in the case of - * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a - * scatterlist, or the cacheline specified in dma_map_single) insert - * into this tree using the cacheline as the key. At - * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If - * the entry already exists at insertion time add a tag as a reference - * count for the overlapping mappings. For now, the overlap tracking - * just ensures that 'unmaps' balance 'maps' before marking the - * cacheline idle, but we should also be flagging overlaps as an API - * violation. - * - * Memory usage is mostly constrained by the maximum number of available - * dma-debug entries in that we need a free dma_debug_entry before - * inserting into the tree. In the case of dma_map_page and - * dma_alloc_coherent there is only one dma_debug_entry and one - * dma_active_cacheline entry to track per event. dma_map_sg(), on the - * other hand, consumes a single dma_debug_entry, but inserts 'nents' - * entries into the tree. - * - * At any time debug_dma_assert_idle() can be called to trigger a - * warning if any cachelines in the given page are in the active set. - */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); -static DEFINE_SPINLOCK(radix_lock); -#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) -#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) -#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) - -static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) -{ - return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + - (entry->offset >> L1_CACHE_SHIFT); -} - -static int active_cacheline_read_overlap(phys_addr_t cln) -{ - int overlap = 0, i; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) - overlap |= 1 << i; - return overlap; -} - -static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) -{ - int i; - - if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) - return overlap; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (overlap & 1 << i) - radix_tree_tag_set(&dma_active_cacheline, cln, i); - else - radix_tree_tag_clear(&dma_active_cacheline, cln, i); - - return overlap; -} - -static void active_cacheline_inc_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - overlap = active_cacheline_set_overlap(cln, ++overlap); - - /* If we overflowed the overlap counter then we're potentially - * leaking dma-mappings. Otherwise, if maps and unmaps are - * balanced then this overflow may cause false negatives in - * debug_dma_assert_idle() as the cacheline may be marked idle - * prematurely. - */ - WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, - "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", - ACTIVE_CACHELINE_MAX_OVERLAP, &cln); -} - -static int active_cacheline_dec_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - return active_cacheline_set_overlap(cln, --overlap); -} - -static int active_cacheline_insert(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - int rc; - - /* If the device is not writing memory then we don't have any - * concerns about the cpu consuming stale data. This mitigates - * legitimate usages of overlapping mappings. - */ - if (entry->direction == DMA_TO_DEVICE) - return 0; - - spin_lock_irqsave(&radix_lock, flags); - rc = radix_tree_insert(&dma_active_cacheline, cln, entry); - if (rc == -EEXIST) - active_cacheline_inc_overlap(cln); - spin_unlock_irqrestore(&radix_lock, flags); - - return rc; -} - -static void active_cacheline_remove(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - - /* ...mirror the insert case */ - if (entry->direction == DMA_TO_DEVICE) - return; - - spin_lock_irqsave(&radix_lock, flags); - /* since we are counting overlaps the final put of the - * cacheline will occur when the overlap count is 0. - * active_cacheline_dec_overlap() returns -1 in that case - */ - if (active_cacheline_dec_overlap(cln) < 0) - radix_tree_delete(&dma_active_cacheline, cln); - spin_unlock_irqrestore(&radix_lock, flags); -} - -/** - * debug_dma_assert_idle() - assert that a page is not undergoing dma - * @page: page to lookup in the dma_active_cacheline tree - * - * Place a call to this routine in cases where the cpu touching the page - * before the dma completes (page is dma_unmapped) will lead to data - * corruption. - */ -void debug_dma_assert_idle(struct page *page) -{ - static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; - struct dma_debug_entry *entry = NULL; - void **results = (void **) &ents; - unsigned int nents, i; - unsigned long flags; - phys_addr_t cln; - - if (dma_debug_disabled()) - return; - - if (!page) - return; - - cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; - spin_lock_irqsave(&radix_lock, flags); - nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, - CACHELINES_PER_PAGE); - for (i = 0; i < nents; i++) { - phys_addr_t ent_cln = to_cacheline_number(ents[i]); - - if (ent_cln == cln) { - entry = ents[i]; - break; - } else if (ent_cln >= cln + CACHELINES_PER_PAGE) - break; - } - spin_unlock_irqrestore(&radix_lock, flags); - - if (!entry) - return; - - cln = to_cacheline_number(entry); - err_printk(entry->dev, entry, - "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", - &cln); -} - -/* - * Wrapper function for adding an entry to the hash. - * This function takes care of locking itself. - */ -static void add_dma_entry(struct dma_debug_entry *entry) -{ - struct hash_bucket *bucket; - unsigned long flags; - int rc; - - bucket = get_hash_bucket(entry, &flags); - hash_bucket_add(bucket, entry); - put_hash_bucket(bucket, &flags); - - rc = active_cacheline_insert(entry); - if (rc == -ENOMEM) { - pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); - global_disable = true; - } - - /* TODO: report -EEXIST errors here as overlapping mappings are - * not supported by the DMA API - */ -} - -static struct dma_debug_entry *__dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - - entry = list_entry(free_entries.next, struct dma_debug_entry, list); - list_del(&entry->list); - memset(entry, 0, sizeof(*entry)); - - num_free_entries -= 1; - if (num_free_entries < min_free_entries) - min_free_entries = num_free_entries; - - return entry; -} - -/* struct dma_entry allocator - * - * The next two functions implement the allocator for - * struct dma_debug_entries. - */ -static struct dma_debug_entry *dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&free_entries_lock, flags); - - if (list_empty(&free_entries)) { - global_disable = true; - spin_unlock_irqrestore(&free_entries_lock, flags); - pr_err("DMA-API: debugging out of memory - disabling\n"); - return NULL; - } - - entry = __dma_entry_alloc(); - - spin_unlock_irqrestore(&free_entries_lock, flags); - -#ifdef CONFIG_STACKTRACE - entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; - entry->stacktrace.entries = entry->st_entries; - entry->stacktrace.skip = 2; - save_stack_trace(&entry->stacktrace); -#endif - - return entry; -} - -static void dma_entry_free(struct dma_debug_entry *entry) -{ - unsigned long flags; - - active_cacheline_remove(entry); - - /* - * add to beginning of the list - this way the entries are - * more likely cache hot when they are reallocated. - */ - spin_lock_irqsave(&free_entries_lock, flags); - list_add(&entry->list, &free_entries); - num_free_entries += 1; - spin_unlock_irqrestore(&free_entries_lock, flags); -} - -int dma_debug_resize_entries(u32 num_entries) -{ - int i, delta, ret = 0; - unsigned long flags; - struct dma_debug_entry *entry; - LIST_HEAD(tmp); - - spin_lock_irqsave(&free_entries_lock, flags); - - if (nr_total_entries < num_entries) { - delta = num_entries - nr_total_entries; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - for (i = 0; i < delta; i++) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - - list_add_tail(&entry->list, &tmp); - } - - spin_lock_irqsave(&free_entries_lock, flags); - - list_splice(&tmp, &free_entries); - nr_total_entries += i; - num_free_entries += i; - } else { - delta = nr_total_entries - num_entries; - - for (i = 0; i < delta && !list_empty(&free_entries); i++) { - entry = __dma_entry_alloc(); - kfree(entry); - } - - nr_total_entries -= i; - } - - if (nr_total_entries != num_entries) - ret = 1; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - return ret; -} - -/* - * DMA-API debugging init code - * - * The init code does two things: - * 1. Initialize core data structures - * 2. Preallocate a given number of dma_debug_entry structs - */ - -static int prealloc_memory(u32 num_entries) -{ - struct dma_debug_entry *entry, *next_entry; - int i; - - for (i = 0; i < num_entries; ++i) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto out_err; - - list_add_tail(&entry->list, &free_entries); - } - - num_free_entries = num_entries; - min_free_entries = num_entries; - - pr_info("DMA-API: preallocated %d debug entries\n", num_entries); - - return 0; - -out_err: - - list_for_each_entry_safe(entry, next_entry, &free_entries, list) { - list_del(&entry->list); - kfree(entry); - } - - return -ENOMEM; -} - -static ssize_t filter_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN + 1]; - unsigned long flags; - int len; - - if (!current_driver_name[0]) - return 0; - - /* - * We can't copy to userspace directly because current_driver_name can - * only be read under the driver_name_lock with irqs disabled. So - * create a temporary copy first. - */ - read_lock_irqsave(&driver_name_lock, flags); - len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name); - read_unlock_irqrestore(&driver_name_lock, flags); - - return simple_read_from_buffer(user_buf, count, ppos, buf, len); -} - -static ssize_t filter_write(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN]; - unsigned long flags; - size_t len; - int i; - - /* - * We can't copy from userspace directly. Access to - * current_driver_name is protected with a write_lock with irqs - * disabled. Since copy_from_user can fault and may sleep we - * need to copy to temporary buffer first - */ - len = min(count, (size_t)(NAME_MAX_LEN - 1)); - if (copy_from_user(buf, userbuf, len)) - return -EFAULT; - - buf[len] = 0; - - write_lock_irqsave(&driver_name_lock, flags); - - /* - * Now handle the string we got from userspace very carefully. - * The rules are: - * - only use the first token we got - * - token delimiter is everything looking like a space - * character (' ', '\n', '\t' ...) - * - */ - if (!isalnum(buf[0])) { - /* - * If the first character userspace gave us is not - * alphanumerical then assume the filter should be - * switched off. - */ - if (current_driver_name[0]) - pr_info("DMA-API: switching off dma-debug driver filter\n"); - current_driver_name[0] = 0; - current_driver = NULL; - goto out_unlock; - } - - /* - * Now parse out the first token and use it as the name for the - * driver to filter for. - */ - for (i = 0; i < NAME_MAX_LEN - 1; ++i) { - current_driver_name[i] = buf[i]; - if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) - break; - } - current_driver_name[i] = 0; - current_driver = NULL; - - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - -out_unlock: - write_unlock_irqrestore(&driver_name_lock, flags); - - return count; -} - -static const struct file_operations filter_fops = { - .read = filter_read, - .write = filter_write, - .llseek = default_llseek, -}; - -static int dma_debug_fs_init(void) -{ - dma_debug_dent = debugfs_create_dir("dma-api", NULL); - if (!dma_debug_dent) { - pr_err("DMA-API: can not create debugfs directory\n"); - return -ENOMEM; - } - - global_disable_dent = debugfs_create_bool("disabled", 0444, - dma_debug_dent, - &global_disable); - if (!global_disable_dent) - goto out_err; - - error_count_dent = debugfs_create_u32("error_count", 0444, - dma_debug_dent, &error_count); - if (!error_count_dent) - goto out_err; - - show_all_errors_dent = debugfs_create_u32("all_errors", 0644, - dma_debug_dent, - &show_all_errors); - if (!show_all_errors_dent) - goto out_err; - - show_num_errors_dent = debugfs_create_u32("num_errors", 0644, - dma_debug_dent, - &show_num_errors); - if (!show_num_errors_dent) - goto out_err; - - num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, - dma_debug_dent, - &num_free_entries); - if (!num_free_entries_dent) - goto out_err; - - min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, - dma_debug_dent, - &min_free_entries); - if (!min_free_entries_dent) - goto out_err; - - filter_dent = debugfs_create_file("driver_filter", 0644, - dma_debug_dent, NULL, &filter_fops); - if (!filter_dent) - goto out_err; - - return 0; - -out_err: - debugfs_remove_recursive(dma_debug_dent); - - return -ENOMEM; -} - -static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) -{ - struct dma_debug_entry *entry; - unsigned long flags; - int count = 0, i; - - for (i = 0; i < HASH_SIZE; ++i) { - spin_lock_irqsave(&dma_entry_hash[i].lock, flags); - list_for_each_entry(entry, &dma_entry_hash[i].list, list) { - if (entry->dev == dev) { - count += 1; - *out_entry = entry; - } - } - spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); - } - - return count; -} - -static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) -{ - struct device *dev = data; - struct dma_debug_entry *uninitialized_var(entry); - int count; - - if (dma_debug_disabled()) - return 0; - - switch (action) { - case BUS_NOTIFY_UNBOUND_DRIVER: - count = device_dma_allocations(dev, &entry); - if (count == 0) - break; - err_printk(dev, entry, "DMA-API: device driver has pending " - "DMA allocations while released from device " - "[count=%d]\n" - "One of leaked entries details: " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [mapped as %s]\n", - count, entry->dev_addr, entry->size, - dir2name[entry->direction], type2name[entry->type]); - break; - default: - break; - } - - return 0; -} - -void dma_debug_add_bus(struct bus_type *bus) -{ - struct notifier_block *nb; - - if (dma_debug_disabled()) - return; - - nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); - if (nb == NULL) { - pr_err("dma_debug_add_bus: out of memory\n"); - return; - } - - nb->notifier_call = dma_debug_device_change; - - bus_register_notifier(bus, nb); -} - -static int dma_debug_init(void) -{ - int i; - - /* Do not use dma_debug_initialized here, since we really want to be - * called to set dma_debug_initialized - */ - if (global_disable) - return 0; - - for (i = 0; i < HASH_SIZE; ++i) { - INIT_LIST_HEAD(&dma_entry_hash[i].list); - spin_lock_init(&dma_entry_hash[i].lock); - } - - if (dma_debug_fs_init() != 0) { - pr_err("DMA-API: error creating debugfs entries - disabling\n"); - global_disable = true; - - return 0; - } - - if (prealloc_memory(nr_prealloc_entries) != 0) { - pr_err("DMA-API: debugging out of memory error - disabled\n"); - global_disable = true; - - return 0; - } - - nr_total_entries = num_free_entries; - - dma_debug_initialized = true; - - pr_info("DMA-API: debugging enabled by kernel config\n"); - return 0; -} -core_initcall(dma_debug_init); - -static __init int dma_debug_cmdline(char *str) -{ - if (!str) - return -EINVAL; - - if (strncmp(str, "off", 3) == 0) { - pr_info("DMA-API: debugging disabled on kernel command line\n"); - global_disable = true; - } - - return 0; -} - -static __init int dma_debug_entries_cmdline(char *str) -{ - if (!str) - return -EINVAL; - if (!get_option(&str, &nr_prealloc_entries)) - nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; -} - -__setup("dma_debug=", dma_debug_cmdline); -__setup("dma_debug_entries=", dma_debug_entries_cmdline); - -static void check_unmap(struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - - if (!entry) { - /* must drop lock before calling dma_mapping_error */ - put_hash_bucket(bucket, &flags); - - if (dma_mapping_error(ref->dev, ref->dev_addr)) { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free an " - "invalid DMA memory address\n"); - } else { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free DMA " - "memory it has not allocated [device " - "address=0x%016llx] [size=%llu bytes]\n", - ref->dev_addr, ref->size); - } - return; - } - - if (ref->size != entry->size) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different size " - "[device address=0x%016llx] [map size=%llu bytes] " - "[unmap size=%llu bytes]\n", - ref->dev_addr, entry->size, ref->size); - } - - if (ref->type != entry->type) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with wrong function " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s] [unmapped as %s]\n", - ref->dev_addr, ref->size, - type2name[entry->type], type2name[ref->type]); - } else if ((entry->type == dma_debug_coherent) && - (phys_addr(ref) != phys_addr(entry))) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different CPU address " - "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=0x%016llx] " - "[cpu free address=0x%016llx]", - ref->dev_addr, ref->size, - phys_addr(entry), - phys_addr(ref)); - } - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA sg list with different entry count " - "[map count=%d] [unmap count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - - /* - * This may be no bug in reality - but most implementations of the - * DMA API don't handle this properly, so check for it here - */ - if (ref->direction != entry->direction) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [unmapped with %s]\n", - ref->dev_addr, ref->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - /* - * Drivers should use dma_mapping_error() to check the returned - * addresses of dma_map_single() and dma_map_page(). - * If not, print this warning message. See Documentation/DMA-API.txt. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - err_printk(ref->dev, entry, - "DMA-API: device driver failed to check map error" - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s]", - ref->dev_addr, ref->size, - type2name[entry->type]); - } - - hash_bucket_del(entry); - dma_entry_free(entry); - - put_hash_bucket(bucket, &flags); -} - -static void check_for_stack(struct device *dev, - struct page *page, size_t offset) -{ - void *addr; - struct vm_struct *stack_vm_area = task_stack_vm_area(current); - - if (!stack_vm_area) { - /* Stack is direct-mapped. */ - if (PageHighMem(page)) - return; - addr = page_address(page) + offset; - if (object_is_on_stack(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr); - } else { - /* Stack is vmalloced. */ - int i; - - for (i = 0; i < stack_vm_area->nr_pages; i++) { - if (page != stack_vm_area->pages[i]) - continue; - - addr = (u8 *)current->stack + i * PAGE_SIZE + offset; - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr); - break; - } - } -} - -static inline bool overlap(void *addr, unsigned long len, void *start, void *end) -{ - unsigned long a1 = (unsigned long)addr; - unsigned long b1 = a1 + len; - unsigned long a2 = (unsigned long)start; - unsigned long b2 = (unsigned long)end; - - return !(b1 <= a2 || a1 >= b2); -} - -static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) -{ - if (overlap(addr, len, _stext, _etext) || - overlap(addr, len, __start_rodata, __end_rodata)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); -} - -static void check_sync(struct device *dev, - struct dma_debug_entry *ref, - bool to_cpu) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - - entry = bucket_find_contain(&bucket, ref, &flags); - - if (!entry) { - err_printk(dev, NULL, "DMA-API: device driver tries " - "to sync DMA memory it has not allocated " - "[device address=0x%016llx] [size=%llu bytes]\n", - (unsigned long long)ref->dev_addr, ref->size); - goto out; - } - - if (ref->size > entry->size) { - err_printk(dev, entry, "DMA-API: device driver syncs" - " DMA memory outside allocated range " - "[device address=0x%016llx] " - "[allocation size=%llu bytes] " - "[sync offset+size=%llu]\n", - entry->dev_addr, entry->size, - ref->size); - } - - if (entry->direction == DMA_BIDIRECTIONAL) - goto out; - - if (ref->direction != entry->direction) { - err_printk(dev, entry, "DMA-API: device driver syncs " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && - !(ref->direction == DMA_TO_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device read-only DMA memory for cpu " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && - !(ref->direction == DMA_FROM_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device write-only DMA memory to device " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver syncs " - "DMA sg list with different entry count " - "[map count=%d] [sync count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - -out: - put_hash_bucket(bucket, &flags); -} - -static void check_sg_segment(struct device *dev, struct scatterlist *sg) -{ -#ifdef CONFIG_DMA_API_DEBUG_SG - unsigned int max_seg = dma_get_max_seg_size(dev); - u64 start, end, boundary = dma_get_seg_boundary(dev); - - /* - * Either the driver forgot to set dma_parms appropriately, or - * whoever generated the list forgot to check them. - */ - if (sg->length > max_seg) - err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", - sg->length, max_seg); - /* - * In some cases this could potentially be the DMA API - * implementation's fault, but it would usually imply that - * the scatterlist was built inappropriately to begin with. - */ - start = sg_dma_address(sg); - end = start + sg_dma_len(sg) - 1; - if ((start ^ end) & ~boundary) - err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", - start, end, boundary); -#endif -} - -void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr, - bool map_single) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (dma_mapping_error(dev, dma_addr)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->dev = dev; - entry->type = dma_debug_page; - entry->pfn = page_to_pfn(page); - entry->offset = offset, - entry->dev_addr = dma_addr; - entry->size = size; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - if (map_single) - entry->type = dma_debug_single; - - check_for_stack(dev, page, offset); - - if (!PageHighMem(page)) { - void *addr = page_address(page) + offset; - - check_for_illegal_area(dev, addr, size); - } - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_page); - -void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - struct dma_debug_entry ref; - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - if (unlikely(dma_debug_disabled())) - return; - - ref.dev = dev; - ref.dev_addr = dma_addr; - bucket = get_hash_bucket(&ref, &flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!exact_match(&ref, entry)) - continue; - - /* - * The same physical address can be mapped multiple - * times. Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which updates the first entry - * from the hash which fits the reference value and is - * not currently listed as being checked. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - entry->map_err_type = MAP_ERR_CHECKED; - break; - } - } - - put_hash_bucket(bucket, &flags); -} -EXPORT_SYMBOL(debug_dma_mapping_error); - -void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction, bool map_single) -{ - struct dma_debug_entry ref = { - .type = dma_debug_page, - .dev = dev, - .dev_addr = addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - if (map_single) - ref.type = dma_debug_single; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_page); - -void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) -{ - struct dma_debug_entry *entry; - struct scatterlist *s; - int i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, mapped_ents, i) { - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_sg; - entry->dev = dev; - entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, - entry->size = sg_dma_len(s); - entry->dev_addr = sg_dma_address(s); - entry->direction = direction; - entry->sg_call_ents = nents; - entry->sg_mapped_ents = mapped_ents; - - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - - check_sg_segment(dev, s); - - add_dma_entry(entry); - } -} -EXPORT_SYMBOL(debug_dma_map_sg); - -static int get_nr_mapped_entries(struct device *dev, - struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - int mapped_ents; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - mapped_ents = 0; - - if (entry) - mapped_ents = entry->sg_mapped_ents; - put_hash_bucket(bucket, &flags); - - return mapped_ents; -} - -void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sglist, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = dir, - .sg_call_ents = nelems, - }; - - if (mapped_ents && i >= mapped_ents) - break; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - check_unmap(&ref); - } -} -EXPORT_SYMBOL(debug_dma_unmap_sg); - -void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (unlikely(virt == NULL)) - return; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_coherent; - entry->dev = dev; - entry->offset = offset_in_page(virt); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = DMA_BIDIRECTIONAL; - - if (is_vmalloc_addr(virt)) - entry->pfn = vmalloc_to_pfn(virt); - else - entry->pfn = page_to_pfn(virt_to_page(virt)); - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_alloc_coherent); - -void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr) -{ - struct dma_debug_entry ref = { - .type = dma_debug_coherent, - .dev = dev, - .offset = offset_in_page(virt), - .dev_addr = addr, - .size = size, - .direction = DMA_BIDIRECTIONAL, - }; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - if (is_vmalloc_addr(virt)) - ref.pfn = vmalloc_to_pfn(virt); - else - ref.pfn = page_to_pfn(virt_to_page(virt)); - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_free_coherent); - -void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_resource; - entry->dev = dev; - entry->pfn = PHYS_PFN(addr); - entry->offset = offset_in_page(addr); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_resource); - -void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) -{ - struct dma_debug_entry ref = { - .type = dma_debug_resource, - .dev = dev, - .dev_addr = dma_addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_resource); - -void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); - -void debug_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_device); - -void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); - -void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); - -void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, true); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); - -void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, false); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); - -static int __init dma_debug_driver_setup(char *str) -{ - int i; - - for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) { - current_driver_name[i] = *str; - if (*str == 0) - break; - } - - if (current_driver_name[0]) - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - - - return 1; -} -__setup("dma_debug_driver=", dma_debug_driver_setup); diff --git a/lib/dma-direct.c b/lib/dma-direct.c deleted file mode 100644 index 8be8106270c2..000000000000 --- a/lib/dma-direct.c +++ /dev/null @@ -1,204 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map physical memory directly without using an IOMMU or - * flushing caches. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-direct.h> -#include <linux/scatterlist.h> -#include <linux/dma-contiguous.h> -#include <linux/pfn.h> -#include <linux/set_memory.h> - -#define DIRECT_MAPPING_ERROR 0 - -/* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but - * some use it for entirely different regions: - */ -#ifndef ARCH_ZONE_DMA_BITS -#define ARCH_ZONE_DMA_BITS 24 -#endif - -/* - * For AMD SEV all DMA must be to unencrypted addresses. - */ -static inline bool force_dma_unencrypted(void) -{ - return sev_active(); -} - -static bool -check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, - const char *caller) -{ - if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { - if (!dev->dma_mask) { - dev_err(dev, - "%s: call on device without dma_mask\n", - caller); - return false; - } - - if (*dev->dma_mask >= DMA_BIT_MASK(32)) { - dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx\n", - caller, &dma_addr, size, *dev->dma_mask); - } - return false; - } - return true; -} - -static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) -{ - dma_addr_t addr = force_dma_unencrypted() ? - __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); - return addr + size - 1 <= dev->coherent_dma_mask; -} - -void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - int page_order = get_order(size); - struct page *page = NULL; - void *ret; - - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; - - /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ - if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - gfp |= GFP_DMA; - if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; - -again: - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(gfp)) { - page = dma_alloc_from_contiguous(dev, count, page_order, gfp); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - if (!page) - page = alloc_pages_node(dev_to_node(dev), gfp, page_order); - - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - __free_pages(page, page_order); - page = NULL; - - if (IS_ENABLED(CONFIG_ZONE_DMA32) && - dev->coherent_dma_mask < DMA_BIT_MASK(64) && - !(gfp & (GFP_DMA32 | GFP_DMA))) { - gfp |= GFP_DMA32; - goto again; - } - - if (IS_ENABLED(CONFIG_ZONE_DMA) && - dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - } - - if (!page) - return NULL; - ret = page_address(page); - if (force_dma_unencrypted()) { - set_memory_decrypted((unsigned long)ret, 1 << page_order); - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - } else { - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - } - memset(ret, 0, size); - return ret; -} - -/* - * NOTE: this function must never look at the dma_addr argument, because we want - * to be able to use it as a helper for iommu implementations as well. - */ -void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned int page_order = get_order(size); - - if (force_dma_unencrypted()) - set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, page_order); -} - -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; - - if (!check_addr(dev, dma_addr, size, __func__)) - return DIRECT_MAPPING_ERROR; - return dma_addr; -} - -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); - if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) - return 0; - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -int dma_direct_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_ZONE_DMA - if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - return 0; -#else - /* - * Because 32-bit DMA masks are so common we expect every architecture - * to be able to satisfy them - either by not supporting more physical - * memory, or by providing a ZONE_DMA32. If neither is the case, the - * architecture needs to use an IOMMU instead of the direct mapping. - */ - if (mask < DMA_BIT_MASK(32)) - return 0; -#endif - /* - * Various PCI/PCIe bridges have broken support for > 32bit DMA even - * if the device itself might support it. - */ - if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32)) - return 0; - return 1; -} - -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == DIRECT_MAPPING_ERROR; -} - -const struct dma_map_ops dma_direct_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .map_page = dma_direct_map_page, - .map_sg = dma_direct_map_sg, - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, -}; -EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c deleted file mode 100644 index 79e9a757387f..000000000000 --- a/lib/dma-noncoherent.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 Christoph Hellwig. - * - * DMA operations that map physical memory directly without providing cache - * coherence. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> -#include <linux/scatterlist.h> - -static void dma_noncoherent_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t addr; - - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, page_to_phys(page) + offset, - size, dir); - return addr; -} - -static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); - if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); - return nents; -} - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -static void dma_noncoherent_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); -} - -static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); -} - -static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); -} -#endif - -const struct dma_map_ops dma_noncoherent_ops = { - .alloc = arch_dma_alloc, - .free = arch_dma_free, - .mmap = arch_dma_mmap, - .sync_single_for_device = dma_noncoherent_sync_single_for_device, - .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, - .map_page = dma_noncoherent_map_page, - .map_sg = dma_noncoherent_map_sg, -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU - .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, - .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, - .unmap_page = dma_noncoherent_unmap_page, - .unmap_sg = dma_noncoherent_unmap_sg, -#endif - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/lib/dma-virt.c b/lib/dma-virt.c deleted file mode 100644 index 8e61a02ef9ca..000000000000 --- a/lib/dma-virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * lib/dma-virt.c - * - * DMA operations that map to virtual addresses without flushing memory. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/scatterlist.h> - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, -}; -EXPORT_SYMBOL(dma_virt_ops); diff --git a/lib/idr.c b/lib/idr.c index 823b813f08f8..ed9c169c12bd 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -4,9 +4,9 @@ #include <linux/idr.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/xarray.h> DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); -static DEFINE_SPINLOCK(simple_ida_lock); /** * idr_alloc_u32() - Allocate an ID. @@ -581,7 +581,7 @@ again: if (!ida_pre_get(ida, gfp_mask)) return -ENOMEM; - spin_lock_irqsave(&simple_ida_lock, flags); + xa_lock_irqsave(&ida->ida_rt, flags); ret = ida_get_new_above(ida, start, &id); if (!ret) { if (id > max) { @@ -591,7 +591,7 @@ again: ret = id; } } - spin_unlock_irqrestore(&simple_ida_lock, flags); + xa_unlock_irqrestore(&ida->ida_rt, flags); if (unlikely(ret == -EAGAIN)) goto again; @@ -615,8 +615,8 @@ void ida_simple_remove(struct ida *ida, unsigned int id) unsigned long flags; BUG_ON((int)id < 0); - spin_lock_irqsave(&simple_ida_lock, flags); + xa_lock_irqsave(&ida->ida_rt, flags); ida_remove(ida, id); - spin_unlock_irqrestore(&simple_ida_lock, flags); + xa_unlock_irqrestore(&ida->ida_rt, flags); } EXPORT_SYMBOL(ida_simple_remove); diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 835242e74aaa..75509a1511a3 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -64,11 +64,12 @@ static int interval_tree_test_init(void) unsigned long results; cycles_t time1, time2, time; - nodes = kmalloc(nnodes * sizeof(struct interval_tree_node), GFP_KERNEL); + nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node), + GFP_KERNEL); if (!nodes) return -ENOMEM; - queries = kmalloc(nsearches * sizeof(int), GFP_KERNEL); + queries = kmalloc_array(nsearches, sizeof(int), GFP_KERNEL); if (!queries) { kfree(nodes); return -ENOMEM; diff --git a/lib/ioremap.c b/lib/ioremap.c index 54e5bbaa3200..517f5853ffed 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && IS_ALIGNED(phys_addr + addr, PMD_SIZE) && - pmd_free_pte_page(pmd)) { + pmd_free_pte_page(pmd, addr)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && IS_ALIGNED(phys_addr + addr, PUD_SIZE) && - pud_free_pmd_page(pud)) { + pud_free_pmd_page(pud, addr)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e43cd54c84c..8be175df3075 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -596,15 +596,70 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, return ret; } +static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + size_t n, off, xfer = 0; + int idx; + + if (!sanity(i)) + return 0; + + bytes = n = push_pipe(i, bytes, &idx, &off); + if (unlikely(!n)) + return 0; + for ( ; n; idx = next_idx(idx, pipe), off = 0) { + size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + unsigned long rem; + + rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, + chunk); + i->idx = idx; + i->iov_offset = off + chunk - rem; + xfer += chunk - rem; + if (rem) + break; + n -= chunk; + addr += chunk; + } + i->count -= xfer; + return xfer; +} + +/** + * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * @addr: source kernel address + * @bytes: total transfer length + * @iter: destination iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_to_iter() for protecting read/write to persistent memory. + * Unless / until an architecture can guarantee identical performance + * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a + * performance regression to switch more users to the mcsafe version. + * + * Otherwise, the main differences between this and typical _copy_to_iter(). + * + * * Typical tail/residue handling after a fault retries the copy + * byte-by-byte until the fault happens again. Re-triggering machine + * checks is potentially fatal so the implementation uses source + * alignment and poison alignment assumptions to avoid re-triggering + * hardware exceptions. + * + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return + * a short copy. + * + * See MCSAFE_TEST for self-test. + */ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; - if (unlikely(i->type & ITER_PIPE)) { - WARN_ON(1); - return 0; - } + if (unlikely(i->type & ITER_PIPE)) + return copy_pipe_to_iter_mcsafe(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, @@ -701,6 +756,20 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/** + * _copy_from_iter_flushcache - write destination through cpu cache + * @addr: destination kernel address + * @bytes: total transfer length + * @iter: source iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_from_iter() for ensuring that writes to persistent memory + * are flushed through the CPU cache. It is differentiated from + * _copy_from_iter_nocache() in that guarantees all data is flushed for + * all iterator types. The _copy_from_iter_nocache() only attempts to + * bypass the cache for the ITER_IOVEC case, and on some archs may use + * instructions that strand dirty-data in the cache. + */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; diff --git a/lib/kfifo.c b/lib/kfifo.c index b0f757bf7213..015656aa8182 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -54,7 +54,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, return -EINVAL; } - fifo->data = kmalloc(size * esize, gfp_mask); + fifo->data = kmalloc_array(esize, size, gfp_mask); if (!fifo->data) { fifo->mask = 0; diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 15ea216a67ce..63d0816ab23b 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -22,6 +22,7 @@ #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/uidgid.h> #include <linux/uuid.h> #include <linux/ctype.h> #include <net/sock.h> @@ -231,30 +232,6 @@ out: return r; } -#ifdef CONFIG_NET -static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) -{ - struct kobject *kobj = data, *ksobj; - const struct kobj_ns_type_operations *ops; - - ops = kobj_ns_ops(kobj); - if (!ops && kobj->kset) { - ksobj = &kobj->kset->kobj; - if (ksobj->parent != NULL) - ops = kobj_ns_ops(ksobj->parent); - } - - if (ops && ops->netlink_ns && kobj->ktype->namespace) { - const void *sock_ns, *ns; - ns = kobj->ktype->namespace(kobj); - sock_ns = ops->netlink_ns(dsk); - return sock_ns != ns; - } - - return 0; -} -#endif - #ifdef CONFIG_UEVENT_HELPER static int kobj_usermode_filter(struct kobject *kobj) { @@ -296,15 +273,44 @@ static void cleanup_uevent_env(struct subprocess_info *info) } #endif -static int kobject_uevent_net_broadcast(struct kobject *kobj, - struct kobj_uevent_env *env, +#ifdef CONFIG_NET +static struct sk_buff *alloc_uevent_skb(struct kobj_uevent_env *env, const char *action_string, const char *devpath) { - int retval = 0; -#if defined(CONFIG_NET) + struct netlink_skb_parms *parms; + struct sk_buff *skb = NULL; + char *scratch; + size_t len; + + /* allocate message with maximum possible size */ + len = strlen(action_string) + strlen(devpath) + 2; + skb = alloc_skb(len + env->buflen, GFP_KERNEL); + if (!skb) + return NULL; + + /* add header */ + scratch = skb_put(skb, len); + sprintf(scratch, "%s@%s", action_string, devpath); + + skb_put_data(skb, env->buf, env->buflen); + + parms = &NETLINK_CB(skb); + parms->creds.uid = GLOBAL_ROOT_UID; + parms->creds.gid = GLOBAL_ROOT_GID; + parms->dst_group = 1; + parms->portid = 0; + + return skb; +} + +static int uevent_net_broadcast_untagged(struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ struct sk_buff *skb = NULL; struct uevent_sock *ue_sk; + int retval = 0; /* send netlink message */ list_for_each_entry(ue_sk, &uevent_sock_list, list) { @@ -314,37 +320,99 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, continue; if (!skb) { - /* allocate message with the maximum possible size */ - size_t len = strlen(action_string) + strlen(devpath) + 2; - char *scratch; - retval = -ENOMEM; - skb = alloc_skb(len + env->buflen, GFP_KERNEL); + skb = alloc_uevent_skb(env, action_string, devpath); if (!skb) continue; - - /* add header */ - scratch = skb_put(skb, len); - sprintf(scratch, "%s@%s", action_string, devpath); - - skb_put_data(skb, env->buf, env->buflen); - - NETLINK_CB(skb).dst_group = 1; } - retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb), - 0, 1, GFP_KERNEL, - kobj_bcast_filter, - kobj); + retval = netlink_broadcast(uevent_sock, skb_get(skb), 0, 1, + GFP_KERNEL); /* ENOBUFS should be handled in userspace */ if (retval == -ENOBUFS || retval == -ESRCH) retval = 0; } consume_skb(skb); -#endif + return retval; } +static int uevent_net_broadcast_tagged(struct sock *usk, + struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ + struct user_namespace *owning_user_ns = sock_net(usk)->user_ns; + struct sk_buff *skb = NULL; + int ret = 0; + + skb = alloc_uevent_skb(env, action_string, devpath); + if (!skb) + return -ENOMEM; + + /* fix credentials */ + if (owning_user_ns != &init_user_ns) { + struct netlink_skb_parms *parms = &NETLINK_CB(skb); + kuid_t root_uid; + kgid_t root_gid; + + /* fix uid */ + root_uid = make_kuid(owning_user_ns, 0); + if (uid_valid(root_uid)) + parms->creds.uid = root_uid; + + /* fix gid */ + root_gid = make_kgid(owning_user_ns, 0); + if (gid_valid(root_gid)) + parms->creds.gid = root_gid; + } + + ret = netlink_broadcast(usk, skb, 0, 1, GFP_KERNEL); + /* ENOBUFS should be handled in userspace */ + if (ret == -ENOBUFS || ret == -ESRCH) + ret = 0; + + return ret; +} +#endif + +static int kobject_uevent_net_broadcast(struct kobject *kobj, + struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ + int ret = 0; + +#ifdef CONFIG_NET + const struct kobj_ns_type_operations *ops; + const struct net *net = NULL; + + ops = kobj_ns_ops(kobj); + if (!ops && kobj->kset) { + struct kobject *ksobj = &kobj->kset->kobj; + if (ksobj->parent != NULL) + ops = kobj_ns_ops(ksobj->parent); + } + + /* kobjects currently only carry network namespace tags and they + * are the only tag relevant here since we want to decide which + * network namespaces to broadcast the uevent into. + */ + if (ops && ops->netlink_ns && kobj->ktype->namespace) + if (ops->type == KOBJ_NS_TYPE_NET) + net = kobj->ktype->namespace(kobj); + + if (!net) + ret = uevent_net_broadcast_untagged(env, action_string, + devpath); + else + ret = uevent_net_broadcast_tagged(net->uevent_sock->sk, env, + action_string, devpath); +#endif + + return ret; +} + static void zap_modalias_env(struct kobj_uevent_env *env) { static const char modalias_prefix[] = "MODALIAS="; @@ -703,9 +771,13 @@ static int uevent_net_init(struct net *net) net->uevent_sock = ue_sk; - mutex_lock(&uevent_sock_mutex); - list_add_tail(&ue_sk->list, &uevent_sock_list); - mutex_unlock(&uevent_sock_mutex); + /* Restrict uevents to initial user namespace. */ + if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { + mutex_lock(&uevent_sock_mutex); + list_add_tail(&ue_sk->list, &uevent_sock_list); + mutex_unlock(&uevent_sock_mutex); + } + return 0; } @@ -713,9 +785,11 @@ static void uevent_net_exit(struct net *net) { struct uevent_sock *ue_sk = net->uevent_sock; - mutex_lock(&uevent_sock_mutex); - list_del(&ue_sk->list); - mutex_unlock(&uevent_sock_mutex); + if (sock_net(ue_sk->sk)->user_ns == &init_user_ns) { + mutex_lock(&uevent_sock_mutex); + list_del(&ue_sk->list); + mutex_unlock(&uevent_sock_mutex); + } netlink_kernel_release(ue_sk->sk); kfree(ue_sk); diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 28ba40b99337..2b10a4024c35 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -119,7 +119,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL); if (!slot) goto out_fail; - element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); + element = kcalloc(e_count, sizeof(struct lc_element *), GFP_KERNEL); if (!element) goto out_fail; diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 7eceeddb3fb8..c2d6f4efcfbc 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -65,13 +65,6 @@ typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ typedef int mpi_size_t; /* (must be a signed type) */ -static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) -{ - if (a->alloced < b) - return mpi_resize(a, b); - return 0; -} - /* Copy N limbs from S to D. */ #define MPN_COPY(d, s, n) \ do { \ @@ -80,13 +73,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) (d)[_i] = (s)[_i]; \ } while (0) -#define MPN_COPY_INCR(d, s, n) \ - do { \ - mpi_size_t _i; \ - for (_i = 0; _i < (n); _i++) \ - (d)[_i] = (s)[_i]; \ - } while (0) - #define MPN_COPY_DECR(d, s, n) \ do { \ mpi_size_t _i; \ @@ -111,15 +97,6 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) } \ } while (0) -#define MPN_NORMALIZE_NOT_ZERO(d, n) \ - do { \ - for (;;) { \ - if ((d)[(n)-1]) \ - break; \ - (n)--; \ - } \ - } while (0) - #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ do { \ if ((size) < KARATSUBA_THRESHOLD) \ @@ -128,46 +105,11 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) mul_n(prodp, up, vp, size, tspace); \ } while (0); -/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest - * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). - * If this would yield overflow, DI should be the largest possible number - * (i.e., only ones). For correct operation, the most significant bit of D - * has to be set. Put the quotient in Q and the remainder in R. - */ -#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ - do { \ - mpi_limb_t _q, _ql, _r; \ - mpi_limb_t _xh, _xl; \ - umul_ppmm(_q, _ql, (nh), (di)); \ - _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ - umul_ppmm(_xh, _xl, _q, (d)); \ - sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl); \ - if (_xh) { \ - sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ - _q++; \ - if (_xh) { \ - sub_ddmmss(_xh, _r, _xh, _r, 0, (d)); \ - _q++; \ - } \ - } \ - if (_r >= (d)) { \ - _r -= (d); \ - _q++; \ - } \ - (r) = _r; \ - (q) = _q; \ - } while (0) - /*-- mpiutil.c --*/ mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs); void mpi_free_limb_space(mpi_ptr_t a); void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs); -/*-- mpi-bit.c --*/ -void mpi_rshift_limbs(MPI a, unsigned int count); -int mpi_lshift_limbs(MPI a, unsigned int count); - -/*-- mpihelp-add.c --*/ static inline mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, @@ -175,7 +117,6 @@ mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, static inline mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); -/*-- mpihelp-sub.c --*/ static inline mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, @@ -183,10 +124,10 @@ mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, static inline mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_ptr_t s2_ptr, mpi_size_t s2_size); -/*-- mpihelp-cmp.c --*/ +/*-- mpih-cmp.c --*/ int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size); -/*-- mpihelp-mul.c --*/ +/*-- mpih-mul.c --*/ struct karatsuba_ctx { struct karatsuba_ctx *next; @@ -202,7 +143,6 @@ mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); -int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result); void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size); @@ -214,21 +154,16 @@ int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, mpi_ptr_t vp, mpi_size_t vsize, struct karatsuba_ctx *ctx); -/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/ +/*-- generic_mpih-mul1.c --*/ mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb); -/*-- mpihelp-div.c --*/ -mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, - mpi_limb_t divisor_limb); +/*-- mpih-div.c --*/ mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs, mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize); -mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr, - mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, - mpi_limb_t divisor_limb); -/*-- mpihelp-shift.c --*/ +/*-- generic_mpih-[lr]shift.c --*/ mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt); mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 314f4dfa603e..20ed0f766787 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -91,14 +91,14 @@ int mpi_resize(MPI a, unsigned nlimbs) return 0; /* no need to do it */ if (a->d) { - p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL); + p = kmalloc_array(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); if (!p) return -ENOMEM; memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t)); kzfree(a->d); a->d = p; } else { - a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL); + a->d = kcalloc(nlimbs, sizeof(mpi_limb_t), GFP_KERNEL); if (!a->d) return -ENOMEM; } diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 6016f1deb1f5..beb14839b41a 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -112,18 +112,6 @@ static inline void alloc_global_tags(struct percpu_ida *pool, min(pool->nr_free, pool->percpu_batch_size)); } -static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) -{ - int tag = -ENOSPC; - - spin_lock(&tags->lock); - if (tags->nr_free) - tag = tags->freelist[--tags->nr_free]; - spin_unlock(&tags->lock); - - return tag; -} - /** * percpu_ida_alloc - allocate a tag * @pool: pool to allocate from @@ -147,20 +135,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) DEFINE_WAIT(wait); struct percpu_ida_cpu *tags; unsigned long flags; - int tag; + int tag = -ENOSPC; - local_irq_save(flags); - tags = this_cpu_ptr(pool->tag_cpu); + tags = raw_cpu_ptr(pool->tag_cpu); + spin_lock_irqsave(&tags->lock, flags); /* Fastpath */ - tag = alloc_local_tag(tags); - if (likely(tag >= 0)) { - local_irq_restore(flags); + if (likely(tags->nr_free)) { + tag = tags->freelist[--tags->nr_free]; + spin_unlock_irqrestore(&tags->lock, flags); return tag; } + spin_unlock_irqrestore(&tags->lock, flags); while (1) { - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); + tags = this_cpu_ptr(pool->tag_cpu); /* * prepare_to_wait() must come before steal_tags(), in case @@ -184,8 +174,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) &pool->cpus_have_tags); } - spin_unlock(&pool->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&pool->lock, flags); if (tag >= 0 || state == TASK_RUNNING) break; @@ -196,9 +185,6 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) } schedule(); - - local_irq_save(flags); - tags = this_cpu_ptr(pool->tag_cpu); } if (state != TASK_RUNNING) finish_wait(&pool->wait, &wait); @@ -222,28 +208,24 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) BUG_ON(tag >= pool->nr_tags); - local_irq_save(flags); - tags = this_cpu_ptr(pool->tag_cpu); + tags = raw_cpu_ptr(pool->tag_cpu); - spin_lock(&tags->lock); + spin_lock_irqsave(&tags->lock, flags); tags->freelist[tags->nr_free++] = tag; nr_free = tags->nr_free; - spin_unlock(&tags->lock); if (nr_free == 1) { cpumask_set_cpu(smp_processor_id(), &pool->cpus_have_tags); wake_up(&pool->wait); } + spin_unlock_irqrestore(&tags->lock, flags); if (nr_free == pool->percpu_max_size) { - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); + spin_lock(&tags->lock); - /* - * Global lock held and irqs disabled, don't need percpu - * lock - */ if (tags->nr_free == pool->percpu_max_size) { move_tags(pool->freelist, &pool->nr_free, tags->freelist, &tags->nr_free, @@ -251,10 +233,9 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) wake_up(&pool->wait); } - spin_unlock(&pool->lock); + spin_unlock(&tags->lock); + spin_unlock_irqrestore(&pool->lock, flags); } - - local_irq_restore(flags); } EXPORT_SYMBOL_GPL(percpu_ida_free); @@ -346,29 +327,27 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, struct percpu_ida_cpu *remote; unsigned cpu, i, err = 0; - local_irq_save(flags); for_each_possible_cpu(cpu) { remote = per_cpu_ptr(pool->tag_cpu, cpu); - spin_lock(&remote->lock); + spin_lock_irqsave(&remote->lock, flags); for (i = 0; i < remote->nr_free; i++) { err = fn(remote->freelist[i], data); if (err) break; } - spin_unlock(&remote->lock); + spin_unlock_irqrestore(&remote->lock, flags); if (err) goto out; } - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); for (i = 0; i < pool->nr_free; i++) { err = fn(pool->freelist[i], data); if (err) break; } - spin_unlock(&pool->lock); + spin_unlock_irqrestore(&pool->lock, flags); out: - local_irq_restore(flags); return err; } EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 140fa8bb5c23..914ebe98fc21 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -55,22 +55,24 @@ static inline void XOR(int x, int y, int z) asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); } -static inline void LOAD_DATA(int x, int n, u8 *ptr) +static inline void LOAD_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VLM %2,%3,0,%r1" - : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); + : : "m" (*__ptr), "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } -static inline void STORE_DATA(int x, int n, u8 *ptr) +static inline void STORE_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VSTM %2,%3,0,1" - : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); + : "=m" (*__ptr) : "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } static inline void COPY_VEC(int x, int y) @@ -93,19 +95,19 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) q = dptr[z0 + 2]; /* RS syndrome */ for (d = 0; d < bytes; d += $#*NSIZE) { - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= 0; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } - STORE_DATA(0,$#,&p[d]); - STORE_DATA(8,$#,&q[d]); + STORE_DATA(0,&p[d]); + STORE_DATA(8,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } @@ -127,14 +129,14 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, for (d = 0; d < bytes; d += $#*NSIZE) { /* P/Q data pages */ - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= start; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } @@ -145,12 +147,12 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); } - LOAD_DATA(16,$#,&p[d]); + LOAD_DATA(16,&p[d]); XOR(16+$$,16+$$,0+$$); - STORE_DATA(16,$#,&p[d]); - LOAD_DATA(16,$#,&q[d]); + STORE_DATA(16,&p[d]); + LOAD_DATA(16,&q[d]); XOR(16+$$,16+$$,8+$$); - STORE_DATA(16,$#,&q[d]); + STORE_DATA(16,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 7d36c1e27ff6..b7055b2a07d3 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -247,7 +247,7 @@ static int __init rbtree_test_init(void) cycles_t time1, time2, time; struct rb_node *node; - nodes = kmalloc(nnodes * sizeof(*nodes), GFP_KERNEL); + nodes = kmalloc_array(nnodes, sizeof(*nodes), GFP_KERNEL); if (!nodes) return -ENOMEM; diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c index dfcf54242fb9..e5fdc8b9e856 100644 --- a/lib/reed_solomon/reed_solomon.c +++ b/lib/reed_solomon/reed_solomon.c @@ -88,15 +88,15 @@ static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int), rs->gffunc = gffunc; /* Allocate the arrays */ - rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp); + rs->alpha_to = kmalloc_array(rs->nn + 1, sizeof(uint16_t), gfp); if (rs->alpha_to == NULL) goto err; - rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp); + rs->index_of = kmalloc_array(rs->nn + 1, sizeof(uint16_t), gfp); if (rs->index_of == NULL) goto err; - rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), gfp); + rs->genpoly = kmalloc_array(rs->nroots + 1, sizeof(uint16_t), gfp); if(rs->genpoly == NULL) goto err; @@ -283,7 +283,7 @@ out: * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) - * @gfp: GFP_ flags for allocations + * @gfp: Memory allocation flags. */ struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim, int nroots, gfp_t gfp) diff --git a/lib/refcount.c b/lib/refcount.c index 0eb48353abe3..ebcf8cd49e05 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -35,13 +35,13 @@ * */ +#include <linux/mutex.h> #include <linux/refcount.h> +#include <linux/spinlock.h> #include <linux/bug.h> -#ifdef CONFIG_REFCOUNT_FULL - /** - * refcount_add_not_zero - add a value to a refcount unless it is 0 + * refcount_add_not_zero_checked - add a value to a refcount unless it is 0 * @i: the value to add to the refcount * @r: the refcount * @@ -58,7 +58,7 @@ * * Return: false if the passed refcount is 0, true otherwise */ -bool refcount_add_not_zero(unsigned int i, refcount_t *r) +bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -79,10 +79,10 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) return true; } -EXPORT_SYMBOL(refcount_add_not_zero); +EXPORT_SYMBOL(refcount_add_not_zero_checked); /** - * refcount_add - add a value to a refcount + * refcount_add_checked - add a value to a refcount * @i: the value to add to the refcount * @r: the refcount * @@ -97,14 +97,14 @@ EXPORT_SYMBOL(refcount_add_not_zero); * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. */ -void refcount_add(unsigned int i, refcount_t *r) +void refcount_add_checked(unsigned int i, refcount_t *r) { - WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n"); } -EXPORT_SYMBOL(refcount_add); +EXPORT_SYMBOL(refcount_add_checked); /** - * refcount_inc_not_zero - increment a refcount unless it is 0 + * refcount_inc_not_zero_checked - increment a refcount unless it is 0 * @r: the refcount to increment * * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN. @@ -115,7 +115,7 @@ EXPORT_SYMBOL(refcount_add); * * Return: true if the increment was successful, false otherwise */ -bool refcount_inc_not_zero(refcount_t *r) +bool refcount_inc_not_zero_checked(refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -134,10 +134,10 @@ bool refcount_inc_not_zero(refcount_t *r) return true; } -EXPORT_SYMBOL(refcount_inc_not_zero); +EXPORT_SYMBOL(refcount_inc_not_zero_checked); /** - * refcount_inc - increment a refcount + * refcount_inc_checked - increment a refcount * @r: the refcount to increment * * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN. @@ -148,14 +148,14 @@ EXPORT_SYMBOL(refcount_inc_not_zero); * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. */ -void refcount_inc(refcount_t *r) +void refcount_inc_checked(refcount_t *r) { - WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n"); } -EXPORT_SYMBOL(refcount_inc); +EXPORT_SYMBOL(refcount_inc_checked); /** - * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0 * @i: amount to subtract from the refcount * @r: the refcount * @@ -174,7 +174,7 @@ EXPORT_SYMBOL(refcount_inc); * * Return: true if the resulting refcount is 0, false otherwise */ -bool refcount_sub_and_test(unsigned int i, refcount_t *r) +bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -192,10 +192,10 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) return !new; } -EXPORT_SYMBOL(refcount_sub_and_test); +EXPORT_SYMBOL(refcount_sub_and_test_checked); /** - * refcount_dec_and_test - decrement a refcount and test if it is 0 + * refcount_dec_and_test_checked - decrement a refcount and test if it is 0 * @r: the refcount * * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to @@ -207,14 +207,14 @@ EXPORT_SYMBOL(refcount_sub_and_test); * * Return: true if the resulting refcount is 0, false otherwise */ -bool refcount_dec_and_test(refcount_t *r) +bool refcount_dec_and_test_checked(refcount_t *r) { - return refcount_sub_and_test(1, r); + return refcount_sub_and_test_checked(1, r); } -EXPORT_SYMBOL(refcount_dec_and_test); +EXPORT_SYMBOL(refcount_dec_and_test_checked); /** - * refcount_dec - decrement a refcount + * refcount_dec_checked - decrement a refcount * @r: the refcount * * Similar to atomic_dec(), it will WARN on underflow and fail to decrement @@ -223,12 +223,11 @@ EXPORT_SYMBOL(refcount_dec_and_test); * Provides release memory ordering, such that prior loads and stores are done * before. */ -void refcount_dec(refcount_t *r) +void refcount_dec_checked(refcount_t *r) { - WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); + WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n"); } -EXPORT_SYMBOL(refcount_dec); -#endif /* CONFIG_REFCOUNT_FULL */ +EXPORT_SYMBOL(refcount_dec_checked); /** * refcount_dec_if_one - decrement a refcount if it is 1 @@ -350,3 +349,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) } EXPORT_SYMBOL(refcount_dec_and_lock); +/** + * refcount_dec_and_lock_irqsave - return holding spinlock with disabled + * interrupts if able to decrement refcount to 0 + * @r: the refcount + * @lock: the spinlock to be locked + * @flags: saved IRQ-flags if the is acquired + * + * Same as refcount_dec_and_lock() above except that the spinlock is acquired + * with disabled interupts. + * + * Return: true and hold spinlock if able to decrement refcount to 0, false + * otherwise + */ +bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, + unsigned long *flags) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock_irqsave(lock, *flags); + if (!refcount_dec_and_test(r)) { + spin_unlock_irqrestore(lock, *flags); + return false; + } + + return true; +} +EXPORT_SYMBOL(refcount_dec_and_lock_irqsave); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 2b2b79974b61..e5c8586cf717 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -668,8 +668,9 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * For a completely stable walk you should construct your own data * structure outside the hash table. * - * This function may sleep so you must not call it from interrupt - * context or with spin locks held. + * This function may be called from any process context, including + * non-preemptable context, but cannot be called from softirq or + * hardirq context. * * You must call rhashtable_walk_exit after this function returns. */ @@ -726,6 +727,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; + bool rhlist = ht->rhlist; rcu_read_lock(); @@ -734,11 +736,52 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker.tbl && !iter->end_of_table) { + if (iter->end_of_table) + return 0; + if (!iter->walker.tbl) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); + iter->slot = 0; + iter->skip = 0; return -EAGAIN; } + if (iter->p && !rhlist) { + /* + * We need to validate that 'p' is still in the table, and + * if so, update 'skip' + */ + struct rhash_head *p; + int skip = 0; + rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { + skip++; + if (p == iter->p) { + iter->skip = skip; + goto found; + } + } + iter->p = NULL; + } else if (iter->p && rhlist) { + /* Need to validate that 'list' is still in the table, and + * if so, update 'skip' and 'p'. + */ + struct rhash_head *p; + struct rhlist_head *list; + int skip = 0; + rht_for_each_rcu(p, iter->walker.tbl, iter->slot) { + for (list = container_of(p, struct rhlist_head, rhead); + list; + list = rcu_dereference(list->next)) { + skip++; + if (list == iter->list) { + iter->p = p; + iter->skip = skip; + goto found; + } + } + } + iter->p = NULL; + } +found: return 0; } EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); @@ -914,8 +957,6 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) iter->walker.tbl = NULL; spin_unlock(&ht->lock); - iter->p = NULL; - out: rcu_read_unlock(); } @@ -923,8 +964,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { - return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - (unsigned long)params->min_size); + size_t retsize; + + if (params->nelem_hint) + retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + (unsigned long)params->min_size); + else + retsize = max(HASH_DEFAULT_SIZE, + (unsigned long)params->min_size); + + return retsize; } static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) @@ -981,8 +1030,6 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *tbl; size_t size; - size = HASH_DEFAULT_SIZE; - if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; @@ -1009,8 +1056,7 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); - if (params->nelem_hint) - size = rounded_hashtable_size(&ht->p); + size = rounded_hashtable_size(&ht->p); if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); @@ -1102,13 +1148,14 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { - struct bucket_table *tbl; + struct bucket_table *tbl, *next_tbl; unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); +restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; @@ -1125,7 +1172,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, } } + next_tbl = rht_dereference(tbl->future_tbl, ht); bucket_table_free(tbl); + if (next_tbl) { + tbl = next_tbl; + goto restart; + } mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 6fdc6267f4a8..fdd1b8aa8ac6 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -52,7 +52,7 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, return 0; } - sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); + sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); if (!sb->map) return -ENOMEM; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 06dad7a072fd..7c6096a71704 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -24,9 +24,6 @@ **/ struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) for_each_sg(sgl, sg, nents, i) ret = sg; -#ifdef CONFIG_DEBUG_SG - BUG_ON(sgl[0].sg_magic != SG_MAGIC); BUG_ON(!sg_is_last(ret)); -#endif return ret; } EXPORT_SYMBOL(sg_last); @@ -170,7 +164,8 @@ static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask) kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask); return ptr; } else - return kmalloc(nents * sizeof(struct scatterlist), gfp_mask); + return kmalloc_array(nents, sizeof(struct scatterlist), + gfp_mask); } static void sg_kfree(struct scatterlist *sg, unsigned int nents) diff --git a/lib/swiotlb.c b/lib/swiotlb.c deleted file mode 100644 index 04b68d9dffac..000000000000 --- a/lib/swiotlb.c +++ /dev/null @@ -1,1087 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * This implementation is a fallback for platforms that do not support - * I/O TLBs (aka DMA address translation hardware). - * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> - * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> - * Copyright (C) 2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. - * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid - * unnecessary i-cache flushing. - * 04/07/.. ak Better overflow handling. Assorted fixes. - * 05/09/10 linville Add support for syncing ranges, support syncing for - * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. - * 08/12/11 beckyb Add highmem support - */ - -#include <linux/cache.h> -#include <linux/dma-direct.h> -#include <linux/mm.h> -#include <linux/export.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/swiotlb.h> -#include <linux/pfn.h> -#include <linux/types.h> -#include <linux/ctype.h> -#include <linux/highmem.h> -#include <linux/gfp.h> -#include <linux/scatterlist.h> -#include <linux/mem_encrypt.h> -#include <linux/set_memory.h> - -#include <asm/io.h> -#include <asm/dma.h> - -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/iommu-helper.h> - -#define CREATE_TRACE_POINTS -#include <trace/events/swiotlb.h> - -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) - -/* - * Minimum IO TLB size to bother booting with. Systems with mainly - * 64bit capable cards will only lightly use the swiotlb. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) - -enum swiotlb_force swiotlb_force; - -/* - * Used to do a quick range check in swiotlb_tbl_unmap_single and - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this - * API. - */ -static phys_addr_t io_tlb_start, io_tlb_end; - -/* - * The number of IO TLB blocks (in groups of 64) between io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; - -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. - */ -static unsigned long io_tlb_overflow = 32*1024; - -static phys_addr_t io_tlb_overflow_buffer; - -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * Max segment that we can provide which (if pages are contingous) will - * not be bounced (unless SWIOTLB_FORCE is set). - */ -unsigned int max_segment; - -/* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. - */ -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -static phys_addr_t *io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -static DEFINE_SPINLOCK(io_tlb_lock); - -static int late_alloc; - -static int __init -setup_io_tlb_npages(char *str) -{ - if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); - /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - if (*str == ',') - ++str; - if (!strcmp(str, "force")) { - swiotlb_force = SWIOTLB_FORCE; - } else if (!strcmp(str, "noforce")) { - swiotlb_force = SWIOTLB_NO_FORCE; - io_tlb_nslabs = 1; - } - - return 0; -} -early_param("swiotlb", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? */ - -unsigned long swiotlb_nr_tbl(void) -{ - return io_tlb_nslabs; -} -EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); - -unsigned int swiotlb_max_segment(void) -{ - return max_segment; -} -EXPORT_SYMBOL_GPL(swiotlb_max_segment); - -void swiotlb_set_max_segment(unsigned int val) -{ - if (swiotlb_force == SWIOTLB_FORCE) - max_segment = 1; - else - max_segment = rounddown(val, PAGE_SIZE); -} - -/* default to 64MB */ -#define IO_TLB_DEFAULT_SIZE (64UL<<20) -unsigned long swiotlb_size_or_default(void) -{ - unsigned long size; - - size = io_tlb_nslabs << IO_TLB_SHIFT; - - return size ? size : (IO_TLB_DEFAULT_SIZE); -} - -static bool no_iotlb_memory; - -void swiotlb_print_info(void) -{ - unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; - - if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); - return; - } - - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", - (unsigned long long)io_tlb_start, - (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); -} - -/* - * Early SWIOTLB allocation may be too early to allow an architecture to - * perform the desired operations. This function allows the architecture to - * call SWIOTLB when the operations are possible. It needs to be called - * before the SWIOTLB memory is used. - */ -void __init swiotlb_update_mem_attributes(void) -{ - void *vaddr; - unsigned long bytes; - - if (no_iotlb_memory || late_alloc) - return; - - vaddr = phys_to_virt(io_tlb_start); - bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); - - vaddr = phys_to_virt(io_tlb_overflow_buffer); - bytes = PAGE_ALIGN(io_tlb_overflow); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); -} - -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) -{ - void *v_overflow_buffer; - unsigned long i, bytes; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = __pa(tlb); - io_tlb_end = io_tlb_start + bytes; - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = memblock_virt_alloc_low_nopanic( - PAGE_ALIGN(io_tlb_overflow), - PAGE_SIZE); - if (!v_overflow_buffer) - return -ENOMEM; - - io_tlb_overflow_buffer = __pa(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), - PAGE_SIZE); - io_tlb_orig_addr = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), - PAGE_SIZE); - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - if (verbose) - swiotlb_print_info(); - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - return 0; -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init -swiotlb_init(int verbose) -{ - size_t default_size = IO_TLB_DEFAULT_SIZE; - unsigned char *vstart; - unsigned long bytes; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) - return; - - if (io_tlb_start) - memblock_free_early(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); - no_iotlb_memory = true; -} - -/* - * Systems with larger DMA zones (those that don't support ISA) can - * initialize the swiotlb later using the slab allocator if needed. - * This should be just like above, but with some error catching. - */ -int -swiotlb_late_init_with_default_size(size_t default_size) -{ - unsigned long bytes, req_nslabs = io_tlb_nslabs; - unsigned char *vstart = NULL; - unsigned int order; - int rc = 0; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); - io_tlb_nslabs = SLABS_PER_PAGE << order; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); - if (vstart) - break; - order--; - } - - if (!vstart) { - io_tlb_nslabs = req_nslabs; - return -ENOMEM; - } - if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; - } - rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); - if (rc) - free_pages((unsigned long)vstart, order); - - return rc; -} - -int -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) -{ - unsigned long i, bytes; - unsigned char *v_overflow_buffer; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = virt_to_phys(tlb); - io_tlb_end = io_tlb_start + bytes; - - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - memset(tlb, 0, bytes); - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!v_overflow_buffer) - goto cleanup2; - - set_memory_decrypted((unsigned long)v_overflow_buffer, - io_tlb_overflow >> PAGE_SHIFT); - memset(v_overflow_buffer, 0, io_tlb_overflow); - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup3; - - io_tlb_orig_addr = (phys_addr_t *) - __get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * - sizeof(phys_addr_t))); - if (!io_tlb_orig_addr) - goto cleanup4; - - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - swiotlb_print_info(); - - late_alloc = 1; - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - - return 0; - -cleanup4: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; -cleanup3: - free_pages((unsigned long)v_overflow_buffer, - get_order(io_tlb_overflow)); - io_tlb_overflow_buffer = 0; -cleanup2: - io_tlb_end = 0; - io_tlb_start = 0; - io_tlb_nslabs = 0; - max_segment = 0; - return -ENOMEM; -} - -void __init swiotlb_exit(void) -{ - if (!io_tlb_orig_addr) - return; - - if (late_alloc) { - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), - get_order(io_tlb_overflow)); - free_pages((unsigned long)io_tlb_orig_addr, - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - free_pages((unsigned long)phys_to_virt(io_tlb_start), - get_order(io_tlb_nslabs << IO_TLB_SHIFT)); - } else { - memblock_free_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - memblock_free_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - memblock_free_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - memblock_free_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - } - io_tlb_nslabs = 0; - max_segment = 0; -} - -int is_swiotlb_buffer(phys_addr_t paddr) -{ - return paddr >= io_tlb_start && paddr < io_tlb_end; -} - -/* - * Bounce: copy the swiotlb buffer back to the original dma location - */ -static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = phys_to_virt(tlb_addr); - - if (PageHighMem(pfn_to_page(pfn))) { - /* The buffer does not have a mapping. Map it in and copy */ - unsigned int offset = orig_addr & ~PAGE_MASK; - char *buffer; - unsigned int sz = 0; - unsigned long flags; - - while (size) { - sz = min_t(size_t, PAGE_SIZE - offset, size); - - local_irq_save(flags); - buffer = kmap_atomic(pfn_to_page(pfn)); - if (dir == DMA_TO_DEVICE) - memcpy(vaddr, buffer + offset, sz); - else - memcpy(buffer + offset, vaddr, sz); - kunmap_atomic(buffer); - local_irq_restore(flags); - - size -= sz; - pfn++; - vaddr += sz; - offset = 0; - } - } else if (dir == DMA_TO_DEVICE) { - memcpy(vaddr, phys_to_virt(orig_addr), size); - } else { - memcpy(phys_to_virt(orig_addr), vaddr, size); - } -} - -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - - if (mem_encrypt_active()) - pr_warn_once("%s is active and system is using DMA bounce buffers\n", - sme_active() ? "SME" : "SEV"); - - mask = dma_get_seg_boundary(hwdev); - - tbl_dma_addr &= mask; - - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - - /* - * Carefully handle integer overflow which can occur when mask == ~0UL. - */ - max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); - - /* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); - - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; - - do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } - - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot - * and mark the entries as '0' indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; - } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); - -not_found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); - return SWIOTLB_MAP_ERROR; -found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. - */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); - - return tlb_addr; -} - -/* - * Allocates bounce buffer and returns its physical address. - */ -static phys_addr_t -map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start_dma_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) { - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", - &phys); - return SWIOTLB_MAP_ERROR; - } - - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, - dir, attrs); -} - -/* - * tlb_addr is the physical address of the bounce buffer to unmap. - */ -void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - /* - * First, sync the memory before unmapping the entry - */ - if (orig_addr != INVALID_PHYS_ADDR && - !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contiguous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - if (orig_addr == INVALID_PHYS_ADDR) - return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); - - switch (target) { - case SYNC_FOR_CPU: - if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_FROM_DEVICE); - else - BUG_ON(dir != DMA_TO_DEVICE); - break; - case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); - break; - default: - BUG(); - } -} - -static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, - size_t size) -{ - u64 mask = DMA_BIT_MASK(32); - - if (dev && dev->coherent_dma_mask) - mask = dev->coherent_dma_mask; - return addr + size - 1 <= mask; -} - -static void * -swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned long attrs) -{ - phys_addr_t phys_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) - goto out_warn; - - phys_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, attrs); - if (phys_addr == SWIOTLB_MAP_ERROR) - goto out_warn; - - *dma_handle = __phys_to_dma(dev, phys_addr); - if (!dma_coherent_ok(dev, *dma_handle, size)) - goto out_unmap; - - memset(phys_to_virt(phys_addr), 0, size); - return phys_to_virt(phys_addr); - -out_unmap: - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dev->coherent_dma_mask, - (unsigned long long)*dma_handle); - - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); -out_warn: - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { - dev_warn(dev, - "swiotlb: coherent allocation failed, size=%zu\n", - size); - dump_stack(); - } - return NULL; -} - -static bool swiotlb_free_buffer(struct device *dev, size_t size, - dma_addr_t dma_addr) -{ - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); - - WARN_ON_ONCE(irqs_disabled()); - - if (!is_swiotlb_buffer(phys_addr)) - return false; - - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - return true; -} - -static void -swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, - int do_panic) -{ - if (swiotlb_force == SWIOTLB_NO_FORCE) - return; - - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", - size); - - if (size <= io_tlb_overflow || !do_panic) - return; - - if (dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory could be DMA accessed\n"); - if (dir == DMA_FROM_DEVICE) - panic("DMA: Random memory could be DMA written\n"); - if (dir == DMA_TO_DEVICE) - panic("DMA: Random memory could be DMA read\n"); -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * physical address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. - */ -dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t map, phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = phys_to_dma(dev, phys); - - BUG_ON(dir == DMA_NONE); - /* - * If the address happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) - return dev_addr; - - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - - /* Oh well, have to allocate and map a bounce buffer. */ - map = map_single(dev, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - swiotlb_full(dev, size, dir, 1); - return __phys_to_dma(dev, io_tlb_overflow_buffer); - } - - dev_addr = __phys_to_dma(dev, map); - - /* Ensure that the address returned is DMA'ble */ - if (dma_capable(dev, dev_addr, size)) - return dev_addr; - - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - - return __phys_to_dma(dev, io_tlb_overflow_buffer); -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_page call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - /* - * phys_to_virt doesn't work with hihgmem page but we could - * call dma_mark_clean() with hihgmem page here. However, we - * are fine since dma_mark_clean() is null on POWERPC. We can - * make dma_mark_clean() take a physical address if necessary. - */ - dma_mark_clean(phys_to_virt(paddr), size); -} - -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unmap_single(hwdev, dev_addr, size, dir, attrs); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_page() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the dma mapping, you must - * call this function before doing so. At the next point you give the dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -static void -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - dma_mark_clean(phys_to_virt(paddr), size); -} - -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_page - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for swiotlb_map_page are the - * same here. - */ -int -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) { - phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - - if (swiotlb_force == SWIOTLB_FORCE || - !dma_capable(hwdev, dev_addr, sg->length)) { - phys_addr_t map = map_single(hwdev, sg_phys(sg), - sg->length, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ - swiotlb_full(hwdev, sg->length, dir, 0); - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, - attrs); - sg_dma_len(sgl) = 0; - return 0; - } - sg->dma_address = __phys_to_dma(hwdev, map); - } else - sg->dma_address = dev_addr; - sg_dma_len(sg) = sg->length; - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_page() above. - */ -void -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, - attrs); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -static void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - enum dma_sync_target target) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) - swiotlb_sync_single(hwdev, sg->dma_address, - sg_dma_len(sg), dir, target); -} - -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); -} - -int -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) -{ - return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); -} - -/* - * Return whether the given device DMA address mask can be supported - * properly. For example, if your device can only drive the low 24-bits - * during bus mastering, then you would pass 0x00ffffff as the mask to - * this function. - */ -int -swiotlb_dma_supported(struct device *hwdev, u64 mask) -{ - return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; -} - -void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - void *vaddr; - - /* temporary workaround: */ - if (gfp & __GFP_NOWARN) - attrs |= DMA_ATTR_NO_WARN; - - /* - * Don't print a warning when the first allocation attempt fails. - * swiotlb_alloc_coherent() will print a warning when the DMA memory - * allocation ultimately failed. - */ - gfp |= __GFP_NOWARN; - - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!vaddr) - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); - return vaddr; -} - -void swiotlb_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - if (!swiotlb_free_buffer(dev, size, dma_addr)) - dma_direct_free(dev, size, vaddr, dma_addr, attrs); -} - -const struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc = swiotlb_alloc, - .free = swiotlb_free, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = dma_direct_supported, -}; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8e157806df7a..08d3d59dca17 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -356,29 +356,22 @@ static int bpf_fill_maxinsns11(struct bpf_test *self) return __bpf_fill_ja(self, BPF_MAXINSNS, 68); } -static int bpf_fill_ja(struct bpf_test *self) -{ - /* Hits exactly 11 passes on x86_64 JIT. */ - return __bpf_fill_ja(self, 12, 9); -} - -static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self) +static int bpf_fill_maxinsns12(struct bpf_test *self) { unsigned int len = BPF_MAXINSNS; struct sock_filter *insn; - int i; + int i = 0; insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); if (!insn) return -ENOMEM; - for (i = 0; i < len - 1; i += 2) { - insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0); - insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, - SKF_AD_OFF + SKF_AD_CPU); - } + insn[0] = __BPF_JUMP(BPF_JMP | BPF_JA, len - 2, 0, 0); - insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee); + for (i = 1; i < len - 1; i++) + insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0); + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xabababab); self->u.ptr.insns = insn; self->u.ptr.len = len; @@ -386,50 +379,22 @@ static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self) return 0; } -#define PUSH_CNT 68 -/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */ -static int bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) +static int bpf_fill_maxinsns13(struct bpf_test *self) { unsigned int len = BPF_MAXINSNS; - struct bpf_insn *insn; - int i = 0, j, k = 0; + struct sock_filter *insn; + int i = 0; insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); if (!insn) return -ENOMEM; - insn[i++] = BPF_MOV64_REG(R6, R1); -loop: - for (j = 0; j < PUSH_CNT; j++) { - insn[i++] = BPF_LD_ABS(BPF_B, 0); - insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); - i++; - insn[i++] = BPF_MOV64_REG(R1, R6); - insn[i++] = BPF_MOV64_IMM(R2, 1); - insn[i++] = BPF_MOV64_IMM(R3, 2); - insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - bpf_skb_vlan_push_proto.func - __bpf_call_base); - insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); - i++; - } + for (i = 0; i < len - 3; i++) + insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0); - for (j = 0; j < PUSH_CNT; j++) { - insn[i++] = BPF_LD_ABS(BPF_B, 0); - insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0x34, len - i - 2); - i++; - insn[i++] = BPF_MOV64_REG(R1, R6); - insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - bpf_skb_vlan_pop_proto.func - __bpf_call_base); - insn[i] = BPF_JMP_IMM(BPF_JNE, R0, 0, len - i - 2); - i++; - } - if (++k < 5) - goto loop; - - for (; i < len - 1; i++) - insn[i] = BPF_ALU32_IMM(BPF_MOV, R0, 0xbef); - - insn[len - 1] = BPF_EXIT_INSN(); + insn[len - 3] = __BPF_STMT(BPF_LD | BPF_IMM, 0xabababab); + insn[len - 2] = __BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0); + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0); self->u.ptr.insns = insn; self->u.ptr.len = len; @@ -437,58 +402,29 @@ loop: return 0; } -static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self) +static int bpf_fill_ja(struct bpf_test *self) { - struct bpf_insn *insn; - - insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL); - if (!insn) - return -ENOMEM; - - /* Due to func address being non-const, we need to - * assemble this here. - */ - insn[0] = BPF_MOV64_REG(R6, R1); - insn[1] = BPF_LD_ABS(BPF_B, 0); - insn[2] = BPF_LD_ABS(BPF_H, 0); - insn[3] = BPF_LD_ABS(BPF_W, 0); - insn[4] = BPF_MOV64_REG(R7, R6); - insn[5] = BPF_MOV64_IMM(R6, 0); - insn[6] = BPF_MOV64_REG(R1, R7); - insn[7] = BPF_MOV64_IMM(R2, 1); - insn[8] = BPF_MOV64_IMM(R3, 2); - insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - bpf_skb_vlan_push_proto.func - __bpf_call_base); - insn[10] = BPF_MOV64_REG(R6, R7); - insn[11] = BPF_LD_ABS(BPF_B, 0); - insn[12] = BPF_LD_ABS(BPF_H, 0); - insn[13] = BPF_LD_ABS(BPF_W, 0); - insn[14] = BPF_MOV64_IMM(R0, 42); - insn[15] = BPF_EXIT_INSN(); - - self->u.ptr.insns = insn; - self->u.ptr.len = 16; - - return 0; + /* Hits exactly 11 passes on x86_64 JIT. */ + return __bpf_fill_ja(self, 12, 9); } -static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) +static int bpf_fill_ld_abs_get_processor_id(struct bpf_test *self) { unsigned int len = BPF_MAXINSNS; - struct bpf_insn *insn; - int i = 0; + struct sock_filter *insn; + int i; insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); if (!insn) return -ENOMEM; - insn[i++] = BPF_MOV64_REG(R6, R1); - insn[i++] = BPF_LD_ABS(BPF_B, 0); - insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2); - i++; - while (i < len - 1) - insn[i++] = BPF_LD_ABS(BPF_B, 1); - insn[i] = BPF_EXIT_INSN(); + for (i = 0; i < len - 1; i += 2) { + insn[i] = __BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 0); + insn[i + 1] = __BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + SKF_AD_OFF + SKF_AD_CPU); + } + + insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xbee); self->u.ptr.insns = insn; self->u.ptr.len = len; @@ -1988,40 +1924,6 @@ static struct bpf_test tests[] = { { { 0, -1 } } }, { - "INT: DIV + ABS", - .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R6, R1), - BPF_LD_ABS(BPF_B, 3), - BPF_ALU64_IMM(BPF_MOV, R2, 2), - BPF_ALU32_REG(BPF_DIV, R0, R2), - BPF_ALU64_REG(BPF_MOV, R8, R0), - BPF_LD_ABS(BPF_B, 4), - BPF_ALU64_REG(BPF_ADD, R8, R0), - BPF_LD_IND(BPF_B, R8, -70), - BPF_EXIT_INSN(), - }, - INTERNAL, - { 10, 20, 30, 40, 50 }, - { { 4, 0 }, { 5, 10 } } - }, - { - /* This one doesn't go through verifier, but is just raw insn - * as opposed to cBPF tests from here. Thus div by 0 tests are - * done in test_verifier in BPF kselftests. - */ - "INT: DIV by -1", - .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R6, R1), - BPF_ALU64_IMM(BPF_MOV, R7, -1), - BPF_LD_ABS(BPF_B, 3), - BPF_ALU32_REG(BPF_DIV, R0, R7), - BPF_EXIT_INSN(), - }, - INTERNAL, - { 10, 20, 30, 40, 50 }, - { { 3, 0 }, { 4, 0 } } - }, - { "check: missing ret", .u.insns = { BPF_STMT(BPF_LD | BPF_IMM, 1), @@ -2383,50 +2285,6 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } } }, - { - "nmap reduced", - .u.insns_int = { - BPF_MOV64_REG(R6, R1), - BPF_LD_ABS(BPF_H, 12), - BPF_JMP_IMM(BPF_JNE, R0, 0x806, 28), - BPF_LD_ABS(BPF_H, 12), - BPF_JMP_IMM(BPF_JNE, R0, 0x806, 26), - BPF_MOV32_IMM(R0, 18), - BPF_STX_MEM(BPF_W, R10, R0, -64), - BPF_LDX_MEM(BPF_W, R7, R10, -64), - BPF_LD_IND(BPF_W, R7, 14), - BPF_STX_MEM(BPF_W, R10, R0, -60), - BPF_MOV32_IMM(R0, 280971478), - BPF_STX_MEM(BPF_W, R10, R0, -56), - BPF_LDX_MEM(BPF_W, R7, R10, -56), - BPF_LDX_MEM(BPF_W, R0, R10, -60), - BPF_ALU32_REG(BPF_SUB, R0, R7), - BPF_JMP_IMM(BPF_JNE, R0, 0, 15), - BPF_LD_ABS(BPF_H, 12), - BPF_JMP_IMM(BPF_JNE, R0, 0x806, 13), - BPF_MOV32_IMM(R0, 22), - BPF_STX_MEM(BPF_W, R10, R0, -56), - BPF_LDX_MEM(BPF_W, R7, R10, -56), - BPF_LD_IND(BPF_H, R7, 14), - BPF_STX_MEM(BPF_W, R10, R0, -52), - BPF_MOV32_IMM(R0, 17366), - BPF_STX_MEM(BPF_W, R10, R0, -48), - BPF_LDX_MEM(BPF_W, R7, R10, -48), - BPF_LDX_MEM(BPF_W, R0, R10, -52), - BPF_ALU32_REG(BPF_SUB, R0, R7), - BPF_JMP_IMM(BPF_JNE, R0, 0, 2), - BPF_MOV32_IMM(R0, 256), - BPF_EXIT_INSN(), - BPF_MOV32_IMM(R0, 0), - BPF_EXIT_INSN(), - }, - INTERNAL, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, - { { 38, 256 } }, - .stack_depth = 64, - }, /* BPF_ALU | BPF_MOV | BPF_X */ { "ALU_MOV_X: dst = 2", @@ -5424,21 +5282,31 @@ static struct bpf_test tests[] = { { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Ctx heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { }, { { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } }, .fill_helper = bpf_fill_maxinsns6, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Call heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 1, 0 }, { 10, 0 } }, .fill_helper = bpf_fill_maxinsns7, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Jump heavy test", @@ -5478,28 +5346,39 @@ static struct bpf_test tests[] = { .expected_errcode = -ENOTSUPP, }, { - "BPF_MAXINSNS: ld_abs+get_processor_id", - { }, - CLASSIC, + "BPF_MAXINSNS: jump over MSH", { }, - { { 1, 0xbee } }, - .fill_helper = bpf_fill_ld_abs_get_processor_id, + CLASSIC | FLAG_EXPECTED_FAIL, + { 0xfa, 0xfb, 0xfc, 0xfd, }, + { { 4, 0xabababab } }, + .fill_helper = bpf_fill_maxinsns12, + .expected_errcode = -EINVAL, }, { - "BPF_MAXINSNS: ld_abs+vlan_push/pop", + "BPF_MAXINSNS: exec all MSH", { }, - INTERNAL, - { 0x34 }, - { { ETH_HLEN, 0xbef } }, - .fill_helper = bpf_fill_ld_abs_vlan_push_pop, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else + CLASSIC, +#endif + { 0xfa, 0xfb, 0xfc, 0xfd, }, + { { 4, 0xababab83 } }, + .fill_helper = bpf_fill_maxinsns13, + .expected_errcode = -ENOTSUPP, }, { - "BPF_MAXINSNS: jump around ld_abs", + "BPF_MAXINSNS: ld_abs+get_processor_id", { }, - INTERNAL, - { 10, 11 }, - { { 2, 10 } }, - .fill_helper = bpf_fill_jump_around_ld_abs, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else + CLASSIC, +#endif + { }, + { { 1, 0xbee } }, + .fill_helper = bpf_fill_ld_abs_get_processor_id, + .expected_errcode = -ENOTSUPP, }, /* * LD_IND / LD_ABS on fragmented SKBs @@ -5683,6 +5562,53 @@ static struct bpf_test tests[] = { { {0x40, 0x05 } }, }, { + "LD_IND byte positive offset, all ff", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), + BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, + { {0x40, 0xff } }, + }, + { + "LD_IND byte positive offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), + BPF_STMT(BPF_LD | BPF_IND | BPF_B, 0x1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LD_IND byte negative offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), + BPF_STMT(BPF_LD | BPF_IND | BPF_B, -0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 } }, + }, + { + "LD_IND byte negative offset, multiple calls", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3b), + BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 1), + BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 2), + BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 3), + BPF_STMT(BPF_LD | BPF_IND | BPF_B, SKF_LL_OFF + 4), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x82 }, }, + }, + { "LD_IND halfword positive offset", .u.insns = { BPF_STMT(BPF_LDX | BPF_IMM, 0x20), @@ -5731,6 +5657,39 @@ static struct bpf_test tests[] = { { {0x40, 0x66cc } }, }, { + "LD_IND halfword positive offset, all ff", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3d), + BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, + { {0x40, 0xffff } }, + }, + { + "LD_IND halfword positive offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), + BPF_STMT(BPF_LD | BPF_IND | BPF_H, 0x1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LD_IND halfword negative offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), + BPF_STMT(BPF_LD | BPF_IND | BPF_H, -0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 } }, + }, + { "LD_IND word positive offset", .u.insns = { BPF_STMT(BPF_LDX | BPF_IMM, 0x20), @@ -5821,6 +5780,39 @@ static struct bpf_test tests[] = { { {0x40, 0x66cc77dd } }, }, { + "LD_IND word positive offset, all ff", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3b), + BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, + { {0x40, 0xffffffff } }, + }, + { + "LD_IND word positive offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), + BPF_STMT(BPF_LD | BPF_IND | BPF_W, 0x1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LD_IND word negative offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LDX | BPF_IMM, 0x3e), + BPF_STMT(BPF_LD | BPF_IND | BPF_W, -0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 } }, + }, + { "LD_ABS byte", .u.insns = { BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x20), @@ -5838,6 +5830,68 @@ static struct bpf_test tests[] = { { {0x40, 0xcc } }, }, { + "LD_ABS byte positive offset, all ff", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, + { {0x40, 0xff } }, + }, + { + "LD_ABS byte positive offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LD_ABS byte negative offset, out of bounds load", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, -1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC | FLAG_EXPECTED_FAIL, + .expected_errcode = -EINVAL, + }, + { + "LD_ABS byte negative offset, in bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x82 }, }, + }, + { + "LD_ABS byte negative offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LD_ABS byte negative offset, multiple calls", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3c), + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3d), + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3e), + BPF_STMT(BPF_LD | BPF_ABS | BPF_B, SKF_LL_OFF + 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x82 }, }, + }, + { "LD_ABS halfword", .u.insns = { BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x22), @@ -5872,6 +5926,55 @@ static struct bpf_test tests[] = { { {0x40, 0x99ff } }, }, { + "LD_ABS halfword positive offset, all ff", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3e), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, + { {0x40, 0xffff } }, + }, + { + "LD_ABS halfword positive offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_H, 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LD_ABS halfword negative offset, out of bounds load", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_H, -1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC | FLAG_EXPECTED_FAIL, + .expected_errcode = -EINVAL, + }, + { + "LD_ABS halfword negative offset, in bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x1982 }, }, + }, + { + "LD_ABS halfword negative offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_H, SKF_LL_OFF + 0x3e), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { "LD_ABS word", .u.insns = { BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x1c), @@ -5939,6 +6042,140 @@ static struct bpf_test tests[] = { }, { {0x40, 0x88ee99ff } }, }, + { + "LD_ABS word positive offset, all ff", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3c), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0xff, [0x3d] = 0xff, [0x3e] = 0xff, [0x3f] = 0xff }, + { {0x40, 0xffffffff } }, + }, + { + "LD_ABS word positive offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LD_ABS word negative offset, out of bounds load", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, -1), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC | FLAG_EXPECTED_FAIL, + .expected_errcode = -EINVAL, + }, + { + "LD_ABS word negative offset, in bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x25051982 }, }, + }, + { + "LD_ABS word negative offset, out of bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_ABS | BPF_W, SKF_LL_OFF + 0x3c), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x3f, 0 }, }, + }, + { + "LDX_MSH standalone, preserved A", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0xffeebbaa }, }, + }, + { + "LDX_MSH standalone, preserved A 2", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0x175e9d63), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3d), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3f), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x175e9d63 }, }, + }, + { + "LDX_MSH standalone, test result 1", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3c), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x14 }, }, + }, + { + "LDX_MSH standalone, test result 2", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x3e), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x24 }, }, + }, + { + "LDX_MSH standalone, negative offset", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, -1), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0 }, }, + }, + { + "LDX_MSH standalone, negative offset 2", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, SKF_LL_OFF + 0x3e), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0x24 }, }, + }, + { + "LDX_MSH standalone, out of bounds", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffeebbaa), + BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0x40), + BPF_STMT(BPF_MISC | BPF_TXA, 0), + BPF_STMT(BPF_RET | BPF_A, 0x0), + }, + CLASSIC, + { [0x3c] = 0x25, [0x3d] = 0x05, [0x3e] = 0x19, [0x3f] = 0x82 }, + { {0x40, 0 }, }, + }, /* * verify that the interpreter or JIT correctly sets A and X * to 0. @@ -6127,14 +6364,6 @@ static struct bpf_test tests[] = { {}, { {0x1, 0x42 } }, }, - { - "LD_ABS with helper changing skb data", - { }, - INTERNAL, - { 0x34 }, - { { ETH_HLEN, 42 } }, - .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, - }, /* Checking interpreter vs JIT wrt signed extended imms. */ { "JNE signed compare, test 1", diff --git a/lib/test_firmware.c b/lib/test_firmware.c index cee000ac54d8..b984806d7d7b 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -618,8 +618,9 @@ static ssize_t trigger_batched_requests_store(struct device *dev, mutex_lock(&test_fw_mutex); - test_fw_config->reqs = vzalloc(sizeof(struct test_batched_req) * - test_fw_config->num_requests * 2); + test_fw_config->reqs = + vzalloc(array3_size(sizeof(struct test_batched_req), + test_fw_config->num_requests, 2)); if (!test_fw_config->reqs) { rc = -ENOMEM; goto out_unlock; @@ -720,8 +721,9 @@ ssize_t trigger_batched_requests_async_store(struct device *dev, mutex_lock(&test_fw_mutex); - test_fw_config->reqs = vzalloc(sizeof(struct test_batched_req) * - test_fw_config->num_requests * 2); + test_fw_config->reqs = + vzalloc(array3_size(sizeof(struct test_batched_req), + test_fw_config->num_requests, 2)); if (!test_fw_config->reqs) { rc = -ENOMEM; goto out; diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 0e5b7a61460b..e3ddd836491f 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -779,8 +779,9 @@ static int kmod_config_sync_info(struct kmod_test_device *test_dev) struct test_config *config = &test_dev->config; free_test_dev_info(test_dev); - test_dev->info = vzalloc(config->num_threads * - sizeof(struct kmod_test_device_info)); + test_dev->info = + vzalloc(array_size(sizeof(struct kmod_test_device_info), + config->num_threads)); if (!test_dev->info) return -ENOMEM; diff --git a/lib/test_overflow.c b/lib/test_overflow.c new file mode 100644 index 000000000000..2278fe05a1b0 --- /dev/null +++ b/lib/test_overflow.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Test cases for arithmetic overflow checks. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/device.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/overflow.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/vmalloc.h> + +#define DEFINE_TEST_ARRAY(t) \ + static const struct test_ ## t { \ + t a, b; \ + t sum, diff, prod; \ + bool s_of, d_of, p_of; \ + } t ## _tests[] __initconst + +DEFINE_TEST_ARRAY(u8) = { + {0, 0, 0, 0, 0, false, false, false}, + {1, 1, 2, 0, 1, false, false, false}, + {0, 1, 1, U8_MAX, 0, false, true, false}, + {1, 0, 1, 1, 0, false, false, false}, + {0, U8_MAX, U8_MAX, 1, 0, false, true, false}, + {U8_MAX, 0, U8_MAX, U8_MAX, 0, false, false, false}, + {1, U8_MAX, 0, 2, U8_MAX, true, true, false}, + {U8_MAX, 1, 0, U8_MAX-1, U8_MAX, true, false, false}, + {U8_MAX, U8_MAX, U8_MAX-1, 0, 1, true, false, true}, + + {U8_MAX, U8_MAX-1, U8_MAX-2, 1, 2, true, false, true}, + {U8_MAX-1, U8_MAX, U8_MAX-2, U8_MAX, 2, true, true, true}, + + {1U << 3, 1U << 3, 1U << 4, 0, 1U << 6, false, false, false}, + {1U << 4, 1U << 4, 1U << 5, 0, 0, false, false, true}, + {1U << 4, 1U << 3, 3*(1U << 3), 1U << 3, 1U << 7, false, false, false}, + {1U << 7, 1U << 7, 0, 0, 0, true, false, true}, + + {48, 32, 80, 16, 0, false, false, true}, + {128, 128, 0, 0, 0, true, false, true}, + {123, 234, 101, 145, 110, true, true, true}, +}; +DEFINE_TEST_ARRAY(u16) = { + {0, 0, 0, 0, 0, false, false, false}, + {1, 1, 2, 0, 1, false, false, false}, + {0, 1, 1, U16_MAX, 0, false, true, false}, + {1, 0, 1, 1, 0, false, false, false}, + {0, U16_MAX, U16_MAX, 1, 0, false, true, false}, + {U16_MAX, 0, U16_MAX, U16_MAX, 0, false, false, false}, + {1, U16_MAX, 0, 2, U16_MAX, true, true, false}, + {U16_MAX, 1, 0, U16_MAX-1, U16_MAX, true, false, false}, + {U16_MAX, U16_MAX, U16_MAX-1, 0, 1, true, false, true}, + + {U16_MAX, U16_MAX-1, U16_MAX-2, 1, 2, true, false, true}, + {U16_MAX-1, U16_MAX, U16_MAX-2, U16_MAX, 2, true, true, true}, + + {1U << 7, 1U << 7, 1U << 8, 0, 1U << 14, false, false, false}, + {1U << 8, 1U << 8, 1U << 9, 0, 0, false, false, true}, + {1U << 8, 1U << 7, 3*(1U << 7), 1U << 7, 1U << 15, false, false, false}, + {1U << 15, 1U << 15, 0, 0, 0, true, false, true}, + + {123, 234, 357, 65425, 28782, false, true, false}, + {1234, 2345, 3579, 64425, 10146, false, true, true}, +}; +DEFINE_TEST_ARRAY(u32) = { + {0, 0, 0, 0, 0, false, false, false}, + {1, 1, 2, 0, 1, false, false, false}, + {0, 1, 1, U32_MAX, 0, false, true, false}, + {1, 0, 1, 1, 0, false, false, false}, + {0, U32_MAX, U32_MAX, 1, 0, false, true, false}, + {U32_MAX, 0, U32_MAX, U32_MAX, 0, false, false, false}, + {1, U32_MAX, 0, 2, U32_MAX, true, true, false}, + {U32_MAX, 1, 0, U32_MAX-1, U32_MAX, true, false, false}, + {U32_MAX, U32_MAX, U32_MAX-1, 0, 1, true, false, true}, + + {U32_MAX, U32_MAX-1, U32_MAX-2, 1, 2, true, false, true}, + {U32_MAX-1, U32_MAX, U32_MAX-2, U32_MAX, 2, true, true, true}, + + {1U << 15, 1U << 15, 1U << 16, 0, 1U << 30, false, false, false}, + {1U << 16, 1U << 16, 1U << 17, 0, 0, false, false, true}, + {1U << 16, 1U << 15, 3*(1U << 15), 1U << 15, 1U << 31, false, false, false}, + {1U << 31, 1U << 31, 0, 0, 0, true, false, true}, + + {-2U, 1U, -1U, -3U, -2U, false, false, false}, + {-4U, 5U, 1U, -9U, -20U, true, false, true}, +}; + +DEFINE_TEST_ARRAY(u64) = { + {0, 0, 0, 0, 0, false, false, false}, + {1, 1, 2, 0, 1, false, false, false}, + {0, 1, 1, U64_MAX, 0, false, true, false}, + {1, 0, 1, 1, 0, false, false, false}, + {0, U64_MAX, U64_MAX, 1, 0, false, true, false}, + {U64_MAX, 0, U64_MAX, U64_MAX, 0, false, false, false}, + {1, U64_MAX, 0, 2, U64_MAX, true, true, false}, + {U64_MAX, 1, 0, U64_MAX-1, U64_MAX, true, false, false}, + {U64_MAX, U64_MAX, U64_MAX-1, 0, 1, true, false, true}, + + {U64_MAX, U64_MAX-1, U64_MAX-2, 1, 2, true, false, true}, + {U64_MAX-1, U64_MAX, U64_MAX-2, U64_MAX, 2, true, true, true}, + + {1ULL << 31, 1ULL << 31, 1ULL << 32, 0, 1ULL << 62, false, false, false}, + {1ULL << 32, 1ULL << 32, 1ULL << 33, 0, 0, false, false, true}, + {1ULL << 32, 1ULL << 31, 3*(1ULL << 31), 1ULL << 31, 1ULL << 63, false, false, false}, + {1ULL << 63, 1ULL << 63, 0, 0, 0, true, false, true}, + {1000000000ULL /* 10^9 */, 10000000000ULL /* 10^10 */, + 11000000000ULL, 18446744064709551616ULL, 10000000000000000000ULL, + false, true, false}, + {-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true}, +}; + +DEFINE_TEST_ARRAY(s8) = { + {0, 0, 0, 0, 0, false, false, false}, + + {0, S8_MAX, S8_MAX, -S8_MAX, 0, false, false, false}, + {S8_MAX, 0, S8_MAX, S8_MAX, 0, false, false, false}, + {0, S8_MIN, S8_MIN, S8_MIN, 0, false, true, false}, + {S8_MIN, 0, S8_MIN, S8_MIN, 0, false, false, false}, + + {-1, S8_MIN, S8_MAX, S8_MAX, S8_MIN, true, false, true}, + {S8_MIN, -1, S8_MAX, -S8_MAX, S8_MIN, true, false, true}, + {-1, S8_MAX, S8_MAX-1, S8_MIN, -S8_MAX, false, false, false}, + {S8_MAX, -1, S8_MAX-1, S8_MIN, -S8_MAX, false, true, false}, + {-1, -S8_MAX, S8_MIN, S8_MAX-1, S8_MAX, false, false, false}, + {-S8_MAX, -1, S8_MIN, S8_MIN+2, S8_MAX, false, false, false}, + + {1, S8_MIN, -S8_MAX, -S8_MAX, S8_MIN, false, true, false}, + {S8_MIN, 1, -S8_MAX, S8_MAX, S8_MIN, false, true, false}, + {1, S8_MAX, S8_MIN, S8_MIN+2, S8_MAX, true, false, false}, + {S8_MAX, 1, S8_MIN, S8_MAX-1, S8_MAX, true, false, false}, + + {S8_MIN, S8_MIN, 0, 0, 0, true, false, true}, + {S8_MAX, S8_MAX, -2, 0, 1, true, false, true}, + + {-4, -32, -36, 28, -128, false, false, true}, + {-4, 32, 28, -36, -128, false, false, false}, +}; + +DEFINE_TEST_ARRAY(s16) = { + {0, 0, 0, 0, 0, false, false, false}, + + {0, S16_MAX, S16_MAX, -S16_MAX, 0, false, false, false}, + {S16_MAX, 0, S16_MAX, S16_MAX, 0, false, false, false}, + {0, S16_MIN, S16_MIN, S16_MIN, 0, false, true, false}, + {S16_MIN, 0, S16_MIN, S16_MIN, 0, false, false, false}, + + {-1, S16_MIN, S16_MAX, S16_MAX, S16_MIN, true, false, true}, + {S16_MIN, -1, S16_MAX, -S16_MAX, S16_MIN, true, false, true}, + {-1, S16_MAX, S16_MAX-1, S16_MIN, -S16_MAX, false, false, false}, + {S16_MAX, -1, S16_MAX-1, S16_MIN, -S16_MAX, false, true, false}, + {-1, -S16_MAX, S16_MIN, S16_MAX-1, S16_MAX, false, false, false}, + {-S16_MAX, -1, S16_MIN, S16_MIN+2, S16_MAX, false, false, false}, + + {1, S16_MIN, -S16_MAX, -S16_MAX, S16_MIN, false, true, false}, + {S16_MIN, 1, -S16_MAX, S16_MAX, S16_MIN, false, true, false}, + {1, S16_MAX, S16_MIN, S16_MIN+2, S16_MAX, true, false, false}, + {S16_MAX, 1, S16_MIN, S16_MAX-1, S16_MAX, true, false, false}, + + {S16_MIN, S16_MIN, 0, 0, 0, true, false, true}, + {S16_MAX, S16_MAX, -2, 0, 1, true, false, true}, +}; +DEFINE_TEST_ARRAY(s32) = { + {0, 0, 0, 0, 0, false, false, false}, + + {0, S32_MAX, S32_MAX, -S32_MAX, 0, false, false, false}, + {S32_MAX, 0, S32_MAX, S32_MAX, 0, false, false, false}, + {0, S32_MIN, S32_MIN, S32_MIN, 0, false, true, false}, + {S32_MIN, 0, S32_MIN, S32_MIN, 0, false, false, false}, + + {-1, S32_MIN, S32_MAX, S32_MAX, S32_MIN, true, false, true}, + {S32_MIN, -1, S32_MAX, -S32_MAX, S32_MIN, true, false, true}, + {-1, S32_MAX, S32_MAX-1, S32_MIN, -S32_MAX, false, false, false}, + {S32_MAX, -1, S32_MAX-1, S32_MIN, -S32_MAX, false, true, false}, + {-1, -S32_MAX, S32_MIN, S32_MAX-1, S32_MAX, false, false, false}, + {-S32_MAX, -1, S32_MIN, S32_MIN+2, S32_MAX, false, false, false}, + + {1, S32_MIN, -S32_MAX, -S32_MAX, S32_MIN, false, true, false}, + {S32_MIN, 1, -S32_MAX, S32_MAX, S32_MIN, false, true, false}, + {1, S32_MAX, S32_MIN, S32_MIN+2, S32_MAX, true, false, false}, + {S32_MAX, 1, S32_MIN, S32_MAX-1, S32_MAX, true, false, false}, + + {S32_MIN, S32_MIN, 0, 0, 0, true, false, true}, + {S32_MAX, S32_MAX, -2, 0, 1, true, false, true}, +}; +DEFINE_TEST_ARRAY(s64) = { + {0, 0, 0, 0, 0, false, false, false}, + + {0, S64_MAX, S64_MAX, -S64_MAX, 0, false, false, false}, + {S64_MAX, 0, S64_MAX, S64_MAX, 0, false, false, false}, + {0, S64_MIN, S64_MIN, S64_MIN, 0, false, true, false}, + {S64_MIN, 0, S64_MIN, S64_MIN, 0, false, false, false}, + + {-1, S64_MIN, S64_MAX, S64_MAX, S64_MIN, true, false, true}, + {S64_MIN, -1, S64_MAX, -S64_MAX, S64_MIN, true, false, true}, + {-1, S64_MAX, S64_MAX-1, S64_MIN, -S64_MAX, false, false, false}, + {S64_MAX, -1, S64_MAX-1, S64_MIN, -S64_MAX, false, true, false}, + {-1, -S64_MAX, S64_MIN, S64_MAX-1, S64_MAX, false, false, false}, + {-S64_MAX, -1, S64_MIN, S64_MIN+2, S64_MAX, false, false, false}, + + {1, S64_MIN, -S64_MAX, -S64_MAX, S64_MIN, false, true, false}, + {S64_MIN, 1, -S64_MAX, S64_MAX, S64_MIN, false, true, false}, + {1, S64_MAX, S64_MIN, S64_MIN+2, S64_MAX, true, false, false}, + {S64_MAX, 1, S64_MIN, S64_MAX-1, S64_MAX, true, false, false}, + + {S64_MIN, S64_MIN, 0, 0, 0, true, false, true}, + {S64_MAX, S64_MAX, -2, 0, 1, true, false, true}, + + {-1, -1, -2, 0, 1, false, false, false}, + {-1, -128, -129, 127, 128, false, false, false}, + {-128, -1, -129, -127, 128, false, false, false}, + {0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false}, +}; + +#define check_one_op(t, fmt, op, sym, a, b, r, of) do { \ + t _r; \ + bool _of; \ + \ + _of = check_ ## op ## _overflow(a, b, &_r); \ + if (_of != of) { \ + pr_warn("expected "fmt" "sym" "fmt \ + " to%s overflow (type %s)\n", \ + a, b, of ? "" : " not", #t); \ + err = 1; \ + } \ + if (_r != r) { \ + pr_warn("expected "fmt" "sym" "fmt" == " \ + fmt", got "fmt" (type %s)\n", \ + a, b, r, _r, #t); \ + err = 1; \ + } \ +} while (0) + +#define DEFINE_TEST_FUNC(t, fmt) \ +static int __init do_test_ ## t(const struct test_ ## t *p) \ +{ \ + int err = 0; \ + \ + check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of); \ + check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of); \ + check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of); \ + check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of); \ + check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of); \ + \ + return err; \ +} \ + \ +static int __init test_ ## t ## _overflow(void) { \ + int err = 0; \ + unsigned i; \ + \ + pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests)); \ + for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ + err |= do_test_ ## t(&t ## _tests[i]); \ + return err; \ +} + +DEFINE_TEST_FUNC(u8, "%d"); +DEFINE_TEST_FUNC(s8, "%d"); +DEFINE_TEST_FUNC(u16, "%d"); +DEFINE_TEST_FUNC(s16, "%d"); +DEFINE_TEST_FUNC(u32, "%u"); +DEFINE_TEST_FUNC(s32, "%d"); +#if BITS_PER_LONG == 64 +DEFINE_TEST_FUNC(u64, "%llu"); +DEFINE_TEST_FUNC(s64, "%lld"); +#endif + +static int __init test_overflow_calculation(void) +{ + int err = 0; + + err |= test_u8_overflow(); + err |= test_s8_overflow(); + err |= test_u16_overflow(); + err |= test_s16_overflow(); + err |= test_u32_overflow(); + err |= test_s32_overflow(); +#if BITS_PER_LONG == 64 + err |= test_u64_overflow(); + err |= test_s64_overflow(); +#endif + + return err; +} + +/* + * Deal with the various forms of allocator arguments. See comments above + * the DEFINE_TEST_ALLOC() instances for mapping of the "bits". + */ +#define alloc010(alloc, arg, sz) alloc(sz, GFP_KERNEL) +#define alloc011(alloc, arg, sz) alloc(sz, GFP_KERNEL, NUMA_NO_NODE) +#define alloc000(alloc, arg, sz) alloc(sz) +#define alloc001(alloc, arg, sz) alloc(sz, NUMA_NO_NODE) +#define alloc110(alloc, arg, sz) alloc(arg, sz, GFP_KERNEL) +#define free0(free, arg, ptr) free(ptr) +#define free1(free, arg, ptr) free(arg, ptr) + +/* Wrap around to 8K */ +#define TEST_SIZE (9 << PAGE_SHIFT) + +#define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\ +static int __init test_ ## func (void *arg) \ +{ \ + volatile size_t a = TEST_SIZE; \ + volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1; \ + void *ptr; \ + \ + /* Tiny allocation test. */ \ + ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\ + if (!ptr) { \ + pr_warn(#func " failed regular allocation?!\n"); \ + return 1; \ + } \ + free ## want_arg (free_func, arg, ptr); \ + \ + /* Wrapped allocation test. */ \ + ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \ + a * b); \ + if (!ptr) { \ + pr_warn(#func " unexpectedly failed bad wrapping?!\n"); \ + return 1; \ + } \ + free ## want_arg (free_func, arg, ptr); \ + \ + /* Saturated allocation test. */ \ + ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, \ + array_size(a, b)); \ + if (ptr) { \ + pr_warn(#func " missed saturation!\n"); \ + free ## want_arg (free_func, arg, ptr); \ + return 1; \ + } \ + pr_info(#func " detected saturation\n"); \ + return 0; \ +} + +/* + * Allocator uses a trailing node argument --------+ (e.g. kmalloc_node()) + * Allocator uses the gfp_t argument -----------+ | (e.g. kmalloc()) + * Allocator uses a special leading argument + | | (e.g. devm_kmalloc()) + * | | | + */ +DEFINE_TEST_ALLOC(kmalloc, kfree, 0, 1, 0); +DEFINE_TEST_ALLOC(kmalloc_node, kfree, 0, 1, 1); +DEFINE_TEST_ALLOC(kzalloc, kfree, 0, 1, 0); +DEFINE_TEST_ALLOC(kzalloc_node, kfree, 0, 1, 1); +DEFINE_TEST_ALLOC(vmalloc, vfree, 0, 0, 0); +DEFINE_TEST_ALLOC(vmalloc_node, vfree, 0, 0, 1); +DEFINE_TEST_ALLOC(vzalloc, vfree, 0, 0, 0); +DEFINE_TEST_ALLOC(vzalloc_node, vfree, 0, 0, 1); +DEFINE_TEST_ALLOC(kvmalloc, kvfree, 0, 1, 0); +DEFINE_TEST_ALLOC(kvmalloc_node, kvfree, 0, 1, 1); +DEFINE_TEST_ALLOC(kvzalloc, kvfree, 0, 1, 0); +DEFINE_TEST_ALLOC(kvzalloc_node, kvfree, 0, 1, 1); +DEFINE_TEST_ALLOC(devm_kmalloc, devm_kfree, 1, 1, 0); +DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); + +static int __init test_overflow_allocation(void) +{ + const char device_name[] = "overflow-test"; + struct device *dev; + int err = 0; + + /* Create dummy device for devm_kmalloc()-family tests. */ + dev = root_device_register(device_name); + if (IS_ERR(dev)) { + pr_warn("Cannot register test device\n"); + return 1; + } + + err |= test_kmalloc(NULL); + err |= test_kmalloc_node(NULL); + err |= test_kzalloc(NULL); + err |= test_kzalloc_node(NULL); + err |= test_kvmalloc(NULL); + err |= test_kvmalloc_node(NULL); + err |= test_kvzalloc(NULL); + err |= test_kvzalloc_node(NULL); + err |= test_vmalloc(NULL); + err |= test_vmalloc_node(NULL); + err |= test_vzalloc(NULL); + err |= test_vzalloc_node(NULL); + err |= test_devm_kmalloc(dev); + err |= test_devm_kzalloc(dev); + + device_unregister(dev); + + return err; +} + +static int __init test_module_init(void) +{ + int err = 0; + + err |= test_overflow_calculation(); + err |= test_overflow_allocation(); + + if (err) { + pr_warn("FAIL!\n"); + err = -EINVAL; + } else { + pr_info("all tests passed\n"); + } + + return err; +} + +static void __exit test_module_exit(void) +{ } + +module_init(test_module_init); +module_exit(test_module_exit); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index f4000c137dbe..fb6968109113 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -285,12 +285,14 @@ static int __init test_rhltable(unsigned int entries) if (entries == 0) entries = 1; - rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries); + rhl_test_objects = vzalloc(array_size(entries, + sizeof(*rhl_test_objects))); if (!rhl_test_objects) return -ENOMEM; ret = -ENOMEM; - obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long)); + obj_in_table = vzalloc(array_size(sizeof(unsigned long), + BITS_TO_LONGS(entries))); if (!obj_in_table) goto out_free; @@ -706,7 +708,8 @@ static int __init test_rht_init(void) test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries); test_rht_params.nelem_hint = size; - objs = vzalloc((test_rht_params.max_size + 1) * sizeof(struct test_obj)); + objs = vzalloc(array_size(sizeof(struct test_obj), + test_rht_params.max_size + 1)); if (!objs) return -ENOMEM; @@ -753,10 +756,10 @@ static int __init test_rht_init(void) pr_info("Testing concurrent rhashtable access from %d threads\n", tcount); sema_init(&prestart_sem, 1 - tcount); - tdata = vzalloc(tcount * sizeof(struct thread_data)); + tdata = vzalloc(array_size(tcount, sizeof(struct thread_data))); if (!tdata) return -ENOMEM; - objs = vzalloc(tcount * entries * sizeof(struct test_obj)); + objs = vzalloc(array3_size(sizeof(struct test_obj), tcount, entries)); if (!objs) { vfree(tdata); return -ENOMEM; diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c index 25ca2d4c1e19..597998169a96 100644 --- a/lib/ucmpdi2.c +++ b/lib/ucmpdi2.c @@ -17,7 +17,7 @@ #include <linux/module.h> #include <linux/libgcc.h> -word_type __ucmpdi2(unsigned long long a, unsigned long long b) +word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b) { const DWunion au = {.ll = a}; const DWunion bu = {.ll = b}; diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index d7e06b28de38..0a559a42359b 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -112,3 +112,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) return j; } EXPORT_SYMBOL(ucs2_as_utf8); + +MODULE_LICENSE("GPL v2"); |