From 9b6c2d2e2ba5280649eb043cbc7e3483c77e5d69 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:35:57 -0800 Subject: lib/bitmap.c: change prototype of bitmap_copy_le Make the prototype of bitmap_copy_le the same as bitmap_copy's. All other bitmap_* functions take unsigned long* parameters; there's no reason this should be special. The only current user is the static inline uwb_mas_bm_copy_le, which already does the void* laundering, so the end users can pass their u8 or __le32 buffers without a cast. Furthermore, this allows us to simply let bitmap_copy_le be an alias for bitmap_copy on little-endian; see next patch. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5f5c00de39f0..334fe32d8f0e 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -170,7 +170,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); +extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); -- cgit v1.2.3-70-g09d2 From e8f24278329dc31b3b8223c83a5465c9df153d9d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:00 -0800 Subject: lib/bitmap.c: elide bitmap_copy_le on little-endian On little-endian, there's no reason to have an extra, presumably less efficient, way of copying a bitmap. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++++ lib/bitmap.c | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 334fe32d8f0e..cffc89c23c02 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -170,7 +170,11 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +#ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); +#else +#define bitmap_copy_le bitmap_copy +#endif extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); diff --git a/lib/bitmap.c b/lib/bitmap.c index e4ac20bec76c..d2cd50cd4f5d 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1191,6 +1191,7 @@ EXPORT_SYMBOL(bitmap_allocate_region); * * Require nbits % BITS_PER_LONG == 0. */ +#ifdef __BIG_ENDIAN void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int i; @@ -1203,3 +1204,4 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n } } EXPORT_SYMBOL(bitmap_copy_le); +#endif -- cgit v1.2.3-70-g09d2 From 2fbad29917c9852fa018d572cd3d43a13465d0f8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:02 -0800 Subject: lib: bitmap: change bitmap_shift_right to take unsigned parameters I've previously changed the nbits parameter of most bitmap_* functions to unsigned; now it is bitmap_shift_{left,right}'s turn. This alone saves some .text, but while at it I found that there were a few other things one could do. The end result of these seven patches is $ scripts/bloat-o-meter /tmp/bitmap.o.{old,new} add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-328 (-328) function old new delta __bitmap_shift_right 384 226 -158 __bitmap_shift_left 306 136 -170 and less importantly also a smaller stack footprint $ stack-o-meter.pl master bitmap file function old new delta lib/bitmap.o __bitmap_shift_right 24 8 -16 lib/bitmap.o __bitmap_shift_left 24 0 -24 For each pair of 0 <= shift <= nbits <= 256 I've tested the end result with a few randomly filled src buffers (including garbage beyond nbits), in each case verifying that the shift {left,right}-most bits of dst are zero and the remaining nbits-shift bits correspond to src, so I'm fairly confident I didn't screw up. That hasn't stopped me from being wrong before, though. This patch (of 7): gcc can generate slightly better code for stuff like "nbits % BITS_PER_LONG" when it knows nbits is not negative. Since negative size bitmaps or shift amounts don't make sense, change these parameters of bitmap_shift_right to unsigned. The expressions involving "lim - 1" are still ok, since if lim is 0 the loop is never executed. Also use "shift" and "nbits" consistently for the parameter names. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 12 ++++++------ lib/bitmap.c | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index cffc89c23c02..c168a807ab9a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -96,8 +96,8 @@ extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); -extern void __bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, int shift, int bits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, @@ -313,13 +313,13 @@ static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) return __bitmap_weight(src, nbits); } -static inline void bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int n, int nbits) +static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, int nbits) { if (small_const_nbits(nbits)) - *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n; + *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; else - __bitmap_shift_right(dst, src, n, nbits); + __bitmap_shift_right(dst, src, shift, nbits); } static inline void bitmap_shift_left(unsigned long *dst, diff --git a/lib/bitmap.c b/lib/bitmap.c index d2cd50cd4f5d..45e7d14ebdfd 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -104,17 +104,17 @@ EXPORT_SYMBOL(__bitmap_complement); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits : bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting right (dividing) means moving bits in the MS -> LS bit * direction. Zeros are fed into the vacated MS positions and the * LS bits shifted off the bottom are lost. */ -void __bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned shift, unsigned nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned k, lim = BITS_TO_LONGS(nbits), left = nbits % BITS_PER_LONG; + unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; unsigned long mask = (1UL << left) - 1; for (k = 0; off + k < lim; ++k) { unsigned long upper, lower; -- cgit v1.2.3-70-g09d2 From dba94c2553da1928303c2a6c6410247c88cafc1d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:13 -0800 Subject: lib: bitmap: change bitmap_shift_left to take unsigned parameters gcc can generate slightly better code for stuff like "nbits % BITS_PER_LONG" when it knows nbits is not negative. Since negative size bitmaps or shift amounts don't make sense, change these parameters of bitmap_shift_right to unsigned. If off >= lim (which requires shift >= nbits), k is initialized with a large positive value, but since I've let k continue to be signed, the loop will never run and dst will be zeroed as expected. Inside the loop, k is guaranteed to be non-negative, so the fact that it is promoted to unsigned in the various expressions it appears in is harmless. Also use "shift" and "nbits" consistently for the parameter names. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 12 ++++++------ lib/bitmap.c | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c168a807ab9a..5e7f75a6d7d0 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -98,8 +98,8 @@ extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); -extern void __bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, @@ -322,13 +322,13 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s __bitmap_shift_right(dst, src, shift, nbits); } -static inline void bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int n, int nbits) +static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) - *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); + *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); else - __bitmap_shift_left(dst, src, n, nbits); + __bitmap_shift_left(dst, src, shift, nbits); } static inline int bitmap_parse(const char *buf, unsigned int buflen, diff --git a/lib/bitmap.c b/lib/bitmap.c index db88512c3451..74bdf3601245 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -148,18 +148,19 @@ EXPORT_SYMBOL(__bitmap_shift_right); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits : bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting left (multiplying) means moving bits in the LS -> MS * direction. Zeros are fed into the vacated LS bit positions * and those MS bits shifted off the top are lost. */ -void __bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + int k; + unsigned int lim = BITS_TO_LONGS(nbits), left = nbits % BITS_PER_LONG; + unsigned int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; for (k = lim - off - 1; k >= 0; --k) { unsigned long upper, lower; -- cgit v1.2.3-70-g09d2 From a4bb1e43e22d3cade8f942fc6f95920248eb2fd0 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Feb 2015 14:36:24 -0800 Subject: mm/util: add kstrdup_const kstrdup() is often used to duplicate strings where neither source neither destination will be ever modified. In such case we can just reuse the source instead of duplicating it. The problem is that we must be sure that the source is non-modifiable and its life-time is long enough. I suspect the good candidates for such strings are strings located in kernel .rodata section, they cannot be modifed because the section is read-only and their life-time is equal to kernel life-time. This small patchset proposes alternative version of kstrdup - kstrdup_const, which returns source string if it is located in .rodata otherwise it fallbacks to kstrdup. To verify if the source is in .rodata function checks if the address is between sentinels __start_rodata, __end_rodata. I guess it should work with all architectures. The main patch is accompanied by four patches constifying kstrdup for cases where situtation described above happens frequently. I have tested the patchset on mobile platform (exynos4210-trats) and it saves 3272 string allocations. Since minimal allocation is 32 or 64 bytes depending on Kconfig options the patchset saves respectively about 100KB or 200KB of memory. Stats from tested platform show that the main offender is sysfs: By caller: 2260 __kernfs_new_node 631 clk_register+0xc8/0x1b8 318 clk_register+0x34/0x1b8 51 kmem_cache_create 12 alloc_vfsmnt By string (with count >= 5): 883 power 876 subsystem 135 parameters 132 device 61 iommu_group ... This patch (of 5): Add an alternative version of kstrdup which returns pointer to constant char array. The function checks if input string is in persistent and read-only memory section, if yes it returns the input string, otherwise it fallbacks to kstrdup. kstrdup_const is accompanied by kfree_const performing conditional memory deallocation of the string. Signed-off-by: Andrzej Hajda Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Mike Turquette Cc: Alexander Viro Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Greg KH Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 3 +++ mm/util.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index b9bc9a5d9e21..e40099e585c9 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -112,7 +112,10 @@ extern void * memchr(const void *,int,__kernel_size_t); #endif void *memchr_inv(const void *s, int c, size_t n); +extern void kfree_const(const void *x); + extern char *kstrdup(const char *s, gfp_t gfp); +extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); diff --git a/mm/util.c b/mm/util.c index f3ef639c4857..3981ae9d1b15 100644 --- a/mm/util.c +++ b/mm/util.c @@ -12,10 +12,30 @@ #include #include +#include #include #include "internal.h" +static inline int is_kernel_rodata(unsigned long addr) +{ + return addr >= (unsigned long)__start_rodata && + addr < (unsigned long)__end_rodata; +} + +/** + * kfree_const - conditionally free memory + * @x: pointer to the memory + * + * Function calls kfree only if @x is not in .rodata section. + */ +void kfree_const(const void *x) +{ + if (!is_kernel_rodata((unsigned long)x)) + kfree(x); +} +EXPORT_SYMBOL(kfree_const); + /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate @@ -37,6 +57,24 @@ char *kstrdup(const char *s, gfp_t gfp) } EXPORT_SYMBOL(kstrdup); +/** + * kstrdup_const - conditionally duplicate an existing const string + * @s: the string to duplicate + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + * + * Function returns source string if it is in .rodata section otherwise it + * fallbacks to kstrdup. + * Strings allocated by kstrdup_const should be freed by kfree_const. + */ +const char *kstrdup_const(const char *s, gfp_t gfp) +{ + if (is_kernel_rodata((unsigned long)s)) + return s; + + return kstrdup(s, gfp); +} +EXPORT_SYMBOL(kstrdup_const); + /** * kstrndup - allocate space for and copy an existing string * @s: the string to duplicate -- cgit v1.2.3-70-g09d2 From dfeb0750b630b72b5d4fb2461bc7179eceb54666 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:31 -0800 Subject: kernfs: remove KERNFS_STATIC_NAME When a new kernfs node is created, KERNFS_STATIC_NAME is used to avoid making a separate copy of its name. It's currently only used for sysfs attributes whose filenames are required to stay accessible and unchanged. There are rare exceptions where these names are allocated and formatted dynamically but for the vast majority of cases they're consts in the rodata section. Now that kernfs is converted to use kstrdup_const() and kfree_const(), there's little point in keeping KERNFS_STATIC_NAME around. Remove it. Signed-off-by: Tejun Heo Cc: Andrzej Hajda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/kernfs/dir.c | 20 ++++++++------------ fs/kernfs/file.c | 4 ---- fs/sysfs/file.c | 2 +- include/linux/kernfs.h | 7 ++----- kernel/cgroup.c | 2 +- 5 files changed, 12 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 35e40879860a..6acc9648f986 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -411,8 +411,9 @@ void kernfs_put(struct kernfs_node *kn) if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); - if (!(kn->flags & KERNFS_STATIC_NAME)) - kfree_const(kn->name); + + kfree_const(kn->name); + if (kn->iattr) { if (kn->iattr->ia_secdata) security_release_secctx(kn->iattr->ia_secdata, @@ -506,15 +507,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, const char *name, umode_t mode, unsigned flags) { - const char *dup_name = NULL; struct kernfs_node *kn; int ret; - if (!(flags & KERNFS_STATIC_NAME)) { - name = dup_name = kstrdup_const(name, GFP_KERNEL); - if (!name) - return NULL; - } + name = kstrdup_const(name, GFP_KERNEL); + if (!name) + return NULL; kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); if (!kn) @@ -538,7 +536,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: - kfree_const(dup_name); + kfree_const(name); return NULL; } @@ -1285,9 +1283,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, kn->ns = new_ns; if (new_name) { - if (!(kn->flags & KERNFS_STATIC_NAME)) - old_name = kn->name; - kn->flags &= ~KERNFS_STATIC_NAME; + old_name = kn->name; kn->name = new_name; } diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index ddc9f9612f16..b684e8a132e6 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -901,7 +901,6 @@ const struct file_operations kernfs_file_fops = { * @ops: kernfs operations for the file * @priv: private data for the file * @ns: optional namespace tag of the file - * @name_is_static: don't copy file name * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * * Returns the created node on success, ERR_PTR() value on error. @@ -911,7 +910,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, umode_t mode, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, - bool name_is_static, struct lock_class_key *key) { struct kernfs_node *kn; @@ -919,8 +917,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, int rc; flags = KERNFS_FILE; - if (name_is_static) - flags |= KERNFS_STATIC_NAME; kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags); if (!kn) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index dfe928a9540f..7c2867b44141 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -295,7 +295,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops, - (void *)attr, ns, true, key); + (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index d4e01b358341..71ecdab1671b 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -43,7 +43,6 @@ enum kernfs_node_flag { KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, - KERNFS_STATIC_NAME = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, }; @@ -291,7 +290,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, umode_t mode, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, - bool name_is_static, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, @@ -369,8 +367,7 @@ kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, bool name_is_static, - struct lock_class_key *key) + void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * @@ -439,7 +436,7 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name, key = (struct lock_class_key *)&ops->lockdep_key; #endif return __kernfs_create_file(parent, name, mode, size, ops, priv, ns, - false, key); + key); } static inline struct kernfs_node * diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d5f6ec251fb2..29a7b2cc593e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3077,7 +3077,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) #endif kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), cgroup_file_mode(cft), 0, cft->kf_ops, cft, - NULL, false, key); + NULL, key); if (IS_ERR(kn)) return PTR_ERR(kn); -- cgit v1.2.3-70-g09d2 From 513e3d2d11c9f05db1edc70deb18a82555cf9309 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:50 -0800 Subject: cpumask: always use nr_cpu_ids in formatting and parsing functions bitmap implements two variants of scnprintf functions to format a bitmap into a string and cpumask and nodemask wrap them to provide equivalent interfaces. The scnprintf family of functions require a string buffer as an output target which complicates code paths which just want to print out the mask through printk for informational or debug purposes as they have to worry about how large the buffer should be and whether it's too large to allocate on stack. Neither cpumask or nodemask provides a guildeline on how large the target buffer should be forcing users come up with their own solutions - some allocate an arbitrarily sized buffer which is small enough to allocate on stack but may be too short in corner cases, other come up with a custom upper limit calculation considering the output format, some allocate the buffer dynamically while one resorted to using lock to synchronize access to a static buffer. This is an artificial problem which is being solved repeatedly for no benefit. In a lot of cases, the output area already exists and can be targeted directly making the intermediate buffer unnecessary. This patchset teaches printf family of functions how to format bitmaps and replace the dedicated formatting functions with it. Pointer formatting is extended to cover bitmap formatting. It uses the field width for the number of bits instead of precision. The format used is '%*pb[l]', with the optional trailing 'l' specifying list format instead of hex masks. For more details, please see 0002. This patch (of 31): Currently, the formatting and parsing functions in cpumask.h use nr_cpumask_bits like other cpumask functions; however, nr_cpumask_bits is either NR_CPUS or nr_cpu_ids depending on CONFIG_CPUMASK_OFFSTACK. This leads to inconsistent behaviors. With CONFIG_NR_CPUS=512 and !CONFIG_CPUMASK_OFFSTACK # cat /sys/devices/virtual/net/lo/queues/rx-0/rps_cpus 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000 # cat /proc/self/status | grep Cpus_allowed: Cpus_allowed: f With CONFIG_NR_CPUS=1024 and CONFIG_CPUMASK_OFFSTACK (fedora default) # cat /sys/devices/virtual/net/lo/queues/rx-0/rps_cpus 0 # cat /proc/self/status | grep Cpus_allowed: Cpus_allowed: f Note that /proc/self/status is always using nr_cpu_ids regardless of config. This is because seq cpumask formattings functions always use nr_cpu_ids. Given that the same output fields may switch between the two forms, converging on nr_cpu_ids always isn't too likely to surprise userland. This patch updates the formatting and parsing functions in cpumask.h to always use nr_cpu_ids. There's no point in dealing with CPUs which aren't even possible on the machine. Signed-off-by: Tejun Heo Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: "John W. Linville" Cc: "Paul E. McKenney" Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: Dmitry Torokhov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Li Zefan Cc: Max Filippov Cc: Mike Travis Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Russell King Acked-by: Rusty Russell Cc: Steffen Klassert Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ff9044286d88..ee9acb0ce542 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -550,7 +550,7 @@ static inline void cpumask_copy(struct cpumask *dstp, static inline int cpumask_scnprintf(char *buf, int len, const struct cpumask *srcp) { - return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpu_ids); } /** @@ -564,7 +564,7 @@ static inline int cpumask_scnprintf(char *buf, int len, static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { - return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -579,7 +579,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), - nr_cpumask_bits); + nr_cpu_ids); } /** @@ -595,7 +595,7 @@ static inline int cpulist_scnprintf(char *buf, int len, const struct cpumask *srcp) { return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp), - nr_cpumask_bits); + nr_cpu_ids); } /** @@ -610,7 +610,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) char *nl = strchr(buf, '\n'); unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf); - return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -622,7 +622,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { - return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -817,7 +817,7 @@ static inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), - nr_cpumask_bits); + nr_cpu_ids); } /* -- cgit v1.2.3-70-g09d2 From f1bbc032e45106400905ebb47550983af4690b0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:57 -0800 Subject: cpumask, nodemask: implement cpumask/nodemask_pr_args() printf family of functions can now format bitmaps using '%*pb[l]' and all cpumask and nodemask formatting will be converted to use it. To ease printing these masks with '%*pb[l]' which require two params - the number of bits and the actual bitmap, this patch implement cpumask_pr_args() and nodemask_pr_args() which can be used to provide arguments for '%*pb[l]' Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: "John W. Linville" Cc: "Paul E. McKenney" Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: Dmitry Torokhov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Li Zefan Cc: Max Filippov Cc: Mike Travis Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Russell King Cc: Steffen Klassert Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 8 ++++++++ include/linux/nodemask.h | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ee9acb0ce542..a9b3d00915a0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -22,6 +22,14 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; */ #define cpumask_bits(maskp) ((maskp)->bits) +/** + * cpumask_pr_args - printf args to output a cpumask + * @maskp: cpumask to be printed + * + * Can be used to provide arguments for '%*pb[l]' when printing a cpumask. + */ +#define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) + #if NR_CPUS == 1 #define nr_cpu_ids 1 #else diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 21cef483dc1b..10f8e556ba07 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -98,6 +98,14 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; +/** + * nodemask_pr_args - printf args to output a nodemask + * @maskp: nodemask to be printed + * + * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. + */ +#define nodemask_pr_args(maskp) MAX_NUMNODES, (maskp)->bits + /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. However, as these functions -- cgit v1.2.3-70-g09d2 From 46385326cc1577587ed3e7432c2425cf6d3e4308 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:38:15 -0800 Subject: bitmap, cpumask, nodemask: remove dedicated formatting functions Now that all bitmap formatting usages have been converted to '%*pb[l]', the separate formatting functions are unnecessary. The following functions are removed. * bitmap_scn[list]printf() * cpumask_scnprintf(), cpulist_scnprintf() * [__]nodemask_scnprintf(), [__]nodelist_scnprintf() * seq_bitmap[_list](), seq_cpumask[_list](), seq_nodemask[_list]() * seq_buf_bitmask() Signed-off-by: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 32 -------------------------------- include/linux/bitmap.h | 7 ------- include/linux/cpumask.h | 31 ------------------------------- include/linux/nodemask.h | 33 +++++++-------------------------- include/linux/seq_buf.h | 3 --- include/linux/seq_file.h | 25 ------------------------- lib/bitmap.c | 41 ----------------------------------------- lib/seq_buf.c | 36 ------------------------------------ 8 files changed, 7 insertions(+), 201 deletions(-) (limited to 'include') diff --git a/fs/seq_file.c b/fs/seq_file.c index dbf3a59c86bb..555f82155be8 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -539,38 +539,6 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc) return res; } -int seq_bitmap(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits) -{ - if (m->count < m->size) { - int len = bitmap_scnprintf(m->buf + m->count, - m->size - m->count, bits, nr_bits); - if (m->count + len < m->size) { - m->count += len; - return 0; - } - } - seq_set_overflow(m); - return -1; -} -EXPORT_SYMBOL(seq_bitmap); - -int seq_bitmap_list(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits) -{ - if (m->count < m->size) { - int len = bitmap_scnlistprintf(m->buf + m->count, - m->size - m->count, bits, nr_bits); - if (m->count + len < m->size) { - m->count += len; - return 0; - } - } - seq_set_overflow(m); - return -1; -} -EXPORT_SYMBOL(seq_bitmap_list); - static void *single_start(struct seq_file *p, loff_t *pos) { return NULL + (*pos == 0); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5e7f75a6d7d0..dbfbf4990005 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -52,16 +52,13 @@ * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz - * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf - * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region - * bitmap_print_to_pagebuf(list, buf, mask, nbits) Print bitmap src as list/hex */ /* @@ -147,14 +144,10 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -extern int bitmap_scnprintf(char *buf, unsigned int len, - const unsigned long *src, int nbits); extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); -extern int bitmap_scnlistprintf(char *buf, unsigned int len, - const unsigned long *src, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index a9b3d00915a0..086549a665e2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -546,21 +546,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) -/** - * cpumask_scnprintf - print a cpumask into a string as comma-separated hex - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpumask_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpu_ids); -} - /** * cpumask_parse_user - extract a cpumask from a user string * @buf: the buffer to extract from @@ -590,22 +575,6 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, nr_cpu_ids); } -/** - * cpulist_scnprintf - print a cpumask into a string as comma-separated list - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpulist_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp), - nr_cpu_ids); -} - /** * cpumask_parse - extract a cpumask from from a string * @buf: the buffer to extract from diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 10f8e556ba07..6e85889cf9ab 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -8,14 +8,13 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * - * For details of nodemask_scnprintf() and nodemask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. - * For details of nodelist_scnprintf() and nodelist_parse(), see - * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. - * For details of nodes_remap(), see bitmap_remap in lib/bitmap.c. - * For details of nodes_onto(), see bitmap_onto in lib/bitmap.c. - * For details of nodes_fold(), see bitmap_fold in lib/bitmap.c. + * For details of nodemask_parse_user(), see bitmap_parse_user() in + * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), + * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in + * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in + * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in + * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in + * lib/bitmap.c. * * The available nodemask operations are: * @@ -52,9 +51,7 @@ * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * - * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask - * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) @@ -312,14 +309,6 @@ static inline int __first_unset_node(const nodemask_t *maskp) #define nodes_addr(src) ((src).bits) -#define nodemask_scnprintf(buf, len, src) \ - __nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES) -static inline int __nodemask_scnprintf(char *buf, int len, - const nodemask_t *srcp, int nbits) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nbits); -} - #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static inline int __nodemask_parse_user(const char __user *buf, int len, @@ -328,14 +317,6 @@ static inline int __nodemask_parse_user(const char __user *buf, int len, return bitmap_parse_user(buf, len, dstp->bits, nbits); } -#define nodelist_scnprintf(buf, len, src) \ - __nodelist_scnprintf((buf), (len), &(src), MAX_NUMNODES) -static inline int __nodelist_scnprintf(char *buf, int len, - const nodemask_t *srcp, int nbits) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); -} - #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9aafe0e24c68..fb7eb9ccb1cd 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -125,9 +125,6 @@ extern int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned int len); extern int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc); -extern int seq_buf_bitmask(struct seq_buf *s, const unsigned long *maskp, - int nmaskbits); - #ifdef CONFIG_BINARY_PRINTF extern int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index cf6a9daaaf6d..afbb1fd77c77 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -126,31 +126,6 @@ int seq_path(struct seq_file *, const struct path *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); -int seq_bitmap(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits); -static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask) -{ - return seq_bitmap(m, cpumask_bits(mask), nr_cpu_ids); -} - -static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask) -{ - return seq_bitmap(m, mask->bits, MAX_NUMNODES); -} - -int seq_bitmap_list(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits); - -static inline int seq_cpumask_list(struct seq_file *m, - const struct cpumask *mask) -{ - return seq_bitmap_list(m, cpumask_bits(mask), nr_cpu_ids); -} - -static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask) -{ - return seq_bitmap_list(m, mask->bits, MAX_NUMNODES); -} int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); diff --git a/lib/bitmap.c b/lib/bitmap.c index 088adbdcbad9..d456f4c15a9f 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -369,24 +369,6 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area_off); #define nbits_to_hold_value(val) fls(val) #define BASEDEC 10 /* fancier cpuset lists input in decimal */ -/** - * bitmap_scnprintf - convert bitmap to an ASCII hex string. - * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Exactly @nmaskbits bits are displayed. Hex digits are grouped into - * comma-separated sets of eight digits per set. Returns the number of - * characters which were written to *buf, excluding the trailing \0. - */ -int bitmap_scnprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) -{ - return scnprintf(buf, buflen, "%*pb", nmaskbits, maskp); -} -EXPORT_SYMBOL(bitmap_scnprintf); - /** * __bitmap_parse - convert an ASCII hex string into a bitmap. * @buf: pointer to buffer containing string. @@ -500,29 +482,6 @@ int bitmap_parse_user(const char __user *ubuf, } EXPORT_SYMBOL(bitmap_parse_user); -/** - * bitmap_scnlistprintf - convert bitmap to list format ASCII string - * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Output format is a comma-separated list of decimal numbers and - * ranges. Consecutively set bits are shown as two hyphen-separated - * decimal numbers, the smallest and largest bit numbers set in - * the range. Output format is compatible with the format - * accepted as input by bitmap_parselist(). - * - * The return value is the number of characters which were written to *buf - * excluding the trailing '\0', as per ISO C99's scnprintf. - */ -int bitmap_scnlistprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) -{ - return scnprintf(buf, buflen, "%*pbl", nmaskbits, maskp); -} -EXPORT_SYMBOL(bitmap_scnlistprintf); - /** * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string * @list: indicates whether the bitmap must be list diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 4eedfedb9e31..88c0854bd752 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -91,42 +91,6 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) return ret; } -/** - * seq_buf_bitmask - write a bitmask array in its ASCII representation - * @s: seq_buf descriptor - * @maskp: points to an array of unsigned longs that represent a bitmask - * @nmaskbits: The number of bits that are valid in @maskp - * - * Writes a ASCII representation of a bitmask string into @s. - * - * Returns zero on success, -1 on overflow. - */ -int seq_buf_bitmask(struct seq_buf *s, const unsigned long *maskp, - int nmaskbits) -{ - unsigned int len = seq_buf_buffer_left(s); - int ret; - - WARN_ON(s->size == 0); - - /* - * Note, because bitmap_scnprintf() only returns the number of bytes - * written and not the number that would be written, we use the last - * byte of the buffer to let us know if we overflowed. There's a small - * chance that the bitmap could have fit exactly inside the buffer, but - * it's not that critical if that does happen. - */ - if (len > 1) { - ret = bitmap_scnprintf(s->buffer + s->len, len, maskp, nmaskbits); - if (ret < len) { - s->len += ret; - return 0; - } - } - seq_buf_set_overflow(s); - return -1; -} - #ifdef CONFIG_BINARY_PRINTF /** * seq_buf_bprintf - Write the printf string from binary arguments -- cgit v1.2.3-70-g09d2 From cb4188ac8e5779f66b9f55888ac2c75b391cde44 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:14 -0800 Subject: compiler: introduce __alias(symbol) shortcut To be consistent with other compiler attributes introduce __alias(symbol) macro expanding into __attribute__((alias(#symbol))) Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 02ae99e8e6d3..cdf13ca7cac3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,7 @@ #define __deprecated __attribute__((deprecated)) #define __packed __attribute__((packed)) #define __weak __attribute__((weak)) +#define __alias(symbol) __attribute__((alias(#symbol))) /* * it doesn't make sense on ARM (currently the only user of __naked) to trace -- cgit v1.2.3-70-g09d2 From 0b24becc810dc3be6e3f94103a866f214c282394 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:17 -0800 Subject: kasan: add kernel address sanitizer infrastructure Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides fast and comprehensive solution for finding use-after-free and out-of-bounds bugs. KASAN uses compile-time instrumentation for checking every memory access, therefore GCC > v4.9.2 required. v4.9.2 almost works, but has issues with putting symbol aliases into the wrong section, which breaks kasan instrumentation of globals. This patch only adds infrastructure for kernel address sanitizer. It's not available for use yet. The idea and some code was borrowed from [1]. Basic idea: The main idea of KASAN is to use shadow memory to record whether each byte of memory is safe to access or not, and use compiler's instrumentation to check the shadow memory on each memory access. Address sanitizer uses 1/8 of the memory addressable in kernel for shadow memory and uses direct mapping with a scale and offset to translate a memory address to its corresponding shadow address. Here is function to translate address to corresponding shadow address: unsigned long kasan_mem_to_shadow(unsigned long addr) { return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } where KASAN_SHADOW_SCALE_SHIFT = 3. So for every 8 bytes there is one corresponding byte of shadow memory. The following encoding used for each shadow byte: 0 means that all 8 bytes of the corresponding memory region are valid for access; k (1 <= k <= 7) means that the first k bytes are valid for access, and other (8 - k) bytes are not; Any negative value indicates that the entire 8-bytes are inaccessible. Different negative values used to distinguish between different kinds of inaccessible memory (redzones, freed memory) (see mm/kasan/kasan.h). To be able to detect accesses to bad memory we need a special compiler. Such compiler inserts a specific function calls (__asan_load*(addr), __asan_store*(addr)) before each memory access of size 1, 2, 4, 8 or 16. These functions check whether memory region is valid to access or not by checking corresponding shadow memory. If access is not valid an error printed. Historical background of the address sanitizer from Dmitry Vyukov: "We've developed the set of tools, AddressSanitizer (Asan), ThreadSanitizer and MemorySanitizer, for user space. We actively use them for testing inside of Google (continuous testing, fuzzing, running prod services). To date the tools have found more than 10'000 scary bugs in Chromium, Google internal codebase and various open-source projects (Firefox, OpenSSL, gcc, clang, ffmpeg, MySQL and lots of others): [2] [3] [4]. The tools are part of both gcc and clang compilers. We have not yet done massive testing under the Kernel AddressSanitizer (it's kind of chicken and egg problem, you need it to be upstream to start applying it extensively). To date it has found about 50 bugs. Bugs that we've found in upstream kernel are listed in [5]. We've also found ~20 bugs in out internal version of the kernel. Also people from Samsung and Oracle have found some. [...] As others noted, the main feature of AddressSanitizer is its performance due to inline compiler instrumentation and simple linear shadow memory. User-space Asan has ~2x slowdown on computational programs and ~2x memory consumption increase. Taking into account that kernel usually consumes only small fraction of CPU and memory when running real user-space programs, I would expect that kernel Asan will have ~10-30% slowdown and similar memory consumption increase (when we finish all tuning). I agree that Asan can well replace kmemcheck. We have plans to start working on Kernel MemorySanitizer that finds uses of unitialized memory. Asan+Msan will provide feature-parity with kmemcheck. As others noted, Asan will unlikely replace debug slab and pagealloc that can be enabled at runtime. Asan uses compiler instrumentation, so even if it is disabled, it still incurs visible overheads. Asan technology is easily portable to other architectures. Compiler instrumentation is fully portable. Runtime has some arch-dependent parts like shadow mapping and atomic operation interception. They are relatively easy to port." Comparison with other debugging features: ======================================== KMEMCHECK: - KASan can do almost everything that kmemcheck can. KASan uses compile-time instrumentation, which makes it significantly faster than kmemcheck. The only advantage of kmemcheck over KASan is detection of uninitialized memory reads. Some brief performance testing showed that kasan could be x500-x600 times faster than kmemcheck: $ netperf -l 30 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to localhost (127.0.0.1) port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec no debug: 87380 16384 16384 30.00 41624.72 kasan inline: 87380 16384 16384 30.00 12870.54 kasan outline: 87380 16384 16384 30.00 10586.39 kmemcheck: 87380 16384 16384 30.03 20.23 - Also kmemcheck couldn't work on several CPUs. It always sets number of CPUs to 1. KASan doesn't have such limitation. DEBUG_PAGEALLOC: - KASan is slower than DEBUG_PAGEALLOC, but KASan works on sub-page granularity level, so it able to find more bugs. SLUB_DEBUG (poisoning, redzones): - SLUB_DEBUG has lower overhead than KASan. - SLUB_DEBUG in most cases are not able to detect bad reads, KASan able to detect both reads and writes. - In some cases (e.g. redzone overwritten) SLUB_DEBUG detect bugs only on allocation/freeing of object. KASan catch bugs right before it will happen, so we always know exact place of first bad read/write. [1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel [2] https://code.google.com/p/address-sanitizer/wiki/FoundBugs [3] https://code.google.com/p/thread-sanitizer/wiki/FoundBugs [4] https://code.google.com/p/memory-sanitizer/wiki/FoundBugs [5] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel#Trophies Based on work by Andrey Konovalov. Signed-off-by: Andrey Ryabinin Acked-by: Michal Marek Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kasan.txt | 170 +++++++++++++++++++ Makefile | 3 +- drivers/firmware/efi/libstub/Makefile | 1 + include/linux/kasan.h | 46 ++++++ include/linux/sched.h | 3 + lib/Kconfig.debug | 2 + lib/Kconfig.kasan | 43 +++++ mm/Makefile | 1 + mm/kasan/Makefile | 8 + mm/kasan/kasan.c | 302 ++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 34 ++++ mm/kasan/report.c | 209 +++++++++++++++++++++++ scripts/Makefile.kasan | 24 +++ scripts/Makefile.lib | 10 ++ 14 files changed, 855 insertions(+), 1 deletion(-) create mode 100644 Documentation/kasan.txt create mode 100644 include/linux/kasan.h create mode 100644 lib/Kconfig.kasan create mode 100644 mm/kasan/Makefile create mode 100644 mm/kasan/kasan.c create mode 100644 mm/kasan/kasan.h create mode 100644 mm/kasan/report.c create mode 100644 scripts/Makefile.kasan (limited to 'include') diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt new file mode 100644 index 000000000000..f0645a8a992f --- /dev/null +++ b/Documentation/kasan.txt @@ -0,0 +1,170 @@ +Kernel address sanitizer +================ + +0. Overview +=========== + +Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides +a fast and comprehensive solution for finding use-after-free and out-of-bounds +bugs. + +KASan uses compile-time instrumentation for checking every memory access, +therefore you will need a certain version of GCC >= 4.9.2 + +Currently KASan is supported only for x86_64 architecture and requires that the +kernel be built with the SLUB allocator. + +1. Usage +========= + +To enable KASAN configure kernel with: + + CONFIG_KASAN = y + +and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline +is compiler instrumentation types. The former produces smaller binary the +latter is 1.1 - 2 times faster. Inline instrumentation requires GCC 5.0 or +latter. + +Currently KASAN works only with the SLUB memory allocator. +For better bug detection and nicer report, enable CONFIG_STACKTRACE and put +at least 'slub_debug=U' in the boot cmdline. + +To disable instrumentation for specific files or directories, add a line +similar to the following to the respective kernel Makefile: + + For a single file (e.g. main.o): + KASAN_SANITIZE_main.o := n + + For all files in one directory: + KASAN_SANITIZE := n + +1.1 Error reports +========== + +A typical out of bounds access report looks like this: + +================================================================== +BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3 +Write of size 1 by task modprobe/1689 +============================================================================= +BUG kmalloc-128 (Not tainted): kasan error +----------------------------------------------------------------------------- + +Disabling lock debugging due to kernel taint +INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689 + __slab_alloc+0x4b4/0x4f0 + kmem_cache_alloc_trace+0x10b/0x190 + kmalloc_oob_right+0x3d/0x75 [test_kasan] + init_module+0x9/0x47 [test_kasan] + do_one_initcall+0x99/0x200 + load_module+0x2cb3/0x3b20 + SyS_finit_module+0x76/0x80 + system_call_fastpath+0x12/0x17 +INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080 +INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720 + +Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ +Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk. +Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc ........ +Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ +CPU: 0 PID: 1689 Comm: modprobe Tainted: G B 3.18.0-rc1-mm1+ #98 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 + ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78 + ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8 + ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558 +Call Trace: + [] dump_stack+0x46/0x58 + [] print_trailer+0xf8/0x160 + [] ? kmem_cache_oob+0xc3/0xc3 [test_kasan] + [] object_err+0x35/0x40 + [] ? kmalloc_oob_right+0x65/0x75 [test_kasan] + [] kasan_report_error+0x38a/0x3f0 + [] ? kasan_poison_shadow+0x2f/0x40 + [] ? kasan_unpoison_shadow+0x14/0x40 + [] ? kasan_poison_shadow+0x2f/0x40 + [] ? kmem_cache_oob+0xc3/0xc3 [test_kasan] + [] __asan_store1+0x75/0xb0 + [] ? kmem_cache_oob+0x1d/0xc3 [test_kasan] + [] ? kmalloc_oob_right+0x65/0x75 [test_kasan] + [] kmalloc_oob_right+0x65/0x75 [test_kasan] + [] init_module+0x9/0x47 [test_kasan] + [] do_one_initcall+0x99/0x200 + [] ? __vunmap+0xec/0x160 + [] load_module+0x2cb3/0x3b20 + [] ? m_show+0x240/0x240 + [] SyS_finit_module+0x76/0x80 + [] system_call_fastpath+0x12/0x17 +Memory state around the buggy address: + ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc + ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00 +>ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc + ^ + ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb + ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +First sections describe slub object where bad access happened. +See 'SLUB Debug output' section in Documentation/vm/slub.txt for details. + +In the last section the report shows memory state around the accessed address. +Reading this part requires some more understanding of how KASAN works. + +Each 8 bytes of memory are encoded in one shadow byte as accessible, +partially accessible, freed or they can be part of a redzone. +We use the following encoding for each shadow byte: 0 means that all 8 bytes +of the corresponding memory region are accessible; number N (1 <= N <= 7) means +that the first N bytes are accessible, and other (8 - N) bytes are not; +any negative value indicates that the entire 8-byte word is inaccessible. +We use different negative values to distinguish between different kinds of +inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h). + +In the report above the arrows point to the shadow byte 03, which means that +the accessed address is partially accessible. + + +2. Implementation details +======================== + +From a high level, our approach to memory error detection is similar to that +of kmemcheck: use shadow memory to record whether each byte of memory is safe +to access, and use compile-time instrumentation to check shadow memory on each +memory access. + +AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory +(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and +offset to translate a memory address to its corresponding shadow address. + +Here is the function witch translate an address to its corresponding shadow +address: + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_SHADOW_OFFSET; +} + +where KASAN_SHADOW_SCALE_SHIFT = 3. + +Compile-time instrumentation used for checking memory accesses. Compiler inserts +function calls (__asan_load*(addr), __asan_store*(addr)) before each memory +access of size 1, 2, 4, 8 or 16. These functions check whether memory access is +valid or not by checking corresponding shadow memory. + +GCC 5.0 has possibility to perform inline instrumentation. Instead of making +function calls GCC directly inserts the code to check the shadow memory. +This option significantly enlarges kernel but it gives x1.1-x2 performance +boost over outline instrumented kernel. diff --git a/Makefile b/Makefile index 5fa2e3035509..33cb15efd257 100644 --- a/Makefile +++ b/Makefile @@ -423,7 +423,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -781,6 +781,7 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO endif +include $(srctree)/scripts/Makefile.kasan include $(srctree)/scripts/Makefile.extrawarn # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 8902f52e0998..280bc0a63365 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -19,6 +19,7 @@ KBUILD_CFLAGS := $(cflags-y) \ $(call cc-option,-fno-stack-protector) GCOV_PROFILE := n +KASAN_SANITIZE := n lib-y := efi-stub-helper.o lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o diff --git a/include/linux/kasan.h b/include/linux/kasan.h new file mode 100644 index 000000000000..9102fda60def --- /dev/null +++ b/include/linux/kasan.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_KASAN_H +#define _LINUX_KASAN_H + +#include + +struct kmem_cache; +struct page; + +#ifdef CONFIG_KASAN + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +#include +#include + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_SHADOW_OFFSET; +} + +/* Enable reporting bugs after kasan_disable_current() */ +static inline void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +/* Disable reporting bugs for current task */ +static inline void kasan_disable_current(void) +{ + current->kasan_depth--; +} + +void kasan_unpoison_shadow(const void *address, size_t size); + +#else /* CONFIG_KASAN */ + +static inline void kasan_unpoison_shadow(const void *address, size_t size) {} + +static inline void kasan_enable_current(void) {} +static inline void kasan_disable_current(void) {} + +#endif /* CONFIG_KASAN */ + +#endif /* LINUX_KASAN_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 048b91b983ed..41c60e5302d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1664,6 +1664,9 @@ struct task_struct { unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; +#ifdef CONFIG_KASAN + unsigned int kasan_depth; +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 79a9bb67aeaf..ecb3516f6546 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -651,6 +651,8 @@ config DEBUG_STACKOVERFLOW source "lib/Kconfig.kmemcheck" +source "lib/Kconfig.kasan" + endmenu # "Memory Debugging" config DEBUG_SHIRQ diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan new file mode 100644 index 000000000000..e5b3fbe5560f --- /dev/null +++ b/lib/Kconfig.kasan @@ -0,0 +1,43 @@ +config HAVE_ARCH_KASAN + bool + +if HAVE_ARCH_KASAN + +config KASAN + bool "KASan: runtime memory debugger" + help + Enables kernel address sanitizer - runtime memory debugger, + designed to find out-of-bounds accesses and use-after-free bugs. + This is strictly debugging feature. It consumes about 1/8 + of available memory and brings about ~x3 performance slowdown. + For better error detection enable CONFIG_STACKTRACE, + and add slub_debug=U to boot cmdline. + +config KASAN_SHADOW_OFFSET + hex + +choice + prompt "Instrumentation type" + depends on KASAN + default KASAN_OUTLINE + +config KASAN_OUTLINE + bool "Outline instrumentation" + help + Before every memory access compiler insert function call + __asan_load*/__asan_store*. These functions performs check + of shadow memory. This is slower than inline instrumentation, + however it doesn't bloat size of kernel's .text section so + much as inline does. + +config KASAN_INLINE + bool "Inline instrumentation" + help + Compiler directly inserts code checking shadow memory before + memory accesses. This is faster than outline (in some workloads + it gives about x2 boost over outline instrumentation), but + make kernel's .text size much bigger. + +endchoice + +endif diff --git a/mm/Makefile b/mm/Makefile index 3548460ab7b6..930b52df4aca 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_KMEMCHECK) += kmemcheck.o +obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile new file mode 100644 index 000000000000..bd837b8c2f41 --- /dev/null +++ b/mm/kasan/Makefile @@ -0,0 +1,8 @@ +KASAN_SANITIZE := n + +CFLAGS_REMOVE_kasan.o = -pg +# Function splitter causes unnecessary splits in __asan_load1/__asan_store1 +# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 +CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) + +obj-y := kasan.o report.o diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c new file mode 100644 index 000000000000..6dc1aa7cefcc --- /dev/null +++ b/mm/kasan/kasan.c @@ -0,0 +1,302 @@ +/* + * This file contains shadow memory manipulation code. + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * + * Some of code borrowed from https://github.com/xairy/linux by + * Andrey Konovalov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define DISABLE_BRANCH_PROFILING + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kasan.h" + +/* + * Poisons the shadow memory for 'size' bytes starting from 'addr'. + * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. + */ +static void kasan_poison_shadow(const void *address, size_t size, u8 value) +{ + void *shadow_start, *shadow_end; + + shadow_start = kasan_mem_to_shadow(address); + shadow_end = kasan_mem_to_shadow(address + size); + + memset(shadow_start, value, shadow_end - shadow_start); +} + +void kasan_unpoison_shadow(const void *address, size_t size) +{ + kasan_poison_shadow(address, size, 0); + + if (size & KASAN_SHADOW_MASK) { + u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); + *shadow = size & KASAN_SHADOW_MASK; + } +} + + +/* + * All functions below always inlined so compiler could + * perform better optimizations in each of __asan_loadX/__assn_storeX + * depending on memory access size X. + */ + +static __always_inline bool memory_is_poisoned_1(unsigned long addr) +{ + s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(shadow_value)) { + s8 last_accessible_byte = addr & KASAN_SHADOW_MASK; + return unlikely(last_accessible_byte >= shadow_value); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_2(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 1)) + return true; + + if (likely(((addr + 1) & KASAN_SHADOW_MASK) != 0)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_4(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 3)) + return true; + + if (likely(((addr + 3) & KASAN_SHADOW_MASK) >= 3)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_8(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 7)) + return true; + + if (likely(((addr + 7) & KASAN_SHADOW_MASK) >= 7)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_16(unsigned long addr) +{ + u32 *shadow_addr = (u32 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + u16 shadow_first_bytes = *(u16 *)shadow_addr; + s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; + + if (unlikely(shadow_first_bytes)) + return true; + + if (likely(!last_byte)) + return false; + + return memory_is_poisoned_1(addr + 15); + } + + return false; +} + +static __always_inline unsigned long bytes_is_zero(const u8 *start, + size_t size) +{ + while (size) { + if (unlikely(*start)) + return (unsigned long)start; + start++; + size--; + } + + return 0; +} + +static __always_inline unsigned long memory_is_zero(const void *start, + const void *end) +{ + unsigned int words; + unsigned long ret; + unsigned int prefix = (unsigned long)start % 8; + + if (end - start <= 16) + return bytes_is_zero(start, end - start); + + if (prefix) { + prefix = 8 - prefix; + ret = bytes_is_zero(start, prefix); + if (unlikely(ret)) + return ret; + start += prefix; + } + + words = (end - start) / 8; + while (words) { + if (unlikely(*(u64 *)start)) + return bytes_is_zero(start, 8); + start += 8; + words--; + } + + return bytes_is_zero(start, (end - start) % 8); +} + +static __always_inline bool memory_is_poisoned_n(unsigned long addr, + size_t size) +{ + unsigned long ret; + + ret = memory_is_zero(kasan_mem_to_shadow((void *)addr), + kasan_mem_to_shadow((void *)addr + size - 1) + 1); + + if (unlikely(ret)) { + unsigned long last_byte = addr + size - 1; + s8 *last_shadow = (s8 *)kasan_mem_to_shadow((void *)last_byte); + + if (unlikely(ret != (unsigned long)last_shadow || + ((last_byte & KASAN_SHADOW_MASK) >= *last_shadow))) + return true; + } + return false; +} + +static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size) +{ + if (__builtin_constant_p(size)) { + switch (size) { + case 1: + return memory_is_poisoned_1(addr); + case 2: + return memory_is_poisoned_2(addr); + case 4: + return memory_is_poisoned_4(addr); + case 8: + return memory_is_poisoned_8(addr); + case 16: + return memory_is_poisoned_16(addr); + default: + BUILD_BUG(); + } + } + + return memory_is_poisoned_n(addr, size); +} + + +static __always_inline void check_memory_region(unsigned long addr, + size_t size, bool write) +{ + struct kasan_access_info info; + + if (unlikely(size == 0)) + return; + + if (unlikely((void *)addr < + kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) { + info.access_addr = (void *)addr; + info.access_size = size; + info.is_write = write; + info.ip = _RET_IP_; + kasan_report_user_access(&info); + return; + } + + if (likely(!memory_is_poisoned(addr, size))) + return; + + kasan_report(addr, size, write, _RET_IP_); +} + +#define DEFINE_ASAN_LOAD_STORE(size) \ + void __asan_load##size(unsigned long addr) \ + { \ + check_memory_region(addr, size, false); \ + } \ + EXPORT_SYMBOL(__asan_load##size); \ + __alias(__asan_load##size) \ + void __asan_load##size##_noabort(unsigned long); \ + EXPORT_SYMBOL(__asan_load##size##_noabort); \ + void __asan_store##size(unsigned long addr) \ + { \ + check_memory_region(addr, size, true); \ + } \ + EXPORT_SYMBOL(__asan_store##size); \ + __alias(__asan_store##size) \ + void __asan_store##size##_noabort(unsigned long); \ + EXPORT_SYMBOL(__asan_store##size##_noabort) + +DEFINE_ASAN_LOAD_STORE(1); +DEFINE_ASAN_LOAD_STORE(2); +DEFINE_ASAN_LOAD_STORE(4); +DEFINE_ASAN_LOAD_STORE(8); +DEFINE_ASAN_LOAD_STORE(16); + +void __asan_loadN(unsigned long addr, size_t size) +{ + check_memory_region(addr, size, false); +} +EXPORT_SYMBOL(__asan_loadN); + +__alias(__asan_loadN) +void __asan_loadN_noabort(unsigned long, size_t); +EXPORT_SYMBOL(__asan_loadN_noabort); + +void __asan_storeN(unsigned long addr, size_t size) +{ + check_memory_region(addr, size, true); +} +EXPORT_SYMBOL(__asan_storeN); + +__alias(__asan_storeN) +void __asan_storeN_noabort(unsigned long, size_t); +EXPORT_SYMBOL(__asan_storeN_noabort); + +/* to shut up compiler complaints */ +void __asan_handle_no_return(void) {} +EXPORT_SYMBOL(__asan_handle_no_return); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h new file mode 100644 index 000000000000..648b9c006f3f --- /dev/null +++ b/mm/kasan/kasan.h @@ -0,0 +1,34 @@ +#ifndef __MM_KASAN_KASAN_H +#define __MM_KASAN_KASAN_H + +#include + +#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) +#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) + +struct kasan_access_info { + const void *access_addr; + const void *first_bad_addr; + size_t access_size; + bool is_write; + unsigned long ip; +}; + +void kasan_report_error(struct kasan_access_info *info); +void kasan_report_user_access(struct kasan_access_info *info); + +static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) + << KASAN_SHADOW_SCALE_SHIFT); +} + +static inline bool kasan_enabled(void) +{ + return !current->kasan_depth; +} + +void kasan_report(unsigned long addr, size_t size, + bool is_write, unsigned long ip); + +#endif diff --git a/mm/kasan/report.c b/mm/kasan/report.c new file mode 100644 index 000000000000..5835d69563f5 --- /dev/null +++ b/mm/kasan/report.c @@ -0,0 +1,209 @@ +/* + * This file contains error reporting code. + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * + * Some of code borrowed from https://github.com/xairy/linux by + * Andrey Konovalov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kasan.h" + +/* Shadow layout customization. */ +#define SHADOW_BYTES_PER_BLOCK 1 +#define SHADOW_BLOCKS_PER_ROW 16 +#define SHADOW_BYTES_PER_ROW (SHADOW_BLOCKS_PER_ROW * SHADOW_BYTES_PER_BLOCK) +#define SHADOW_ROWS_AROUND_ADDR 2 + +static const void *find_first_bad_addr(const void *addr, size_t size) +{ + u8 shadow_val = *(u8 *)kasan_mem_to_shadow(addr); + const void *first_bad_addr = addr; + + while (!shadow_val && first_bad_addr < addr + size) { + first_bad_addr += KASAN_SHADOW_SCALE_SIZE; + shadow_val = *(u8 *)kasan_mem_to_shadow(first_bad_addr); + } + return first_bad_addr; +} + +static void print_error_description(struct kasan_access_info *info) +{ + const char *bug_type = "unknown crash"; + u8 shadow_val; + + info->first_bad_addr = find_first_bad_addr(info->access_addr, + info->access_size); + + shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr); + + switch (shadow_val) { + case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: + bug_type = "out of bounds access"; + break; + } + + pr_err("BUG: KASan: %s in %pS at addr %p\n", + bug_type, (void *)info->ip, + info->access_addr); + pr_err("%s of size %zu by task %s/%d\n", + info->is_write ? "Write" : "Read", + info->access_size, current->comm, task_pid_nr(current)); +} + +static void print_address_description(struct kasan_access_info *info) +{ + dump_stack(); +} + +static bool row_is_guilty(const void *row, const void *guilty) +{ + return (row <= guilty) && (guilty < row + SHADOW_BYTES_PER_ROW); +} + +static int shadow_pointer_offset(const void *row, const void *shadow) +{ + /* The length of ">ff00ff00ff00ff00: " is + * 3 + (BITS_PER_LONG/8)*2 chars. + */ + return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 + + (shadow - row) / SHADOW_BYTES_PER_BLOCK + 1; +} + +static void print_shadow_for_address(const void *addr) +{ + int i; + const void *shadow = kasan_mem_to_shadow(addr); + const void *shadow_row; + + shadow_row = (void *)round_down((unsigned long)shadow, + SHADOW_BYTES_PER_ROW) + - SHADOW_ROWS_AROUND_ADDR * SHADOW_BYTES_PER_ROW; + + pr_err("Memory state around the buggy address:\n"); + + for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) { + const void *kaddr = kasan_shadow_to_mem(shadow_row); + char buffer[4 + (BITS_PER_LONG/8)*2]; + + snprintf(buffer, sizeof(buffer), + (i == 0) ? ">%p: " : " %p: ", kaddr); + + kasan_disable_current(); + print_hex_dump(KERN_ERR, buffer, + DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1, + shadow_row, SHADOW_BYTES_PER_ROW, 0); + kasan_enable_current(); + + if (row_is_guilty(shadow_row, shadow)) + pr_err("%*c\n", + shadow_pointer_offset(shadow_row, shadow), + '^'); + + shadow_row += SHADOW_BYTES_PER_ROW; + } +} + +static DEFINE_SPINLOCK(report_lock); + +void kasan_report_error(struct kasan_access_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&report_lock, flags); + pr_err("=================================" + "=================================\n"); + print_error_description(info); + print_address_description(info); + print_shadow_for_address(info->first_bad_addr); + pr_err("=================================" + "=================================\n"); + spin_unlock_irqrestore(&report_lock, flags); +} + +void kasan_report_user_access(struct kasan_access_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&report_lock, flags); + pr_err("=================================" + "=================================\n"); + pr_err("BUG: KASan: user-memory-access on address %p\n", + info->access_addr); + pr_err("%s of size %zu by task %s/%d\n", + info->is_write ? "Write" : "Read", + info->access_size, current->comm, task_pid_nr(current)); + dump_stack(); + pr_err("=================================" + "=================================\n"); + spin_unlock_irqrestore(&report_lock, flags); +} + +void kasan_report(unsigned long addr, size_t size, + bool is_write, unsigned long ip) +{ + struct kasan_access_info info; + + if (likely(!kasan_enabled())) + return; + + info.access_addr = (void *)addr; + info.access_size = size; + info.is_write = is_write; + info.ip = ip; + kasan_report_error(&info); +} + + +#define DEFINE_ASAN_REPORT_LOAD(size) \ +void __asan_report_load##size##_noabort(unsigned long addr) \ +{ \ + kasan_report(addr, size, false, _RET_IP_); \ +} \ +EXPORT_SYMBOL(__asan_report_load##size##_noabort) + +#define DEFINE_ASAN_REPORT_STORE(size) \ +void __asan_report_store##size##_noabort(unsigned long addr) \ +{ \ + kasan_report(addr, size, true, _RET_IP_); \ +} \ +EXPORT_SYMBOL(__asan_report_store##size##_noabort) + +DEFINE_ASAN_REPORT_LOAD(1); +DEFINE_ASAN_REPORT_LOAD(2); +DEFINE_ASAN_REPORT_LOAD(4); +DEFINE_ASAN_REPORT_LOAD(8); +DEFINE_ASAN_REPORT_LOAD(16); +DEFINE_ASAN_REPORT_STORE(1); +DEFINE_ASAN_REPORT_STORE(2); +DEFINE_ASAN_REPORT_STORE(4); +DEFINE_ASAN_REPORT_STORE(8); +DEFINE_ASAN_REPORT_STORE(16); + +void __asan_report_load_n_noabort(unsigned long addr, size_t size) +{ + kasan_report(addr, size, false, _RET_IP_); +} +EXPORT_SYMBOL(__asan_report_load_n_noabort); + +void __asan_report_store_n_noabort(unsigned long addr, size_t size) +{ + kasan_report(addr, size, true, _RET_IP_); +} +EXPORT_SYMBOL(__asan_report_store_n_noabort); diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan new file mode 100644 index 000000000000..7acd6faa0335 --- /dev/null +++ b/scripts/Makefile.kasan @@ -0,0 +1,24 @@ +ifdef CONFIG_KASAN +ifdef CONFIG_KASAN_INLINE + call_threshold := 10000 +else + call_threshold := 0 +endif + +CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address + +CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ + -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + --param asan-instrumentation-with-call-threshold=$(call_threshold)) + +ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) + $(warning Cannot use CONFIG_KASAN: \ + -fsanitize=kernel-address is not supported by compiler) +else + ifeq ($(CFLAGS_KASAN),) + $(warning CONFIG_KASAN: compiler does not support all options.\ + Trying minimal configuration) + CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) + endif +endif +endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 511755200634..044eb4f89a91 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -119,6 +119,16 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif +# +# Enable address sanitizer flags for kernel except some files or directories +# we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE) +# +ifeq ($(CONFIG_KASAN),y) +_c_flags += $(if $(patsubst n%,, \ + $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ + $(CFLAGS_KASAN)) +endif + # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). -- cgit v1.2.3-70-g09d2 From b8c73fc2493d42517be95cf2c89659fc6c6f4d02 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:28 -0800 Subject: mm: page_alloc: add kasan hooks on alloc and free paths Add kernel address sanitizer hooks to mark allocated page's addresses as accessible in corresponding shadow region. Mark freed pages as inaccessible. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 ++++++ mm/compaction.c | 2 ++ mm/kasan/kasan.c | 14 ++++++++++++++ mm/kasan/kasan.h | 2 ++ mm/kasan/report.c | 11 +++++++++++ mm/page_alloc.c | 3 +++ 6 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9102fda60def..f00c15c41235 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -34,6 +34,9 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_alloc_pages(struct page *page, unsigned int order); +void kasan_free_pages(struct page *page, unsigned int order); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -41,6 +44,9 @@ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} +static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} +static inline void kasan_free_pages(struct page *page, unsigned int order) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/mm/compaction.c b/mm/compaction.c index d50d6de6f1b6..8c0d9459b54a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "internal.h" #ifdef CONFIG_COMPACTION @@ -72,6 +73,7 @@ static void map_pages(struct list_head *list) list_for_each_entry(page, list, lru) { arch_alloc_page(page, 0); kernel_map_pages(page, 1, 1); + kasan_alloc_pages(page, 0); } } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index def81104772f..b516eb8632b9 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -254,6 +254,20 @@ static __always_inline void check_memory_region(unsigned long addr, kasan_report(addr, size, write, _RET_IP_); } +void kasan_alloc_pages(struct page *page, unsigned int order) +{ + if (likely(!PageHighMem(page))) + kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order); +} + +void kasan_free_pages(struct page *page, unsigned int order) +{ + if (likely(!PageHighMem(page))) + kasan_poison_shadow(page_address(page), + PAGE_SIZE << order, + KASAN_FREE_PAGE); +} + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 648b9c006f3f..d3c90d5dd97a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -6,6 +6,8 @@ #define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) +#define KASAN_FREE_PAGE 0xFF /* page was freed */ + struct kasan_access_info { const void *access_addr; const void *first_bad_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 5835d69563f5..fab8e7882ff1 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -54,6 +54,9 @@ static void print_error_description(struct kasan_access_info *info) shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr); switch (shadow_val) { + case KASAN_FREE_PAGE: + bug_type = "use after free"; + break; case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -69,6 +72,14 @@ static void print_error_description(struct kasan_access_info *info) static void print_address_description(struct kasan_access_info *info) { + const void *addr = info->access_addr; + + if ((addr >= (void *)PAGE_OFFSET) && + (addr < high_memory)) { + struct page *page = virt_to_head_page(addr); + dump_page(page, "kasan: bad access detected"); + } + dump_stack(); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cb4758263f6b..a47f0b229a1a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -787,6 +788,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order) trace_mm_page_free(page, order); kmemcheck_free_shadow(page, order); + kasan_free_pages(page, order); if (PageAnon(page)) page->mapping = NULL; @@ -970,6 +972,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, arch_alloc_page(page, order); kernel_map_pages(page, 1 << order, 1); + kasan_alloc_pages(page, order); if (gfp_flags & __GFP_ZERO) prep_zero_page(page, order, gfp_flags); -- cgit v1.2.3-70-g09d2 From 912f5fbf1d3060f25d6994aed0265c55b974b2e9 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:31 -0800 Subject: mm: slub: introduce virt_to_obj function virt_to_obj takes kmem_cache address, address of slab page, address x pointing somewhere inside slab object, and returns address of the beginning of object. Signed-off-by: Andrey Ryabinin Acked-by: Christoph Lameter Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9abf04ed0999..db7d5de00c5f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -110,4 +110,20 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) } #endif + +/** + * virt_to_obj - returns address of the beginning of object. + * @s: object's kmem_cache + * @slab_page: address of slab page + * @x: address within object memory range + * + * Returns address of the beginning of object + */ +static inline void *virt_to_obj(struct kmem_cache *s, + const void *slab_page, + const void *x) +{ + return (void *)x - ((x - slab_page) % s->size); +} + #endif /* _LINUX_SLUB_DEF_H */ -- cgit v1.2.3-70-g09d2 From 75c66def8d815201aa0386ecc7c66a5c8dbca1ee Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:35 -0800 Subject: mm: slub: share object_err function Remove static and add function declarations to linux/slub_def.h so it could be used by kernel address sanitizer. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 3 +++ mm/slub.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index db7d5de00c5f..33885118523c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -126,4 +126,7 @@ static inline void *virt_to_obj(struct kmem_cache *s, return (void *)x - ((x - slab_page) % s->size); } +void object_err(struct kmem_cache *s, struct page *page, + u8 *object, char *reason); + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/mm/slub.c b/mm/slub.c index 783505ba2052..6833b73ef6b3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -629,7 +629,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) dump_stack(); } -static void object_err(struct kmem_cache *s, struct page *page, +void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason) { slab_bug(s, "%s", reason); -- cgit v1.2.3-70-g09d2 From 0316bec22ec95ea2faca6406437b0b5950553b7c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:42 -0800 Subject: mm: slub: add kernel address sanitizer support for slub allocator With this patch kasan will be able to catch bugs in memory allocated by slub. Initially all objects in newly allocated slab page, marked as redzone. Later, when allocation of slub object happens, requested by caller number of bytes marked as accessible, and the rest of the object (including slub's metadata) marked as redzone (inaccessible). We also mark object as accessible if ksize was called for this object. There is some places in kernel where ksize function is called to inquire size of really allocated area. Such callers could validly access whole allocated memory, so it should be marked as accessible. Code in slub.c and slab_common.c files could validly access to object's metadata, so instrumentation for this files are disabled. Signed-off-by: Andrey Ryabinin Signed-off-by: Dmitry Chernenkov Cc: Dmitry Vyukov Cc: Konstantin Serebryany Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 27 ++++++++++++++ include/linux/slab.h | 11 ++++-- lib/Kconfig.kasan | 1 + mm/Makefile | 3 ++ mm/kasan/kasan.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 5 +++ mm/kasan/report.c | 21 +++++++++++ mm/slab_common.c | 5 ++- mm/slub.c | 31 ++++++++++++++-- 9 files changed, 197 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index f00c15c41235..d5310eef3e38 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -37,6 +37,18 @@ void kasan_unpoison_shadow(const void *address, size_t size); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); +void kasan_poison_slab(struct page *page); +void kasan_unpoison_object_data(struct kmem_cache *cache, void *object); +void kasan_poison_object_data(struct kmem_cache *cache, void *object); + +void kasan_kmalloc_large(const void *ptr, size_t size); +void kasan_kfree_large(const void *ptr); +void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); +void kasan_krealloc(const void *object, size_t new_size); + +void kasan_slab_alloc(struct kmem_cache *s, void *object); +void kasan_slab_free(struct kmem_cache *s, void *object); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -47,6 +59,21 @@ static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} +static inline void kasan_poison_slab(struct page *page) {} +static inline void kasan_unpoison_object_data(struct kmem_cache *cache, + void *object) {} +static inline void kasan_poison_object_data(struct kmem_cache *cache, + void *object) {} + +static inline void kasan_kmalloc_large(void *ptr, size_t size) {} +static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size) {} +static inline void kasan_krealloc(const void *object, size_t new_size) {} + +static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} +static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index ed2ffaab59ea..76f1feeabd38 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -104,6 +104,7 @@ (unsigned long)ZERO_SIZE_PTR) #include +#include struct mem_cgroup; /* @@ -325,7 +326,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) { - return kmem_cache_alloc(s, flags); + void *ret = kmem_cache_alloc(s, flags); + + kasan_kmalloc(s, ret, size); + return ret; } static __always_inline void * @@ -333,7 +337,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) { - return kmem_cache_alloc_node(s, gfpflags, node); + void *ret = kmem_cache_alloc_node(s, gfpflags, node); + + kasan_kmalloc(s, ret, size); + return ret; } #endif /* CONFIG_TRACING */ diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0052b1b9aadd..a11ac0234452 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,6 +5,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" + depends on SLUB_DEBUG help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/mm/Makefile b/mm/Makefile index 930b52df4aca..088c68e9ec35 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux memory manager. # +KASAN_SANITIZE_slab_common.o := n +KASAN_SANITIZE_slub.o := n + mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index b516eb8632b9..dc83f070edb6 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -31,6 +31,7 @@ #include #include "kasan.h" +#include "../slab.h" /* * Poisons the shadow memory for 'size' bytes starting from 'addr'. @@ -268,6 +269,103 @@ void kasan_free_pages(struct page *page, unsigned int order) KASAN_FREE_PAGE); } +void kasan_poison_slab(struct page *page) +{ + kasan_poison_shadow(page_address(page), + PAGE_SIZE << compound_order(page), + KASAN_KMALLOC_REDZONE); +} + +void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) +{ + kasan_unpoison_shadow(object, cache->object_size); +} + +void kasan_poison_object_data(struct kmem_cache *cache, void *object) +{ + kasan_poison_shadow(object, + round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE), + KASAN_KMALLOC_REDZONE); +} + +void kasan_slab_alloc(struct kmem_cache *cache, void *object) +{ + kasan_kmalloc(cache, object, cache->object_size); +} + +void kasan_slab_free(struct kmem_cache *cache, void *object) +{ + unsigned long size = cache->object_size; + unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + + /* RCU slabs could be legally used after free within the RCU period */ + if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + return; + + kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); +} + +void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size) +{ + unsigned long redzone_start; + unsigned long redzone_end; + + if (unlikely(object == NULL)) + return; + + redzone_start = round_up((unsigned long)(object + size), + KASAN_SHADOW_SCALE_SIZE); + redzone_end = round_up((unsigned long)object + cache->object_size, + KASAN_SHADOW_SCALE_SIZE); + + kasan_unpoison_shadow(object, size); + kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, + KASAN_KMALLOC_REDZONE); +} +EXPORT_SYMBOL(kasan_kmalloc); + +void kasan_kmalloc_large(const void *ptr, size_t size) +{ + struct page *page; + unsigned long redzone_start; + unsigned long redzone_end; + + if (unlikely(ptr == NULL)) + return; + + page = virt_to_page(ptr); + redzone_start = round_up((unsigned long)(ptr + size), + KASAN_SHADOW_SCALE_SIZE); + redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page)); + + kasan_unpoison_shadow(ptr, size); + kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, + KASAN_PAGE_REDZONE); +} + +void kasan_krealloc(const void *object, size_t size) +{ + struct page *page; + + if (unlikely(object == ZERO_SIZE_PTR)) + return; + + page = virt_to_head_page(object); + + if (unlikely(!PageSlab(page))) + kasan_kmalloc_large(object, size); + else + kasan_kmalloc(page->slab_cache, object, size); +} + +void kasan_kfree_large(const void *ptr) +{ + struct page *page = virt_to_page(ptr); + + kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), + KASAN_FREE_PAGE); +} + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d3c90d5dd97a..5b052ab40cf9 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -7,6 +7,11 @@ #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) #define KASAN_FREE_PAGE 0xFF /* page was freed */ +#define KASAN_FREE_PAGE 0xFF /* page was freed */ +#define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ +#define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ +#define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ + struct kasan_access_info { const void *access_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index fab8e7882ff1..2760edb4d0a8 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -24,6 +24,7 @@ #include #include "kasan.h" +#include "../slab.h" /* Shadow layout customization. */ #define SHADOW_BYTES_PER_BLOCK 1 @@ -55,8 +56,11 @@ static void print_error_description(struct kasan_access_info *info) switch (shadow_val) { case KASAN_FREE_PAGE: + case KASAN_KMALLOC_FREE: bug_type = "use after free"; break; + case KASAN_PAGE_REDZONE: + case KASAN_KMALLOC_REDZONE: case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -77,6 +81,23 @@ static void print_address_description(struct kasan_access_info *info) if ((addr >= (void *)PAGE_OFFSET) && (addr < high_memory)) { struct page *page = virt_to_head_page(addr); + + if (PageSlab(page)) { + void *object; + struct kmem_cache *cache = page->slab_cache; + void *last_object; + + object = virt_to_obj(cache, page_address(page), addr); + last_object = page_address(page) + + page->objects * cache->size; + + if (unlikely(object > last_object)) + object = last_object; /* we hit into padding */ + + object_err(cache, page, object, + "kasan: bad access detected"); + return; + } dump_page(page, "kasan: bad access detected"); } diff --git a/mm/slab_common.c b/mm/slab_common.c index 429a4506b382..999bb3424d44 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -898,6 +898,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) page = alloc_kmem_pages(flags, order); ret = page ? page_address(page) : NULL; kmemleak_alloc(ret, size, 1, flags); + kasan_kmalloc_large(ret, size); return ret; } EXPORT_SYMBOL(kmalloc_order); @@ -1077,8 +1078,10 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size, if (p) ks = ksize(p); - if (ks >= new_size) + if (ks >= new_size) { + kasan_krealloc((void *)p, new_size); return (void *)p; + } ret = kmalloc_track_caller(new_size, flags); if (ret && p) diff --git a/mm/slub.c b/mm/slub.c index 37555ad8894d..6832c4eab104 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1251,11 +1251,13 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { kmemleak_alloc(ptr, size, 1, flags); + kasan_kmalloc_large(ptr, size); } static inline void kfree_hook(const void *x) { kmemleak_free(x); + kasan_kfree_large(x); } static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, @@ -1278,6 +1280,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); memcg_kmem_put_cache(s); + kasan_slab_alloc(s, object); } static inline void slab_free_hook(struct kmem_cache *s, void *x) @@ -1301,6 +1304,8 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) #endif if (!(s->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(x, s->object_size); + + kasan_slab_free(s, x); } /* @@ -1395,8 +1400,11 @@ static void setup_object(struct kmem_cache *s, struct page *page, void *object) { setup_object_debug(s, page, object); - if (unlikely(s->ctor)) + if (unlikely(s->ctor)) { + kasan_unpoison_object_data(s, object); s->ctor(object); + kasan_poison_object_data(s, object); + } } static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -1429,6 +1437,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) if (unlikely(s->flags & SLAB_POISON)) memset(start, POISON_INUSE, PAGE_SIZE << order); + kasan_poison_slab(page); + for_each_object_idx(p, idx, s, start, page->objects) { setup_object(s, page, p); if (likely(idx < page->objects)) @@ -2522,6 +2532,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = slab_alloc(s, gfpflags, _RET_IP_); trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); + kasan_kmalloc(s, ret, size); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_trace); @@ -2548,6 +2559,8 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s, trace_kmalloc_node(_RET_IP_, ret, size, s->size, gfpflags, node); + + kasan_kmalloc(s, ret, size); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node_trace); @@ -2933,6 +2946,7 @@ static void early_kmem_cache_node_alloc(int node) init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif + kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node)); init_kmem_cache_node(n); inc_slabs_node(kmem_cache_node, node, page->objects); @@ -3305,6 +3319,8 @@ void *__kmalloc(size_t size, gfp_t flags) trace_kmalloc(_RET_IP_, ret, size, s->size, flags); + kasan_kmalloc(s, ret, size); + return ret; } EXPORT_SYMBOL(__kmalloc); @@ -3348,12 +3364,14 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); + kasan_kmalloc(s, ret, size); + return ret; } EXPORT_SYMBOL(__kmalloc_node); #endif -size_t ksize(const void *object) +static size_t __ksize(const void *object) { struct page *page; @@ -3369,6 +3387,15 @@ size_t ksize(const void *object) return slab_ksize(page->slab_cache); } + +size_t ksize(const void *object) +{ + size_t size = __ksize(object); + /* We assume that ksize callers could use whole allocated area, + so we need unpoison this area. */ + kasan_krealloc(object, size); + return size; +} EXPORT_SYMBOL(ksize); void kfree(const void *x) -- cgit v1.2.3-70-g09d2 From c420f167db8c799d69fe43a801c58a7f02e9d57c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:39:59 -0800 Subject: kasan: enable stack instrumentation Stack instrumentation allows to detect out of bounds memory accesses for variables allocated on stack. Compiler adds redzones around every variable on stack and poisons redzones in function's prologue. Such approach significantly increases stack usage, so all in-kernel stacks size were doubled. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/page_64_types.h | 12 +++++++++--- arch/x86/kernel/Makefile | 2 ++ arch/x86/mm/kasan_init_64.c | 11 +++++++++-- include/linux/init_task.h | 8 ++++++++ mm/kasan/kasan.h | 9 +++++++++ mm/kasan/report.c | 6 ++++++ scripts/Makefile.kasan | 1 + 7 files changed, 44 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 75450b2c7be4..4edd53b79a81 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,17 +1,23 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define THREAD_SIZE_ORDER 2 +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) -#define EXCEPTION_STACK_ORDER 0 +#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define DOUBLEFAULT_STACK 1 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b13b70634124..cdb1b70ddad0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -17,6 +17,8 @@ CFLAGS_REMOVE_early_printk.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n +KASAN_SANITIZE_dumpstack.o := n +KASAN_SANITIZE_dumpstack_$(BITS).o := n CFLAGS_irq.o := -I$(src)/../include/asm/trace diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 3e4d9a1a39fa..53508708b7aa 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -189,11 +189,18 @@ void __init kasan_init(void) if (map_range(&pfn_mapped[i])) panic("kasan: unable to allocate shadow!"); } - populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - (void *)KASAN_SHADOW_END); + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), + (unsigned long)kasan_mem_to_shadow(_end), + NUMA_NO_NODE); + + populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_VADDR), + (void *)KASAN_SHADOW_END); memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + init_task.kasan_depth = 0; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d3d43ecf148c..696d22312b31 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -175,6 +175,13 @@ extern struct task_group root_task_group; # define INIT_NUMA_BALANCING(tsk) #endif +#ifdef CONFIG_KASAN +# define INIT_KASAN(tsk) \ + .kasan_depth = 1, +#else +# define INIT_KASAN(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -250,6 +257,7 @@ extern struct task_group root_task_group; INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ + INIT_KASAN(tsk) \ } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 5b052ab40cf9..1fcc1d81a9cf 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -12,6 +12,15 @@ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ +/* + * Stack redzone shadow values + * (Those are compiler's ABI, don't change them) + */ +#define KASAN_STACK_LEFT 0xF1 +#define KASAN_STACK_MID 0xF2 +#define KASAN_STACK_RIGHT 0xF3 +#define KASAN_STACK_PARTIAL 0xF4 + struct kasan_access_info { const void *access_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 2760edb4d0a8..866732ef3db3 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -64,6 +64,12 @@ static void print_error_description(struct kasan_access_info *info) case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; + case KASAN_STACK_LEFT: + case KASAN_STACK_MID: + case KASAN_STACK_RIGHT: + case KASAN_STACK_PARTIAL: + bug_type = "out of bounds on stack"; + break; } pr_err("BUG: KASan: %s in %pS at addr %p\n", diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 7acd6faa0335..2163b8cc446e 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -9,6 +9,7 @@ CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + --param asan-stack=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold)) ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) -- cgit v1.2.3-70-g09d2 From 71394fe50146202f2c8d92cf50f5ebc761acf254 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:03 -0800 Subject: mm: vmalloc: add flag preventing guard hole allocation For instrumenting global variables KASan will shadow memory backing memory for modules. So on module loading we will need to allocate memory for shadow and map it at address in shadow that corresponds to the address allocated in module_alloc(). __vmalloc_node_range() could be used for this purpose, except it puts a guard hole after allocated area. Guard hole in shadow memory should be a problem because at some future point we might need to have a shadow memory at address occupied by guard hole. So we could fail to allocate shadow for module_alloc(). Add a new vm_struct flag 'VM_NO_GUARD' indicating that vm area doesn't have a guard hole. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 9 +++++++-- mm/vmalloc.c | 6 ++---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b87696fdf06a..1526fe712ca0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -16,6 +16,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ +#define VM_NO_GUARD 0x00000040 /* don't add guard page */ /* bits [20..32] reserved for arch specific ioremap internals */ /* @@ -96,8 +97,12 @@ void vmalloc_sync_all(void); static inline size_t get_vm_area_size(const struct vm_struct *area) { - /* return actual size without guard page */ - return area->size - PAGE_SIZE; + if (!(area->flags & VM_NO_GUARD)) + /* return actual size without guard page */ + return area->size - PAGE_SIZE; + else + return area->size; + } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 39c338896416..2e74e99d4cfe 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1324,10 +1324,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, if (unlikely(!area)) return NULL; - /* - * We always allocate a guard page. - */ - size += PAGE_SIZE; + if (!(flags & VM_NO_GUARD)) + size += PAGE_SIZE; va = alloc_vmap_area(size, align, start, end, node, gfp_mask); if (IS_ERR(va)) { -- cgit v1.2.3-70-g09d2 From cb9e3c292d0115499c660028ad35ac5501d722b5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:07 -0800 Subject: mm: vmalloc: pass additional vm_flags to __vmalloc_node_range() For instrumenting global variables KASan will shadow memory backing memory for modules. So on module loading we will need to allocate memory for shadow and map it at address in shadow that corresponds to the address allocated in module_alloc(). __vmalloc_node_range() could be used for this purpose, except it puts a guard hole after allocated area. Guard hole in shadow memory should be a problem because at some future point we might need to have a shadow memory at address occupied by guard hole. So we could fail to allocate shadow for module_alloc(). Now we have VM_NO_GUARD flag disabling guard page, so we need to pass into __vmalloc_node_range(). Add new parameter 'vm_flags' to __vmalloc_node_range() function. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/module.c | 2 +- arch/arm64/kernel/module.c | 4 ++-- arch/mips/kernel/module.c | 2 +- arch/parisc/kernel/module.c | 2 +- arch/s390/kernel/module.c | 2 +- arch/sparc/kernel/module.c | 2 +- arch/unicore32/kernel/module.c | 2 +- arch/x86/kernel/module.c | 2 +- include/linux/vmalloc.h | 4 +++- mm/vmalloc.c | 10 ++++++---- 10 files changed, 18 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index bea7db9e5b80..2e11961f65ae 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -41,7 +41,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 9b6f71db2709..67bf4107f6ef 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -35,8 +35,8 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, - __builtin_return_address(0)); + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); } enum aarch64_reloc_op { diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 2a52568dbcd6..1833f5171ccd 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -47,7 +47,7 @@ static DEFINE_SPINLOCK(dbe_lock); void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 5822e8e200e6..3c63a820fcda 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -219,7 +219,7 @@ void *module_alloc(unsigned long size) * init_data correctly */ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_RWX, NUMA_NO_NODE, + PAGE_KERNEL_RWX, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 409d152585be..36154a2f1814 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -50,7 +50,7 @@ void *module_alloc(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 97655e0fd243..192a617a32f3 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -29,7 +29,7 @@ static void *module_map(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #else diff --git a/arch/unicore32/kernel/module.c b/arch/unicore32/kernel/module.c index dc41f6dfedb6..e191b3448bd3 100644 --- a/arch/unicore32/kernel/module.c +++ b/arch/unicore32/kernel/module.c @@ -25,7 +25,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e69f9882bf95..e830e61aae05 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -88,7 +88,7 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_EXEC, NUMA_NO_NODE, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 1526fe712ca0..7d7acb35603d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -76,7 +76,9 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller); + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller); + extern void vfree(const void *addr); extern void *vmap(struct page **pages, unsigned int count, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2e74e99d4cfe..35b25e1340ca 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1619,6 +1619,7 @@ fail: * @end: vm area range end * @gfp_mask: flags for the page level allocator * @prot: protection mask for the allocated pages + * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD) * @node: node to use for allocation or NUMA_NO_NODE * @caller: caller's return address * @@ -1628,7 +1629,8 @@ fail: */ void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller) + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller) { struct vm_struct *area; void *addr; @@ -1638,8 +1640,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages) goto fail; - area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED, - start, end, node, gfp_mask, caller); + area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | + vm_flags, start, end, node, gfp_mask, caller); if (!area) goto fail; @@ -1688,7 +1690,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, - gfp_mask, prot, node, caller); + gfp_mask, prot, 0, node, caller); } void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) -- cgit v1.2.3-70-g09d2 From 9ddf82521c86ae07af79dbe5a93c52890f2bab23 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:10 -0800 Subject: kernel: add support for .init_array.* constructors KASan uses constructors for initializing redzones for global variables. Globals instrumentation in GCC 4.9.2 produces constructors with priority (.init_array.00099) Currently kernel ignores such constructors. Only constructors with default priority supported (.init_array) This patch adds support for constructors with priorities. For kernel image we put pointers to constructors between __ctors_start/__ctors_end and do_ctors() will call them on start up. For modules we merge .init_array.* sections into resulting .init_array. Module code properly handles constructors in .init_array section. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 1 + scripts/module-common.lds | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bee5d683074d..ac78910d7416 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -478,6 +478,7 @@ #define KERNEL_CTORS() . = ALIGN(8); \ VMLINUX_SYMBOL(__ctors_start) = .; \ *(.ctors) \ + *(SORT(.init_array.*)) \ *(.init_array) \ VMLINUX_SYMBOL(__ctors_end) = .; #else diff --git a/scripts/module-common.lds b/scripts/module-common.lds index bec15f908fc6..73a2c7da0e55 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -16,4 +16,7 @@ SECTIONS { __kcrctab_unused 0 : { *(SORT(___kcrctab_unused+*)) } __kcrctab_unused_gpl 0 : { *(SORT(___kcrctab_unused_gpl+*)) } __kcrctab_gpl_future 0 : { *(SORT(___kcrctab_gpl_future+*)) } + + . = ALIGN(8); + .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } } -- cgit v1.2.3-70-g09d2 From 6301939d97d079f0d3dbe71e750f4daf5d39fc33 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:13 -0800 Subject: module: fix types of device tables aliases MODULE_DEVICE_TABLE() macro used to create aliases to device tables. Normally alias should have the same type as aliased symbol. Device tables are arrays, so they have 'struct type##_device_id[x]' types. Alias created by MODULE_DEVICE_TABLE() will have non-array type - 'struct type##_device_id'. This inconsistency confuses compiler, it could make a wrong assumption about variable's size which leads KASan to produce a false positive report about out of bounds access. For every global variable compiler calls __asan_register_globals() passing information about global variable (address, size, size with redzone, name ...) __asan_register_globals() poison symbols redzone to detect possible out of bounds accesses. When symbol has an alias __asan_register_globals() will be called as for symbol so for alias. Compiler determines size of variable by size of variable's type. Alias and symbol have the same address, so if alias have the wrong size part of memory that actually belongs to the symbol could be poisoned as redzone of alias symbol. By fixing type of alias symbol we will fix size of it, so __asan_register_globals() will not poison valid memory. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index b653d7c0a05a..42999fe2dbd0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -135,7 +135,7 @@ void trim_init_extable(struct module *m); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ - extern const struct type##_device_id __mod_##type##__##name##_device_table \ +extern const typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) -- cgit v1.2.3-70-g09d2 From bebf56a1b176c2e1c9efe44e7e6915532cc682cf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 13 Feb 2015 14:40:17 -0800 Subject: kasan: enable instrumentation of global variables This feature let us to detect accesses out of bounds of global variables. This will work as for globals in kernel image, so for globals in modules. Currently this won't work for symbols in user-specified sections (e.g. __init, __read_mostly, ...) The idea of this is simple. Compiler increases each global variable by redzone size and add constructors invoking __asan_register_globals() function. Information about global variable (address, size, size with redzone ...) passed to __asan_register_globals() so we could poison variable's redzone. This patch also forces module_alloc() to return 8*PAGE_SIZE aligned address making shadow memory handling ( kasan_module_alloc()/kasan_module_free() ) more simple. Such alignment guarantees that each shadow page backing modules address space correspond to only one module_alloc() allocation. Signed-off-by: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Konstantin Serebryany Cc: Dmitry Chernenkov Signed-off-by: Andrey Konovalov Cc: Yuri Gribov Cc: Konstantin Khlebnikov Cc: Sasha Levin Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Dave Hansen Cc: Andi Kleen Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kasan.txt | 2 +- arch/x86/kernel/module.c | 12 +++++++++- arch/x86/mm/kasan_init_64.c | 2 +- include/linux/compiler-gcc4.h | 4 ++++ include/linux/compiler-gcc5.h | 2 ++ include/linux/kasan.h | 10 +++++++++ kernel/module.c | 2 ++ lib/Kconfig.kasan | 1 + mm/kasan/kasan.c | 52 +++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 25 +++++++++++++++++++++ mm/kasan/report.c | 22 ++++++++++++++++++ scripts/Makefile.kasan | 2 +- 12 files changed, 132 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index f0645a8a992f..092fc10961fe 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -9,7 +9,7 @@ a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs. KASan uses compile-time instrumentation for checking every memory access, -therefore you will need a certain version of GCC >= 4.9.2 +therefore you will need a certain version of GCC > 4.9.2 Currently KASan is supported only for x86_64 architecture and requires that the kernel be built with the SLUB allocator. diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e830e61aae05..d1ac80b72c72 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -83,13 +84,22 @@ static unsigned long int get_module_load_offset(void) void *module_alloc(unsigned long size) { + void *p; + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return __vmalloc_node_range(size, 1, + + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + + return p; } #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 53508708b7aa..4860906c6b9f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -196,7 +196,7 @@ void __init kasan_init(void) (unsigned long)kasan_mem_to_shadow(_end), NUMA_NO_NODE); - populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_VADDR), + populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); memset(kasan_zero_page, 0, PAGE_SIZE); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index d1a558239b1a..769e19864632 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -85,3 +85,7 @@ #define __HAVE_BUILTIN_BSWAP16__ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#if GCC_VERSION >= 40902 +#define KASAN_ABI_VERSION 3 +#endif diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h index c8c565952548..efee493714eb 100644 --- a/include/linux/compiler-gcc5.h +++ b/include/linux/compiler-gcc5.h @@ -63,3 +63,5 @@ #define __HAVE_BUILTIN_BSWAP64__ #define __HAVE_BUILTIN_BSWAP16__ #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#define KASAN_ABI_VERSION 4 diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d5310eef3e38..72ba725ddf9c 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -49,8 +49,15 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object); +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) + +int kasan_module_alloc(void *addr, size_t size); +void kasan_module_free(void *addr); + #else /* CONFIG_KASAN */ +#define MODULE_ALIGN 1 + static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} @@ -74,6 +81,9 @@ static inline void kasan_krealloc(const void *object, size_t new_size) {} static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_module_free(void *addr) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/kernel/module.c b/kernel/module.c index 82dc1f899e6d..8426ad48362c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -1813,6 +1814,7 @@ static void unset_module_init_ro_nx(struct module *mod) { } void __weak module_memfree(void *module_region) { vfree(module_region); + kasan_module_free(module_region); } void __weak module_arch_cleanup(struct module *mod) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4d47d874335c..4fecaedc80a2 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB_DEBUG + select CONSTRUCTORS help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 799c52b9826c..78fee632a7ee 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -395,6 +396,57 @@ void kasan_kfree_large(const void *ptr) KASAN_FREE_PAGE); } +int kasan_module_alloc(void *addr, size_t size) +{ + void *ret; + size_t shadow_size; + unsigned long shadow_start; + + shadow_start = (unsigned long)kasan_mem_to_shadow(addr); + shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, + PAGE_SIZE); + + if (WARN_ON(!PAGE_ALIGNED(shadow_start))) + return -EINVAL; + + ret = __vmalloc_node_range(shadow_size, 1, shadow_start, + shadow_start + shadow_size, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, + __builtin_return_address(0)); + return ret ? 0 : -ENOMEM; +} + +void kasan_module_free(void *addr) +{ + vfree(kasan_mem_to_shadow(addr)); +} + +static void register_global(struct kasan_global *global) +{ + size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE); + + kasan_unpoison_shadow(global->beg, global->size); + + kasan_poison_shadow(global->beg + aligned_size, + global->size_with_redzone - aligned_size, + KASAN_GLOBAL_REDZONE); +} + +void __asan_register_globals(struct kasan_global *globals, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + register_global(&globals[i]); +} +EXPORT_SYMBOL(__asan_register_globals); + +void __asan_unregister_globals(struct kasan_global *globals, size_t size) +{ +} +EXPORT_SYMBOL(__asan_unregister_globals); + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1fcc1d81a9cf..4986b0acab21 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -11,6 +11,7 @@ #define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ +#define KASAN_GLOBAL_REDZONE 0xFA /* redzone for global variable */ /* * Stack redzone shadow values @@ -21,6 +22,10 @@ #define KASAN_STACK_RIGHT 0xF3 #define KASAN_STACK_PARTIAL 0xF4 +/* Don't break randconfig/all*config builds */ +#ifndef KASAN_ABI_VERSION +#define KASAN_ABI_VERSION 1 +#endif struct kasan_access_info { const void *access_addr; @@ -30,6 +35,26 @@ struct kasan_access_info { unsigned long ip; }; +/* The layout of struct dictated by compiler */ +struct kasan_source_location { + const char *filename; + int line_no; + int column_no; +}; + +/* The layout of struct dictated by compiler */ +struct kasan_global { + const void *beg; /* Address of the beginning of the global variable. */ + size_t size; /* Size of the global variable. */ + size_t size_with_redzone; /* Size of the variable + size of the red zone. 32 bytes aligned */ + const void *name; + const void *module_name; /* Name of the module where the global variable is declared. */ + unsigned long has_dynamic_init; /* This needed for C++ */ +#if KASAN_ABI_VERSION >= 4 + struct kasan_source_location *location; +#endif +}; + void kasan_report_error(struct kasan_access_info *info); void kasan_report_user_access(struct kasan_access_info *info); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 866732ef3db3..680ceedf810a 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -23,6 +23,8 @@ #include #include +#include + #include "kasan.h" #include "../slab.h" @@ -61,6 +63,7 @@ static void print_error_description(struct kasan_access_info *info) break; case KASAN_PAGE_REDZONE: case KASAN_KMALLOC_REDZONE: + case KASAN_GLOBAL_REDZONE: case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -80,6 +83,20 @@ static void print_error_description(struct kasan_access_info *info) info->access_size, current->comm, task_pid_nr(current)); } +static inline bool kernel_or_module_addr(const void *addr) +{ + return (addr >= (void *)_stext && addr < (void *)_end) + || (addr >= (void *)MODULES_VADDR + && addr < (void *)MODULES_END); +} + +static inline bool init_task_stack_addr(const void *addr) +{ + return addr >= (void *)&init_thread_union.stack && + (addr <= (void *)&init_thread_union.stack + + sizeof(init_thread_union.stack)); +} + static void print_address_description(struct kasan_access_info *info) { const void *addr = info->access_addr; @@ -107,6 +124,11 @@ static void print_address_description(struct kasan_access_info *info) dump_page(page, "kasan: bad access detected"); } + if (kernel_or_module_addr(addr)) { + if (!init_task_stack_addr(addr)) + pr_err("Address belongs to variable %pS\n", addr); + } + dump_stack(); } diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 2163b8cc446e..631619b2b118 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -9,7 +9,7 @@ CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ - --param asan-stack=1 \ + --param asan-stack=1 --param asan-globals=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold)) ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) -- cgit v1.2.3-70-g09d2