From 1873e870fd63ee4b87dbe0125ca373e420fb4987 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 28 Mar 2012 11:51:18 -0700 Subject: debug: Add CONFIG_READABLE_ASM Add a config option to disable various gcc compiler optimizations that make assembler listings much harder to read. This is everything that reorders code significantly or creates partial functions. This is mainly to keep kernel hackers sane. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1332960678-11879-2-git-send-email-andi@firstfloor.org Signed-off-by: H. Peter Anvin --- lib/Kconfig.debug | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6777153f18f3..4d3cbbbe4516 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -70,6 +70,15 @@ config STRIP_ASM_SYMS that look like '.Lxxx') so they don't pollute the output of get_wchan() and suchlike. +config READABLE_ASM + bool "Generate readable assembler code" + depends on DEBUG_KERNEL + help + Disable some compiler optimizations that tend to generate human unreadable + assembler output. This may make the kernel slightly slower, but it helps + to keep kernel developers who have to stare a lot at assembler listings + sane. 
+ config UNUSED_SYMBOLS bool "Enable unused/obsolete exported symbols" default y if X86 -- cgit v1.2.3-70-g09d2 From 1fda107d44de1e8df2bdfd1d4a2c12a338b8d725 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Apr 2012 11:52:18 +0200 Subject: debugobjects: Remove unused return value from fill_pool() Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 0ab9ae8057f0..3e5cd7c04a55 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -79,17 +79,17 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { [ODEBUG_STATE_NOTAVAILABLE] = "not available", }; -static int fill_pool(void) +static void fill_pool(void) { gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; struct debug_obj *new; unsigned long flags; if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) - return obj_pool_free; + return; if (unlikely(!obj_cache)) - return obj_pool_free; + return; while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) { @@ -102,7 +102,6 @@ static int fill_pool(void) obj_pool_free++; raw_spin_unlock_irqrestore(&pool_lock, flags); } - return obj_pool_free; } /* -- cgit v1.2.3-70-g09d2 From 765a5e0cb581ef32646f58a213b38b56c77709b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 11 Apr 2012 11:54:27 +0200 Subject: debugobjects: printk with irqs enabled No point in keeping interrupts disabled here. 
Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 3e5cd7c04a55..1b6c00a933a9 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -1051,10 +1051,10 @@ static int __init debug_objects_replace_static_objects(void) cnt++; } } + local_irq_enable(); printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt, obj_pool_used); - local_irq_enable(); return 0; free: hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { -- cgit v1.2.3-70-g09d2 From 29cdd4e4ec91aae239192bb122d377d15a9d75e3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Apr 2012 03:19:10 +0200 Subject: dma-debug: release free_entries_lock before saving stack trace Saving stack trace can take a while and once the entry is allocated free_entries_lock is no longer needed. Signed-off-by: Jakub Kicinski Signed-off-by: Joerg Roedel --- lib/dma-debug.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 13ef2338be41..518aea714d21 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -430,7 +430,7 @@ static struct dma_debug_entry *__dma_entry_alloc(void) */ static struct dma_debug_entry *dma_entry_alloc(void) { - struct dma_debug_entry *entry = NULL; + struct dma_debug_entry *entry; unsigned long flags; spin_lock_irqsave(&free_entries_lock, flags); @@ -438,11 +438,14 @@ static struct dma_debug_entry *dma_entry_alloc(void) if (list_empty(&free_entries)) { pr_err("DMA-API: debugging out of memory - disabling\n"); global_disable = true; - goto out; + spin_unlock_irqrestore(&free_entries_lock, flags); + return NULL; } entry = __dma_entry_alloc(); + spin_unlock_irqrestore(&free_entries_lock, flags); + #ifdef CONFIG_STACKTRACE entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; entry->stacktrace.entries = entry->st_entries; @@ -450,9 +453,6 @@ static struct 
dma_debug_entry *dma_entry_alloc(void) save_stack_trace(&entry->stacktrace); #endif -out: - spin_unlock_irqrestore(&free_entries_lock, flags); - return entry; } -- cgit v1.2.3-70-g09d2 From 3340808cf04faad7b87d6c6e13800825e5552b51 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 Apr 2012 14:28:10 +0300 Subject: debugobjects: Fill_pool() returns void now MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There was a return missed in 1fda107d44 "debugobjects: Remove unused return value from fill_pool()". It makes gcc complain: lib/debugobjects.c: In function ‘fill_pool’: lib/debugobjects.c:98:4: warning: ‘return’ with a value, in function returning void [enabled by default] Signed-off-by: Dan Carpenter Link: http://lkml.kernel.org/r/20120418112810.GA2669@elgon.mountain Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 1b6c00a933a9..d11808ca4bc4 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -95,7 +95,7 @@ static void fill_pool(void) new = kmem_cache_zalloc(obj_cache, gfp); if (!new) - return obj_pool_free; + return; raw_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); -- cgit v1.2.3-70-g09d2 From a15d49fd3094cff90e5410ca454a870e0a722fe1 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 16 Apr 2012 15:06:25 +0200 Subject: driver core: check start node in klist_iter_init_node klist_iter_init_node() takes a node as a start argument. However, this node might not be valid anymore. This patch updates the klist_iter_init_node() and dependent functions to return an error if so. All calling functions have been audited to check for a return code here. 
Signed-off-by: Hannes Reinecke Cc: Greg Kroah-Hartmann Cc: Kay Sievers Cc: Stable Kernel Cc: Linux Kernel Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 46 +++++++++++++++++++++++++++++----------------- drivers/base/class.c | 32 ++++++++++++++++++++------------ drivers/base/driver.c | 18 +++++++++++------- include/linux/device.h | 10 +++++----- include/linux/klist.h | 2 +- lib/klist.c | 14 ++++++++++---- 6 files changed, 76 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 2bcef657a60c..76aed01a8b2c 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -296,11 +296,13 @@ int bus_for_each_dev(struct bus_type *bus, struct device *start, if (!bus) return -EINVAL; - klist_iter_init_node(&bus->p->klist_devices, &i, - (start ? &start->p->knode_bus : NULL)); - while ((dev = next_device(&i)) && !error) - error = fn(dev, data); - klist_iter_exit(&i); + error = klist_iter_init_node(&bus->p->klist_devices, &i, + (start ? &start->p->knode_bus : NULL)); + if (!error) { + while ((dev = next_device(&i)) && !error) + error = fn(dev, data); + klist_iter_exit(&i); + } return error; } EXPORT_SYMBOL_GPL(bus_for_each_dev); @@ -330,8 +332,10 @@ struct device *bus_find_device(struct bus_type *bus, if (!bus) return NULL; - klist_iter_init_node(&bus->p->klist_devices, &i, - (start ? &start->p->knode_bus : NULL)); + if (klist_iter_init_node(&bus->p->klist_devices, &i, + (start ? 
&start->p->knode_bus : NULL)) < 0) + return NULL; + while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; @@ -384,7 +388,9 @@ struct device *subsys_find_device_by_id(struct bus_type *subsys, unsigned int id return NULL; if (hint) { - klist_iter_init_node(&subsys->p->klist_devices, &i, &hint->p->knode_bus); + if (klist_iter_init_node(&subsys->p->klist_devices, &i, + &hint->p->knode_bus) < 0) + return NULL; dev = next_device(&i); if (dev && dev->id == id && get_device(dev)) { klist_iter_exit(&i); @@ -446,11 +452,13 @@ int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, if (!bus) return -EINVAL; - klist_iter_init_node(&bus->p->klist_drivers, &i, - start ? &start->p->knode_bus : NULL); - while ((drv = next_driver(&i)) && !error) - error = fn(drv, data); - klist_iter_exit(&i); + error = klist_iter_init_node(&bus->p->klist_drivers, &i, + start ? &start->p->knode_bus : NULL); + if (!error) { + while ((drv = next_driver(&i)) && !error) + error = fn(drv, data); + klist_iter_exit(&i); + } return error; } EXPORT_SYMBOL_GPL(bus_for_each_drv); @@ -1111,15 +1119,19 @@ EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); * otherwise if it is NULL, the iteration starts at the beginning of * the list. 
*/ -void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys, - struct device *start, const struct device_type *type) +int subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys, + struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; + int error; if (start) start_knode = &start->p->knode_bus; - klist_iter_init_node(&subsys->p->klist_devices, &iter->ki, start_knode); - iter->type = type; + error = klist_iter_init_node(&subsys->p->klist_devices, &iter->ki, + start_knode); + if (!error) + iter->type = type; + return error; } EXPORT_SYMBOL_GPL(subsys_dev_iter_init); diff --git a/drivers/base/class.c b/drivers/base/class.c index 03243d4002fd..23dbc661d4a0 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -301,15 +301,20 @@ void class_destroy(struct class *cls) * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ -void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, - struct device *start, const struct device_type *type) +int class_dev_iter_init(struct class_dev_iter *iter, struct class *class, + struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; + int error; if (start) start_knode = &start->knode_class; - klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode); - iter->type = type; + error = klist_iter_init_node(&class->p->klist_devices, &iter->ki, + start_knode); + if (!error) + iter->type = type; + + return error; } EXPORT_SYMBOL_GPL(class_dev_iter_init); @@ -387,14 +392,15 @@ int class_for_each_device(struct class *class, struct device *start, return -EINVAL; } - class_dev_iter_init(&iter, class, start, NULL); - while ((dev = class_dev_iter_next(&iter))) { - error = fn(dev, data); - if (error) - break; + error = class_dev_iter_init(&iter, class, start, NULL); + if (!error) { + while ((dev = class_dev_iter_next(&iter))) { + error = fn(dev, data); + if (error) + 
break; + } + class_dev_iter_exit(&iter); } - class_dev_iter_exit(&iter); - return error; } EXPORT_SYMBOL_GPL(class_for_each_device); @@ -434,7 +440,9 @@ struct device *class_find_device(struct class *class, struct device *start, return NULL; } - class_dev_iter_init(&iter, class, start, NULL); + if (class_dev_iter_init(&iter, class, start, NULL) < 0) + return NULL; + while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { get_device(dev); diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 3ec3896c83a6..16f6dd2c4403 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -49,11 +49,13 @@ int driver_for_each_device(struct device_driver *drv, struct device *start, if (!drv) return -EINVAL; - klist_iter_init_node(&drv->p->klist_devices, &i, - start ? &start->p->knode_driver : NULL); - while ((dev = next_device(&i)) && !error) - error = fn(dev, data); - klist_iter_exit(&i); + error = klist_iter_init_node(&drv->p->klist_devices, &i, + start ? &start->p->knode_driver : NULL); + if (!error) { + while ((dev = next_device(&i)) && !error) + error = fn(dev, data); + klist_iter_exit(&i); + } return error; } EXPORT_SYMBOL_GPL(driver_for_each_device); @@ -83,8 +85,10 @@ struct device *driver_find_device(struct device_driver *drv, if (!drv) return NULL; - klist_iter_init_node(&drv->p->klist_devices, &i, - (start ? &start->p->knode_driver : NULL)); + if (klist_iter_init_node(&drv->p->klist_devices, &i, + (start ? 
&start->p->knode_driver : NULL)) < 0) + return NULL; + while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; diff --git a/include/linux/device.h b/include/linux/device.h index 5ad17cccdd71..50429b911b21 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -128,7 +128,7 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -void subsys_dev_iter_init(struct subsys_dev_iter *iter, +int subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys, struct device *start, const struct device_type *type); @@ -380,10 +380,10 @@ int class_compat_create_link(struct class_compat *cls, struct device *dev, void class_compat_remove_link(struct class_compat *cls, struct device *dev, struct device *device_link); -extern void class_dev_iter_init(struct class_dev_iter *iter, - struct class *class, - struct device *start, - const struct device_type *type); +extern int class_dev_iter_init(struct class_dev_iter *iter, + struct class *class, + struct device *start, + const struct device_type *type); extern struct device *class_dev_iter_next(struct class_dev_iter *iter); extern void class_dev_iter_exit(struct class_dev_iter *iter); diff --git a/include/linux/klist.h b/include/linux/klist.h index a370ce57cf1d..9f633230f189 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -60,7 +60,7 @@ struct klist_iter { extern void klist_iter_init(struct klist *k, struct klist_iter *i); -extern void klist_iter_init_node(struct klist *k, struct klist_iter *i, +extern int klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n); extern void klist_iter_exit(struct klist_iter *i); extern struct klist_node *klist_next(struct klist_iter *i); diff --git a/lib/klist.c b/lib/klist.c index 0874e41609a6..a2741a7d9784 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -278,13 +278,19 @@ EXPORT_SYMBOL_GPL(klist_node_attached); * Similar to klist_iter_init(), but starts the action off with @n, * 
instead of with the list head. */ -void klist_iter_init_node(struct klist *k, struct klist_iter *i, - struct klist_node *n) +int klist_iter_init_node(struct klist *k, struct klist_iter *i, + struct klist_node *n) { + if (n) { + kref_get(&n->n_ref); + if (!n->n_klist) { + kref_put(&n->n_ref); + return -ENODEV; + } + } i->i_klist = k; i->i_cur = n; - if (n) - kref_get(&n->n_ref); + return 0; } EXPORT_SYMBOL_GPL(klist_iter_init_node); -- cgit v1.2.3-70-g09d2 From 7cd9c9bb57476167e83b7780dbc06d1dd601789d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 19 Apr 2012 19:17:30 -0700 Subject: Revert "driver core: check start node in klist_iter_init_node" This reverts commit a15d49fd3094cff90e5410ca454a870e0a722fe1 as that patch broke the build. Cc: Hannes Reinecke Reported-by: Stephen Rothwell Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 46 +++++++++++++++++----------------------------- drivers/base/class.c | 32 ++++++++++++-------------------- drivers/base/driver.c | 18 +++++++----------- include/linux/device.h | 10 +++++----- include/linux/klist.h | 2 +- lib/klist.c | 14 ++++---------- 6 files changed, 46 insertions(+), 76 deletions(-) (limited to 'lib') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 76aed01a8b2c..2bcef657a60c 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -296,13 +296,11 @@ int bus_for_each_dev(struct bus_type *bus, struct device *start, if (!bus) return -EINVAL; - error = klist_iter_init_node(&bus->p->klist_devices, &i, - (start ? &start->p->knode_bus : NULL)); - if (!error) { - while ((dev = next_device(&i)) && !error) - error = fn(dev, data); - klist_iter_exit(&i); - } + klist_iter_init_node(&bus->p->klist_devices, &i, + (start ? 
&start->p->knode_bus : NULL)); + while ((dev = next_device(&i)) && !error) + error = fn(dev, data); + klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(bus_for_each_dev); @@ -332,10 +330,8 @@ struct device *bus_find_device(struct bus_type *bus, if (!bus) return NULL; - if (klist_iter_init_node(&bus->p->klist_devices, &i, - (start ? &start->p->knode_bus : NULL)) < 0) - return NULL; - + klist_iter_init_node(&bus->p->klist_devices, &i, + (start ? &start->p->knode_bus : NULL)); while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; @@ -388,9 +384,7 @@ struct device *subsys_find_device_by_id(struct bus_type *subsys, unsigned int id return NULL; if (hint) { - if (klist_iter_init_node(&subsys->p->klist_devices, &i, - &hint->p->knode_bus) < 0) - return NULL; + klist_iter_init_node(&subsys->p->klist_devices, &i, &hint->p->knode_bus); dev = next_device(&i); if (dev && dev->id == id && get_device(dev)) { klist_iter_exit(&i); @@ -452,13 +446,11 @@ int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, if (!bus) return -EINVAL; - error = klist_iter_init_node(&bus->p->klist_drivers, &i, - start ? &start->p->knode_bus : NULL); - if (!error) { - while ((drv = next_driver(&i)) && !error) - error = fn(drv, data); - klist_iter_exit(&i); - } + klist_iter_init_node(&bus->p->klist_drivers, &i, + start ? &start->p->knode_bus : NULL); + while ((drv = next_driver(&i)) && !error) + error = fn(drv, data); + klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(bus_for_each_drv); @@ -1119,19 +1111,15 @@ EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); * otherwise if it is NULL, the iteration starts at the beginning of * the list. 
*/ -int subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys, - struct device *start, const struct device_type *type) +void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys, + struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; - int error; if (start) start_knode = &start->p->knode_bus; - error = klist_iter_init_node(&subsys->p->klist_devices, &iter->ki, - start_knode); - if (!error) - iter->type = type; - return error; + klist_iter_init_node(&subsys->p->klist_devices, &iter->ki, start_knode); + iter->type = type; } EXPORT_SYMBOL_GPL(subsys_dev_iter_init); diff --git a/drivers/base/class.c b/drivers/base/class.c index 23dbc661d4a0..03243d4002fd 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -301,20 +301,15 @@ void class_destroy(struct class *cls) * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ -int class_dev_iter_init(struct class_dev_iter *iter, struct class *class, - struct device *start, const struct device_type *type) +void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, + struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; - int error; if (start) start_knode = &start->knode_class; - error = klist_iter_init_node(&class->p->klist_devices, &iter->ki, - start_knode); - if (!error) - iter->type = type; - - return error; + klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode); + iter->type = type; } EXPORT_SYMBOL_GPL(class_dev_iter_init); @@ -392,15 +387,14 @@ int class_for_each_device(struct class *class, struct device *start, return -EINVAL; } - error = class_dev_iter_init(&iter, class, start, NULL); - if (!error) { - while ((dev = class_dev_iter_next(&iter))) { - error = fn(dev, data); - if (error) - break; - } - class_dev_iter_exit(&iter); + class_dev_iter_init(&iter, class, start, NULL); + while ((dev = class_dev_iter_next(&iter))) { + 
error = fn(dev, data); + if (error) + break; } + class_dev_iter_exit(&iter); + return error; } EXPORT_SYMBOL_GPL(class_for_each_device); @@ -440,9 +434,7 @@ struct device *class_find_device(struct class *class, struct device *start, return NULL; } - if (class_dev_iter_init(&iter, class, start, NULL) < 0) - return NULL; - + class_dev_iter_init(&iter, class, start, NULL); while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { get_device(dev); diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 16f6dd2c4403..3ec3896c83a6 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -49,13 +49,11 @@ int driver_for_each_device(struct device_driver *drv, struct device *start, if (!drv) return -EINVAL; - error = klist_iter_init_node(&drv->p->klist_devices, &i, - start ? &start->p->knode_driver : NULL); - if (!error) { - while ((dev = next_device(&i)) && !error) - error = fn(dev, data); - klist_iter_exit(&i); - } + klist_iter_init_node(&drv->p->klist_devices, &i, + start ? &start->p->knode_driver : NULL); + while ((dev = next_device(&i)) && !error) + error = fn(dev, data); + klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(driver_for_each_device); @@ -85,10 +83,8 @@ struct device *driver_find_device(struct device_driver *drv, if (!drv) return NULL; - if (klist_iter_init_node(&drv->p->klist_devices, &i, - (start ? &start->p->knode_driver : NULL)) < 0) - return NULL; - + klist_iter_init_node(&drv->p->klist_devices, &i, + (start ? 
&start->p->knode_driver : NULL)); while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; diff --git a/include/linux/device.h b/include/linux/device.h index 50429b911b21..5ad17cccdd71 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -128,7 +128,7 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -int subsys_dev_iter_init(struct subsys_dev_iter *iter, +void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct bus_type *subsys, struct device *start, const struct device_type *type); @@ -380,10 +380,10 @@ int class_compat_create_link(struct class_compat *cls, struct device *dev, void class_compat_remove_link(struct class_compat *cls, struct device *dev, struct device *device_link); -extern int class_dev_iter_init(struct class_dev_iter *iter, - struct class *class, - struct device *start, - const struct device_type *type); +extern void class_dev_iter_init(struct class_dev_iter *iter, + struct class *class, + struct device *start, + const struct device_type *type); extern struct device *class_dev_iter_next(struct class_dev_iter *iter); extern void class_dev_iter_exit(struct class_dev_iter *iter); diff --git a/include/linux/klist.h b/include/linux/klist.h index 9f633230f189..a370ce57cf1d 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -60,7 +60,7 @@ struct klist_iter { extern void klist_iter_init(struct klist *k, struct klist_iter *i); -extern int klist_iter_init_node(struct klist *k, struct klist_iter *i, +extern void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n); extern void klist_iter_exit(struct klist_iter *i); extern struct klist_node *klist_next(struct klist_iter *i); diff --git a/lib/klist.c b/lib/klist.c index a2741a7d9784..0874e41609a6 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -278,19 +278,13 @@ EXPORT_SYMBOL_GPL(klist_node_attached); * Similar to klist_iter_init(), but starts the action off with @n, * instead of with the 
list head. */ -int klist_iter_init_node(struct klist *k, struct klist_iter *i, - struct klist_node *n) +void klist_iter_init_node(struct klist *k, struct klist_iter *i, + struct klist_node *n) { - if (n) { - kref_get(&n->n_ref); - if (!n->n_klist) { - kref_put(&n->n_ref); - return -ENODEV; - } - } i->i_klist = k; i->i_cur = n; - return 0; + if (n) + kref_get(&n->n_ref); } EXPORT_SYMBOL_GPL(klist_iter_init_node); -- cgit v1.2.3-70-g09d2 From 4ccf4beab8c447f8cd33d46afb6e10e1aa3befc6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 31 Aug 2011 20:35:40 +0200 Subject: lib: add support for stmp-style devices MX23/28 use IP cores which follow a register layout I have first seen on STMP3xxx SoCs. In this layout, every register actually has four u32: 1.) to store a value directly 2.) a SET register where every 1-bit sets the corresponding bit, others are unaffected 3.) same with a CLR register 4.) same with a TOG (toggle) register Also, the 2 MSBs in register 0 are always the same and can be used to reset the IP core. All this is strictly speaking not mach-specific (but IP core specific) and, thus, doesn't need to be in mach-mxs/include. At least mx6 also uses IP cores following this stmp-style. So: Introduce a stmp-style device, put the code and defines for that in a public place (lib/), and let drivers for stmp-style devices select that code. To avoid regressions and ease reviewing, the actual code is simply copied from mach-mxs. It definitely wants updates, but those need a separate patch series. Voila, mach dependency gone, reusable code introduced. Note that I didn't remove the duplicated code from mach-mxs yet, first the drivers have to be converted. 
Signed-off-by: Wolfram Sang Acked-by: Shawn Guo Acked-by: Dong Aisheng --- include/linux/stmp_device.h | 20 ++++++++++++ lib/Kconfig | 3 ++ lib/Makefile | 2 ++ lib/stmp_device.c | 80 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 include/linux/stmp_device.h create mode 100644 lib/stmp_device.c (limited to 'lib') diff --git a/include/linux/stmp_device.h b/include/linux/stmp_device.h new file mode 100644 index 000000000000..6cf7ec9547cf --- /dev/null +++ b/include/linux/stmp_device.h @@ -0,0 +1,20 @@ +/* + * basic functions for devices following the "stmp" style register layout + * + * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __STMP_DEVICE_H__ +#define __STMP_DEVICE_H__ + +#define STMP_OFFSET_REG_SET 0x4 +#define STMP_OFFSET_REG_CLR 0x8 +#define STMP_OFFSET_REG_TOG 0xc + +extern int stmp_reset_block(void __iomem *); +#endif /* __STMP_DEVICE_H__ */ diff --git a/lib/Kconfig b/lib/Kconfig index 4a8aba2e5cc0..c5da1548b964 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -33,6 +33,9 @@ config GENERIC_IO boolean default n +config STMP_DEVICE + bool + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Makefile b/lib/Makefile index 18515f0267c4..f78dbcdc7e3d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,6 +123,8 @@ obj-$(CONFIG_SIGNATURE) += digsig.o obj-$(CONFIG_CLZ_TAB) += clz_tab.o +obj-$(CONFIG_STMP_DEVICE) += stmp_device.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/stmp_device.c b/lib/stmp_device.c new file mode 100644 index 000000000000..8ac9bcc4289a --- /dev/null +++ b/lib/stmp_device.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 1999 ARM Limited + * Copyright (C) 2000 Deep Blue 
Solutions Ltd + * Copyright 2006-2007,2010 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Juergen Beisert, kernel@pengutronix.de + * Copyright 2009 Ilya Yanok, Emcraft Systems Ltd, yanok@emcraft.com + * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#define STMP_MODULE_CLKGATE (1 << 30) +#define STMP_MODULE_SFTRST (1 << 31) + +/* + * Clear the bit and poll it cleared. This is usually called with + * a reset address and mask being either SFTRST(bit 31) or CLKGATE + * (bit 30). + */ +static int stmp_clear_poll_bit(void __iomem *addr, u32 mask) +{ + int timeout = 0x400; + + writel(mask, addr + STMP_OFFSET_REG_CLR); + udelay(1); + while ((readl(addr) & mask) && --timeout) + /* nothing */; + + return !timeout; +} + +int stmp_reset_block(void __iomem *reset_addr) +{ + int ret; + int timeout = 0x400; + + /* clear and poll SFTRST */ + ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_SFTRST); + if (unlikely(ret)) + goto error; + + /* clear CLKGATE */ + writel(STMP_MODULE_CLKGATE, reset_addr + STMP_OFFSET_REG_CLR); + + /* set SFTRST to reset the block */ + writel(STMP_MODULE_SFTRST, reset_addr + STMP_OFFSET_REG_SET); + udelay(1); + + /* poll CLKGATE becoming set */ + while ((!(readl(reset_addr) & STMP_MODULE_CLKGATE)) && --timeout) + /* nothing */; + if (unlikely(!timeout)) + goto error; + + /* clear and poll SFTRST */ + ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_SFTRST); + if (unlikely(ret)) + goto error; + + /* clear and poll CLKGATE */ + ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_CLKGATE); + if (unlikely(ret)) + goto error; + + return 0; + +error: + pr_err("%s(%p): module reset timeout\n", __func__, reset_addr); + 
return -ETIMEDOUT; +} +EXPORT_SYMBOL(stmp_reset_block); -- cgit v1.2.3-70-g09d2 From 6b9606106ba58d2bd80610f97e06fea58206b47c Mon Sep 17 00:00:00 2001 From: yan Date: Fri, 20 Apr 2012 21:25:53 +0800 Subject: lib/kobject.c : Remove redundant check in create_dir create_dir is a static function used only in kobject_add_internal. There's no need to do check here, for kobject_add_internal will reject kobject with invalid name. Signed-off-by: Yan Hong Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 21dee7c19afd..bbffa2110d43 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -47,13 +47,11 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { int error = 0; - if (kobject_name(kobj)) { - error = sysfs_create_dir(kobj); - if (!error) { - error = populate_dir(kobj); - if (error) - sysfs_remove_dir(kobj); - } + error = sysfs_create_dir(kobj); + if (!error) { + error = populate_dir(kobj); + if (error) + sysfs_remove_dir(kobj); } return error; } -- cgit v1.2.3-70-g09d2 From 559f9badd11ddf399f88b18b4c0f110fd511ae53 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 14 Mar 2012 22:17:39 -0400 Subject: rcu: List-debug variants of rcu list routines. * Make __list_add_rcu check the next->prev and prev->next pointers just like __list_add does. * Make list_del_rcu use __list_del_entry, which does the same checking at deletion time. Has been running for a week here without anything being tripped up, but it seems worth adding for completeness just in case something ever does corrupt those lists. Signed-off-by: Dave Jones Signed-off-by: Paul E. 
McKenney --- include/linux/rculist.h | 7 ++++++- lib/list_debug.c | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index d079290843a9..a20c05096231 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -30,6 +30,7 @@ * This is only for internal list manipulation where we know * the prev/next entries already! */ +#ifndef CONFIG_DEBUG_LIST static inline void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { @@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new, rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } +#else +extern void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next); +#endif /** * list_add_rcu - add a new entry to rcu-protected list @@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new, */ static inline void list_del_rcu(struct list_head *entry) { - __list_del(entry->prev, entry->next); + __list_del_entry(entry); entry->prev = LIST_POISON2; } diff --git a/lib/list_debug.c b/lib/list_debug.c index 982b850d4e7a..3810b481f940 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -10,6 +10,7 @@ #include #include #include +#include /* * Insert a new entry between two known consecutive entries. @@ -75,3 +76,24 @@ void list_del(struct list_head *entry) entry->prev = LIST_POISON2; } EXPORT_SYMBOL(list_del); + +/* + * RCU variants. + */ +void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + WARN(next->prev != prev, + "list_add_rcu corruption. next->prev should be " + "prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); + WARN(prev->next != next, + "list_add_rcu corruption. prev->next should be " + "next (%p), but was %p. 
(prev=%p).\n", + next, prev->next, prev); + new->next = next; + new->prev = prev; + rcu_assign_pointer(list_next_rcu(prev), new); + next->prev = new; +} +EXPORT_SYMBOL(__list_add_rcu); -- cgit v1.2.3-70-g09d2 From b8ccd5dee776d85e29cf139c77595b7369e294bc Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:32 -0600 Subject: dynamic_debug: replace if (verbose) pr_info with macro vpr_info Use vpr_info to declutter code, reduce indenting, and change one additional pr_info call in ddebug_exec_queries. Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 81 +++++++++++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 310c753cf83e..8675717c0f16 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -107,20 +107,22 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, return buf; } -#define vpr_info_dq(q, msg) \ -do { \ - if (verbose) \ - /* trim last char off format print */ \ - pr_info("%s: func=\"%s\" file=\"%s\" " \ - "module=\"%s\" format=\"%.*s\" " \ - "lineno=%u-%u", \ - msg, \ - q->function ? q->function : "", \ - q->filename ? q->filename : "", \ - q->module ? q->module : "", \ - (int)(q->format ? strlen(q->format) - 1 : 0), \ - q->format ? q->format : "", \ - q->first_lineno, q->last_lineno); \ +#define vpr_info(fmt, ...) \ + if (verbose) do { pr_info(fmt, ##__VA_ARGS__); } while (0) + +#define vpr_info_dq(q, msg) \ +do { \ + /* trim last char off format print */ \ + vpr_info("%s: func=\"%s\" file=\"%s\" " \ + "module=\"%s\" format=\"%.*s\" " \ + "lineno=%u-%u", \ + msg, \ + q->function ? q->function : "", \ + q->filename ? q->filename : "", \ + q->module ? q->module : "", \ + (int)(q->format ? strlen(q->format) - 1 : 0), \ + q->format ? 
q->format : "", \ + q->first_lineno, q->last_lineno); \ } while (0) /* @@ -180,12 +182,11 @@ static int ddebug_change(const struct ddebug_query *query, if (newflags == dp->flags) continue; dp->flags = newflags; - if (verbose) - pr_info("changed %s:%d [%s]%s =%s\n", - trim_prefix(dp->filename), dp->lineno, - dt->mod_name, dp->function, - ddebug_describe_flags(dp, flagbuf, - sizeof(flagbuf))); + vpr_info("changed %s:%d [%s]%s =%s\n", + trim_prefix(dp->filename), dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp, flagbuf, + sizeof(flagbuf))); } } mutex_unlock(&ddebug_lock); @@ -410,8 +411,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, default: return -EINVAL; } - if (verbose) - pr_info("op='%c'\n", op); + vpr_info("op='%c'\n", op); for ( ; *str ; ++str) { for (i = ARRAY_SIZE(opt_array) - 1; i >= 0; i--) { @@ -423,8 +423,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, if (i < 0) return -EINVAL; } - if (verbose) - pr_info("flags=0x%x\n", flags); + vpr_info("flags=0x%x\n", flags); /* calculate final *flagsp, *maskp according to mask and op */ switch (op) { @@ -441,8 +440,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, *flagsp = 0; break; } - if (verbose) - pr_info("*flagsp=0x%x *maskp=0x%x\n", *flagsp, *maskp); + vpr_info("*flagsp=0x%x *maskp=0x%x\n", *flagsp, *maskp); return 0; } @@ -487,8 +485,7 @@ static int ddebug_exec_queries(char *query) if (!query || !*query || *query == '#') continue; - if (verbose) - pr_info("query %d: \"%s\"\n", i, query); + vpr_info("query %d: \"%s\"\n", i, query); rc = ddebug_exec_query(query); if (rc < 0) { @@ -498,7 +495,7 @@ static int ddebug_exec_queries(char *query) nfound += rc; i++; } - pr_info("processed %d queries, with %d matches, %d errs\n", + vpr_info("processed %d queries, with %d matches, %d errs\n", i, nfound, errs); if (exitcode) @@ -653,8 +650,7 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, 
return -EFAULT; } tmpbuf[len] = '\0'; - if (verbose) - pr_info("read %d bytes from userspace\n", (int)len); + vpr_info("read %d bytes from userspace\n", (int)len); ret = ddebug_exec_queries(tmpbuf); kfree(tmpbuf); @@ -717,8 +713,7 @@ static void *ddebug_proc_start(struct seq_file *m, loff_t *pos) struct _ddebug *dp; int n = *pos; - if (verbose) - pr_info("called m=%p *pos=%lld\n", m, (unsigned long long)*pos); + vpr_info("called m=%p *pos=%lld\n", m, (unsigned long long)*pos); mutex_lock(&ddebug_lock); @@ -742,9 +737,8 @@ static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) struct ddebug_iter *iter = m->private; struct _ddebug *dp; - if (verbose) - pr_info("called m=%p p=%p *pos=%lld\n", - m, p, (unsigned long long)*pos); + vpr_info("called m=%p p=%p *pos=%lld\n", + m, p, (unsigned long long)*pos); if (p == SEQ_START_TOKEN) dp = ddebug_iter_first(iter); @@ -766,8 +760,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) struct _ddebug *dp = p; char flagsbuf[10]; - if (verbose) - pr_info("called m=%p p=%p\n", m, p); + vpr_info("called m=%p p=%p\n", m, p); if (p == SEQ_START_TOKEN) { seq_puts(m, @@ -791,8 +784,7 @@ static int ddebug_proc_show(struct seq_file *m, void *p) */ static void ddebug_proc_stop(struct seq_file *m, void *p) { - if (verbose) - pr_info("called m=%p p=%p\n", m, p); + vpr_info("called m=%p p=%p\n", m, p); mutex_unlock(&ddebug_lock); } @@ -815,8 +807,7 @@ static int ddebug_proc_open(struct inode *inode, struct file *file) struct ddebug_iter *iter; int err; - if (verbose) - pr_info("called\n"); + vpr_info("called\n"); iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (iter == NULL) @@ -866,8 +857,7 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, list_add_tail(&dt->link, &ddebug_tables); mutex_unlock(&ddebug_lock); - if (verbose) - pr_info("%u debug prints in module %s\n", n, dt->mod_name); + vpr_info("%u debug prints in module %s\n", n, dt->mod_name); return 0; } EXPORT_SYMBOL_GPL(ddebug_add_module); @@ -888,8 
+878,7 @@ int ddebug_remove_module(const char *mod_name) struct ddebug_table *dt, *nextdt; int ret = -ENOENT; - if (verbose) - pr_info("removing module \"%s\"\n", mod_name); + vpr_info("removing module \"%s\"\n", mod_name); mutex_lock(&ddebug_lock); list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { -- cgit v1.2.3-70-g09d2 From b48420c1d3019ce8d84fb8e58f4ca86b8e3655b8 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:35 -0600 Subject: dynamic_debug: make dynamic-debug work for module initialization This introduces a fake module param $module.dyndbg. It's based upon Thomas Renninger's $module.ddebug boot-time debugging patch from https://lkml.org/lkml/2010/9/15/397 The 'fake' module parameter is provided for all modules, whether or not they need it. It is not explicitly added to each module, but is implemented in callbacks invoked from parse_args. For builtin modules, dynamic_debug_init() now directly calls parse_args(..., &ddebug_dyndbg_boot_param_cb), to process the params undeclared in the modules, just after the ddebug tables are processed. While it's slightly weird to reprocess the boot params, parse_args() is already called repeatedly by do_initcall_levels(). More importantly, the dyndbg queries (given in ddebug_query or dyndbg params) cannot be activated until after the ddebug tables are ready, and reusing parse_args is cleaner than doing an ad-hoc parse. This reparse would break options like inc_verbosity, but they probably should be params, like verbosity=3. ddebug_dyndbg_boot_param_cb() handles both bare dyndbg (aka: ddebug_query) and module-prefixed dyndbg params, and ignores all other parameters. For example, the following will enable pr_debug()s in 4 builtin modules, in the order given: dyndbg="module params +p; module aio +p" module.dyndbg=+p pci.dyndbg For loadable modules, parse_args() in load_module() calls ddebug_dyndbg_module_param_cb().
This handles bare dyndbg params as passed from modprobe, and errors on other unknown params. Note that modprobe reads /proc/cmdline, so "modprobe foo" grabs all foo.params, strips the "foo.", and passes these to the kernel. ddebug_dyndbg_module_params_cb() is again called for the unknown params; it handles dyndbg, and errors on others. The "doing" arg added previously contains the module name. For non CONFIG_DYNAMIC_DEBUG builds, the stub function accepts and ignores $module.dyndbg params, other unknowns get -ENOENT. If no param value is given (as in pci.dyndbg example above), "+p" is assumed, which enables all pr_debug callsites in the module. The dyndbg fake parameter is not shown in /sys/module/*/parameters, thus it does not use any resources. Changes to it are made via the control file. Also change pr_info in ddebug_exec_queries to vpr_info, no need to see it all the time. Signed-off-by: Jim Cromie CC: Thomas Renninger CC: Rusty Russell Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- include/linux/dynamic_debug.h | 17 +++++++++++++++ kernel/module.c | 2 +- lib/dynamic_debug.c | 49 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index bf1b0fcc3c65..4697e4b59d6f 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -44,6 +44,9 @@ extern int ddebug_remove_module(const char *mod_name); extern __printf(2, 3) int __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...); +extern int ddebug_dyndbg_module_param_cb(char *param, char *val, + const char *modname); + struct device; extern __printf(3, 4) @@ -94,11 +97,25 @@ do { \ #else +#include +#include + static inline int ddebug_remove_module(const char *mod) { return 0; } +static inline int ddebug_dyndbg_module_param_cb(char *param, char *val, + const char *modname) +{ + if (strstr(param, "dyndbg")) { + pr_warn("dyndbg supported 
only in " + "CONFIG_DYNAMIC_DEBUG builds\n"); + return 0; /* allow and ignore */ + } + return -EINVAL; +} + #define dynamic_pr_debug(fmt, ...) \ do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) #define dynamic_dev_dbg(dev, fmt, ...) \ diff --git a/kernel/module.c b/kernel/module.c index 78ac6ec1e425..a4e60973ca73 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2953,7 +2953,7 @@ static struct module *load_module(void __user *umod, /* Module is ready to execute: parsing args may do that. */ err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, NULL); + -32768, 32767, &ddebug_dyndbg_module_param_cb); if (err < 0) goto unlink; diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 8675717c0f16..8fba40179305 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -862,6 +862,41 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, } EXPORT_SYMBOL_GPL(ddebug_add_module); +/* handle both dyndbg=".." and $module.dyndbg=".." params at boot */ +static int ddebug_dyndbg_boot_param_cb(char *param, char *val, + const char *unused) +{ + const char *modname = NULL; + char *sep; + + sep = strchr(param, '.'); + if (sep) { + *sep = '\0'; + modname = param; + param = sep + 1; + } + if (strcmp(param, "dyndbg")) + return 0; /* skip all other params w/o error */ + + vpr_info("module: %s %s=\"%s\"\n", modname, param, val); + + ddebug_exec_queries(val ? val : "+p"); + return 0; /* query failure shouldnt stop module load */ +} + +/* handle dyndbg args to modprobe */ +int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *doing) +{ + if (strcmp(param, "dyndbg")) + return -ENOENT; + + vpr_info("module: %s %s=\"%s\"\n", doing, param, val); + + ddebug_exec_queries((val ? 
val : "+p")); + + return 0; /* query failure shouldnt stop module load */ +} + static void ddebug_table_free(struct ddebug_table *dt) { list_del_init(&dt->link); @@ -929,6 +964,7 @@ static int __init dynamic_debug_init(void) { struct _ddebug *iter, *iter_start; const char *modname = NULL; + char *cmdline; int ret = 0; int n = 0; @@ -967,6 +1003,18 @@ static int __init dynamic_debug_init(void) /* keep tables even on ddebug_query parse error */ ret = 0; } + /* now that ddebug tables are loaded, process all boot args + * again to find and activate queries given in dyndbg params. + * While this has already been done for known boot params, it + * ignored the unknown ones (dyndbg in particular). Reusing + * parse_args avoids ad-hoc parsing. This will also attempt + * to activate queries for not-yet-loaded modules, which is + * slightly noisy if verbose, but harmless. + */ + cmdline = kstrdup(saved_command_line, GFP_KERNEL); + parse_args("dyndbg params", cmdline, NULL, + 0, 0, 0, &ddebug_dyndbg_boot_param_cb); + kfree(cmdline); out_free: if (ret) @@ -977,5 +1025,6 @@ out_free: } /* Allow early initialization for boot messages via boot param */ arch_initcall(dynamic_debug_init); + /* Debugfs setup must be done later */ module_init(dynamic_debug_init_debugfs); -- cgit v1.2.3-70-g09d2 From f0b919d967284313be4a767ba92ab5a88cb27410 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:36 -0600 Subject: dynamic_debug: deprecate ddebug_query, suggest dyndbg instead With ddebug_dyndbg_boot_param_cb() handling bare dyndbg params, we don't need ddebug_query param anymore. Add a warning when processing ddebug_query= param that it is deprecated, and to change it to dyndbg= Add a deprecation notice for v3.8 to feature-removal-schedule.txt, and add a suggested deprecation period of 3 releases to the header.
Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- Documentation/feature-removal-schedule.txt | 9 ++++++++- lib/dynamic_debug.c | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 709e08e9a222..e458d2b2ae95 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -2,7 +2,14 @@ The following is a list of files and features that are going to be removed in the kernel source tree. Every entry should contain what exactly is going away, why it is happening, and who is going to be doing the work. When the feature is removed from the kernel, it should also -be removed from this file. +be removed from this file. The suggested deprecation period is 3 releases. + +--------------------------- + +What: ddebug_query="query" boot cmdline param +When: v3.8 +Why: obsoleted by dyndbg="query" and module.dyndbg="query" +Who: Jim Cromie , Jason Baron --------------------------- diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 8fba40179305..09f2cda88058 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -993,6 +993,8 @@ static int __init dynamic_debug_init(void) /* ddebug_query boot param got passed -> set it up */ if (ddebug_setup_string[0] != '\0') { + pr_warn("ddebug_query param name is deprecated," + " change it to dyndbg\n"); ret = ddebug_exec_queries(ddebug_setup_string); if (ret < 0) pr_warn("Invalid ddebug boot param %s", -- cgit v1.2.3-70-g09d2 From 6ab676e96422f33a873006096f928feeded7ce3b Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:37 -0600 Subject: dynamic_debug: combine parse_args callbacks together Refactor ddebug_dyndbg_boot_param_cb and ddebug_dyndbg_module_param_cb into a common helper function, and call it from both. The handling of foo.dyndbg is unneeded by the latter, but harmless. 
The 2 callers differ only by pr_info and the return code they pass to the helper for when an unknown param is handled. I could slightly reduce dmesg clutter by putting the vpr_info in the common helper, after the return on_err, but that loses __func__ context, is overly silent on module_cb unknown param errors, and the clutter is only when dynamic_debug.verbose=1 anyway. Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 09f2cda88058..3b06f926d5b8 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -862,39 +862,43 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, } EXPORT_SYMBOL_GPL(ddebug_add_module); -/* handle both dyndbg=".." and $module.dyndbg=".." params at boot */ -static int ddebug_dyndbg_boot_param_cb(char *param, char *val, - const char *unused) +/* helper for ddebug_dyndbg_(boot|module)_param_cb */ +static int ddebug_dyndbg_param_cb(char *param, char *val, + const char *modname, int on_err) { - const char *modname = NULL; char *sep; sep = strchr(param, '.'); if (sep) { + /* needed only for ddebug_dyndbg_boot_param_cb */ *sep = '\0'; modname = param; param = sep + 1; } if (strcmp(param, "dyndbg")) - return 0; /* skip all other params w/o error */ - - vpr_info("module: %s %s=\"%s\"\n", modname, param, val); + return on_err; /* determined by caller */ ddebug_exec_queries(val ? 
val : "+p"); return 0; /* query failure shouldnt stop module load */ } -/* handle dyndbg args to modprobe */ -int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *doing) +/* handle both dyndbg and $module.dyndbg params at boot */ +static int ddebug_dyndbg_boot_param_cb(char *param, char *val, + const char *unused) { - if (strcmp(param, "dyndbg")) - return -ENOENT; - - vpr_info("module: %s %s=\"%s\"\n", doing, param, val); - - ddebug_exec_queries((val ? val : "+p")); + vpr_info("%s=\"%s\"\n", param, val); + return ddebug_dyndbg_param_cb(param, val, NULL, 0); +} - return 0; /* query failure shouldnt stop module load */ +/* + * modprobe foo finds foo.params in boot-args, strips "foo.", and + * passes them to load_module(). This callback gets unknown params, + * processes dyndbg params, rejects others. + */ +int ddebug_dyndbg_module_param_cb(char *param, char *val, const char *module) +{ + vpr_info("module: %s %s=\"%s\"\n", module, param, val); + return ddebug_dyndbg_param_cb(param, val, module, -ENOENT); } static void ddebug_table_free(struct ddebug_table *dt) -- cgit v1.2.3-70-g09d2 From af442399fcf378a21ffe924b182f6d9ee70001ca Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:38 -0600 Subject: dynamic_debug: simplify dynamic_debug_init error exit We don't want errors while parsing ddebug_query to unload ddebug tables, so set success after tables are loaded, and return 0 after query parsing is done. Simplify error handling code since it's no longer used for success, and change goto label to out_err to clarify this.
Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 3b06f926d5b8..66e0ec4d21f7 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -984,7 +984,7 @@ static int __init dynamic_debug_init(void) if (strcmp(modname, iter->modname)) { ret = ddebug_add_module(iter_start, n, modname); if (ret) - goto out_free; + goto out_err; n = 0; modname = iter->modname; iter_start = iter; @@ -993,9 +993,11 @@ static int __init dynamic_debug_init(void) } ret = ddebug_add_module(iter_start, n, modname); if (ret) - goto out_free; + goto out_err; - /* ddebug_query boot param got passed -> set it up */ + ddebug_init_success = 1; + + /* apply ddebug_query boot param, dont unload tables on err */ if (ddebug_setup_string[0] != '\0') { pr_warn("ddebug_query param name is deprecated," " change it to dyndbg\n"); @@ -1005,9 +1007,6 @@ static int __init dynamic_debug_init(void) ddebug_setup_string); else pr_info("%d changes by ddebug_query\n", ret); - - /* keep tables even on ddebug_query parse error */ - ret = 0; } /* now that ddebug tables are loaded, process all boot args * again to find and activate queries given in dyndbg params. 
@@ -1021,12 +1020,10 @@ static int __init dynamic_debug_init(void) parse_args("dyndbg params", cmdline, NULL, 0, 0, 0, &ddebug_dyndbg_boot_param_cb); kfree(cmdline); + return 0; -out_free: - if (ret) - ddebug_remove_all_tables(); - else - ddebug_init_success = 1; +out_err: + ddebug_remove_all_tables(); return 0; } /* Allow early initialization for boot messages via boot param */ -- cgit v1.2.3-70-g09d2 From 4107692760db8160a65347f7bb2fa7fa7bf9b0d1 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:39 -0600 Subject: dynamic_debug: print ram usage by ddebug tables if verbose Print ram usage of dynamic-debug tables and verbose section so user knows cost of enabling CONFIG_DYNAMIC_DEBUG. This only counts the size of the _ddebug tables for builtins and the __verbose section that they refer to, not those used in loadable modules. Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 66e0ec4d21f7..76da6aa66ce7 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -970,7 +970,8 @@ static int __init dynamic_debug_init(void) const char *modname = NULL; char *cmdline; int ret = 0; - int n = 0; + int n = 0, entries = 0, modct = 0; + int verbose_bytes = 0; if (__start___verbose == __stop___verbose) { pr_warn("_ddebug table is empty in a " @@ -981,7 +982,12 @@ static int __init dynamic_debug_init(void) modname = iter->modname; iter_start = iter; for (; iter < __stop___verbose; iter++) { + entries++; + verbose_bytes += strlen(iter->modname) + strlen(iter->function) + + strlen(iter->filename) + strlen(iter->format); + if (strcmp(modname, iter->modname)) { + modct++; ret = ddebug_add_module(iter_start, n, modname); if (ret) goto out_err; @@ -996,6 +1002,10 @@ static int __init dynamic_debug_init(void) goto out_err; ddebug_init_success = 1; + vpr_info("%d 
modules, %d entries and %d bytes in ddebug tables," + " %d bytes in (readonly) verbose section\n", + modct, entries, (int)( modct * sizeof(struct ddebug_table)), + verbose_bytes + (int)(__stop___verbose - __start___verbose)); /* apply ddebug_query boot param, dont unload tables on err */ if (ddebug_setup_string[0] != '\0') { -- cgit v1.2.3-70-g09d2 From 8e59b5cfb9a6f43753236b554d785e8efca62db7 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:40 -0600 Subject: dynamic_debug: add modname arg to exec_query callchain Pass module name into ddebug_exec_queries(), ddebug_exec_query(), and ddebug_parse_query() as separate parameter. In ddebug_parse_query(), the module name is added into the query struct before the query-string is parsed. This allows the query-string to be shorter: instead of: $modname.dyndbg="module $modname +fp" do this: $modname.dyndbg="+fp" Omitting "module $modname" from the query string is actually required for $modname.dyndbg rules; the set-only-once check added in a previous patch will throw an error if its added again. ddebug_query="..." has no $modname associated with it, so the query string may include it. This also fixes redundant "module $modname" otherwise needed to handle multiple queries per string: $modname.dyndbg="func foo +fp; func bar +fp" Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 76da6aa66ce7..cfd84638b1a4 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -338,7 +338,7 @@ static int check_set(const char **dest, char *src, char *name) * Returns 0 on success, <0 on error. 
*/ static int ddebug_parse_query(char *words[], int nwords, - struct ddebug_query *query) + struct ddebug_query *query, const char *modname) { unsigned int i; int rc; @@ -348,6 +348,10 @@ static int ddebug_parse_query(char *words[], int nwords, return -EINVAL; memset(query, 0, sizeof(*query)); + if (modname) + /* support $modname.dyndbg= */ + query->module = modname; + for (i = 0 ; i < nwords ; i += 2) { if (!strcmp(words[i], "func")) rc = check_set(&query->function, words[i+1], "func"); @@ -444,7 +448,7 @@ static int ddebug_parse_flags(const char *str, unsigned int *flagsp, return 0; } -static int ddebug_exec_query(char *query_string) +static int ddebug_exec_query(char *query_string, const char *modname) { unsigned int flags = 0, mask = 0; struct ddebug_query query; @@ -455,7 +459,7 @@ static int ddebug_exec_query(char *query_string) nwords = ddebug_tokenize(query_string, words, MAXWORDS); if (nwords <= 0) return -EINVAL; - if (ddebug_parse_query(words, nwords-1, &query)) + if (ddebug_parse_query(words, nwords-1, &query, modname)) return -EINVAL; if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) return -EINVAL; @@ -471,7 +475,7 @@ static int ddebug_exec_query(char *query_string) last error or number of matching callsites. Module name is either in param (for boot arg) or perhaps in query string. 
*/ -static int ddebug_exec_queries(char *query) +static int ddebug_exec_queries(char *query, const char *modname) { char *split; int i, errs = 0, exitcode = 0, rc, nfound = 0; @@ -487,7 +491,7 @@ static int ddebug_exec_queries(char *query) vpr_info("query %d: \"%s\"\n", i, query); - rc = ddebug_exec_query(query); + rc = ddebug_exec_query(query, modname); if (rc < 0) { errs++; exitcode = rc; @@ -652,7 +656,7 @@ static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, tmpbuf[len] = '\0'; vpr_info("read %d bytes from userspace\n", (int)len); - ret = ddebug_exec_queries(tmpbuf); + ret = ddebug_exec_queries(tmpbuf, NULL); kfree(tmpbuf); if (ret < 0) return ret; @@ -878,7 +882,8 @@ static int ddebug_dyndbg_param_cb(char *param, char *val, if (strcmp(param, "dyndbg")) return on_err; /* determined by caller */ - ddebug_exec_queries(val ? val : "+p"); + ddebug_exec_queries((val ? val : "+p"), modname); + return 0; /* query failure shouldnt stop module load */ } @@ -1011,7 +1016,7 @@ static int __init dynamic_debug_init(void) if (ddebug_setup_string[0] != '\0') { pr_warn("ddebug_query param name is deprecated," " change it to dyndbg\n"); - ret = ddebug_exec_queries(ddebug_setup_string); + ret = ddebug_exec_queries(ddebug_setup_string, NULL); if (ret < 0) pr_warn("Invalid ddebug boot param %s", ddebug_setup_string); -- cgit v1.2.3-70-g09d2 From 29e36c9ffb696ed8d73e1aee713d483ec74a9a43 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:41 -0600 Subject: dynamic_debug: update Documentation/*, Kconfig.debug In dynamic-debug-howto.txt: - add section: Debug Messages at Module Initialization Time - update flags indicators in example outputs to include '=' - make flags descriptions tabular - add item on '_' flag-char - add dyndbg, boot-args examples - rewrap some paragraphs with long lines In Kconfig.debug, note that compiling with -DDEBUG enables all pr_debug()s in that code. 
In kernel-parameters.txt, add dyndbg and module.dyndbg items, and deprecate ddebug_query. Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- Documentation/dynamic-debug-howto.txt | 184 ++++++++++++++++++++++------------ Documentation/kernel-parameters.txt | 7 +- lib/Kconfig.debug | 17 ++-- 3 files changed, 138 insertions(+), 70 deletions(-) (limited to 'lib') diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt index 74e6c7782678..6e1684981da2 100644 --- a/Documentation/dynamic-debug-howto.txt +++ b/Documentation/dynamic-debug-howto.txt @@ -2,17 +2,17 @@ Introduction ============ -This document describes how to use the dynamic debug (ddebug) feature. +This document describes how to use the dynamic debug (dyndbg) feature. -Dynamic debug is designed to allow you to dynamically enable/disable kernel -code to obtain additional kernel information. Currently, if -CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() calls can be -dynamically enabled per-callsite. +Dynamic debug is designed to allow you to dynamically enable/disable +kernel code to obtain additional kernel information. Currently, if +CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() calls can +be dynamically enabled per-callsite. 
Dynamic debug has even more useful features: - * Simple query language allows turning on and off debugging statements by - matching any combination of 0 or 1 of: + * Simple query language allows turning on and off debugging + statements by matching any combination of 0 or 1 of: - source filename - function name @@ -20,17 +20,19 @@ Dynamic debug has even more useful features: - module name - format string - * Provides a debugfs control file: /dynamic_debug/control which can be - read to display the complete list of known debug statements, to help guide you + * Provides a debugfs control file: /dynamic_debug/control + which can be read to display the complete list of known debug + statements, to help guide you Controlling dynamic debug Behaviour =================================== The behaviour of pr_debug()/dev_dbg()s are controlled via writing to a -control file in the 'debugfs' filesystem. Thus, you must first mount the debugfs -filesystem, in order to make use of this feature. Subsequently, we refer to the -control file as: /dynamic_debug/control. For example, if you want to -enable printing from source file 'svcsock.c', line 1603 you simply do: +control file in the 'debugfs' filesystem. Thus, you must first mount +the debugfs filesystem, in order to make use of this feature. +Subsequently, we refer to the control file as: +/dynamic_debug/control. 
For example, if you want to enable +printing from source file 'svcsock.c', line 1603 you simply do: nullarbor:~ # echo 'file svcsock.c line 1603 +p' > /dynamic_debug/control @@ -44,15 +46,15 @@ nullarbor:~ # echo 'file svcsock.c wtf 1 +p' > Viewing Dynamic Debug Behaviour =========================== -You can view the currently configured behaviour of all the debug statements -via: +You can view the currently configured behaviour of all the debug +statements via: nullarbor:~ # cat /dynamic_debug/control # filename:lineno [module]function flags format -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup - "SVCRDMA Module Removed, deregister RPC RDMA transport\012" -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init - "\011max_inline : %d\012" -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init - "\011sq_depth : %d\012" -/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init - "\011max_requests : %d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup =_ "SVCRDMA Module Removed, deregister RPC RDMA transport\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init =_ "\011max_inline : %d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init =_ "\011sq_depth : %d\012" +/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init =_ "\011max_requests : %d\012" ... 
@@ -65,12 +67,12 @@ nullarbor:~ # grep -i rdma /dynamic_debug/control | wc -l nullarbor:~ # grep -i tcp /dynamic_debug/control | wc -l 42 -Note in particular that the third column shows the enabled behaviour -flags for each debug statement callsite (see below for definitions of the -flags). The default value, no extra behaviour enabled, is "-". So -you can view all the debug statement callsites with any non-default flags: +The third column shows the currently enabled flags for each debug +statement callsite (see below for definitions of the flags). The +default value, with no flags enabled, is "=_". So you can view all +the debug statement callsites with any non-default flags: -nullarbor:~ # awk '$3 != "-"' /dynamic_debug/control +nullarbor:~ # awk '$3 != "=_"' /dynamic_debug/control # filename:lineno [module]function flags format /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process: st_sendto returned %d\012" @@ -103,15 +105,14 @@ specifications, followed by a flags change specification. command ::= match-spec* flags-spec -The match-spec's are used to choose a subset of the known dprintk() +The match-spec's are used to choose a subset of the known pr_debug() callsites to which to apply the flags-spec. Think of them as a query with implicit ANDs between each pair. Note that an empty list of -match-specs is possible, but is not very useful because it will not -match any debug statement callsites. +match-specs will select all debug statement callsites. -A match specification comprises a keyword, which controls the attribute -of the callsite to be compared, and a value to compare against. Possible -keywords are: +A match specification comprises a keyword, which controls the +attribute of the callsite to be compared, and a value to compare +against. Possible keywords are: match-spec ::= 'func' string | 'file' string | @@ -164,15 +165,15 @@ format characters (") or single quote characters ('). 
Examples: - format svcrdma: // many of the NFS/RDMA server dprintks - format readahead // some dprintks in the readahead cache + format svcrdma: // many of the NFS/RDMA server pr_debugs + format readahead // some pr_debugs in the readahead cache format nfsd:\040SETATTR // one way to match a format with whitespace format "nfsd: SETATTR" // a neater way to match a format with whitespace format 'nfsd: SETATTR' // yet another way to match a format with whitespace line The given line number or range of line numbers is compared - against the line number of each dprintk() callsite. A single + against the line number of each pr_debug() callsite. A single line number matches the callsite line number exactly. A range of line numbers matches any callsite between the first and last line number inclusive. An empty first number means @@ -188,51 +189,93 @@ The flags specification comprises a change operation followed by one or more flag characters. The change operation is one of the characters: -- - remove the given flags - -+ - add the given flags - -= - set the flags to the given flags + - remove the given flags + + add the given flags + = set the flags to the given flags The flags are: -f - Include the function name in the printed message -l - Include line number in the printed message -m - Include module name in the printed message -p - Causes a printk() message to be emitted to dmesg -t - Include thread ID in messages not generated from interrupt context + p enables the pr_debug() callsite. + f Include the function name in the printed message + l Include line number in the printed message + m Include module name in the printed message + t Include thread ID in messages not generated from interrupt context + _ No flags are set. (Or'd with others on input) + +For display, the flags are preceded by '=' +(mnemonic: what the flags are currently equal to). -Note the regexp ^[-+=][flmpt]+$ matches a flags specification. 
-Note also that there is no convenient syntax to remove all -the flags at once, you need to use "-flmpt". +Note the regexp ^[-+=][flmpt_]+$ matches a flags specification. +To clear all flags at once, use "=_" or "-flmpt". -Debug messages during boot process +Debug messages during Boot Process ================================== -To be able to activate debug messages during the boot process, -even before userspace and debugfs exists, use the boot parameter: -ddebug_query="QUERY" +To activate debug messages for core code and built-in modules during +the boot process, even before userspace and debugfs exists, use +dyndbg="QUERY", module.dyndbg="QUERY", or ddebug_query="QUERY" +(ddebug_query is obsoleted by dyndbg, and deprecated). QUERY follows +the syntax described above, but must not exceed 1023 characters. Your +bootloader may impose lower limits. + +These dyndbg params are processed just after the ddebug tables are +processed, as part of the arch_initcall. Thus you can enable debug +messages in all code run after this arch_initcall via this boot +parameter. -QUERY follows the syntax described above, but must not exceed 1023 -characters. The enablement of debug messages is done as an arch_initcall. -Thus you can enable debug messages in all code processed after this -arch_initcall via this boot parameter. On an x86 system for example ACPI enablement is a subsys_initcall and -ddebug_query="file ec.c +p" + dyndbg="file ec.c +p" will show early Embedded Controller transactions during ACPI setup if your machine (typically a laptop) has an Embedded Controller. PCI (or other devices) initialization also is a hot candidate for using this boot parameter for debugging purposes. +If foo module is not built-in, foo.dyndbg will still be processed at +boot time, without effect, but will be reprocessed when module is +loaded later. ddebug_query= and bare dyndbg= are only processed at +boot.
+ + +Debug Messages at Module Initialization Time +============================================ + +When "modprobe foo" is called, modprobe scans /proc/cmdline for +foo.params, strips "foo.", and passes them to the kernel along with +params given in modprobe args or /etc/modprobe.d/*.conf files, +in the following order: + +1. # parameters given via /etc/modprobe.d/*.conf + options foo dyndbg=+pt + options foo dyndbg # defaults to +p + +2. # foo.dyndbg as given in boot args, "foo." is stripped and passed + foo.dyndbg=" func bar +p; func buz +mp" + +3. # args to modprobe + modprobe foo dyndbg==pmf # override previous settings + +These dyndbg queries are applied in order, with last having final say. +This allows boot args to override or modify those from /etc/modprobe.d +(sensible, since 1 is system wide, 2 is kernel or boot specific), and +modprobe args to override both. + +In the foo.dyndbg="QUERY" form, the query must exclude "module foo". +"foo" is extracted from the param-name, and applied to each query in +"QUERY", and only 1 match-spec of each type is allowed. + +The dyndbg option is a "fake" module parameter, which means: + +- modules do not need to define it explicitly +- every module gets it tacitly, whether they use pr_debug or not +- it doesn't appear in /sys/module/$module/parameters/ + To see it, grep the control file, or inspect /proc/cmdline. + +For CONFIG_DYNAMIC_DEBUG kernels, any settings given at boot-time (or +enabled by -DDEBUG flag during compilation) can be disabled later via +the debugfs interface if the debug messages are no longer needed: + + echo "module module_name -p" > /dynamic_debug/control Examples ======== @@ -260,3 +303,18 @@ nullarbor:~ # echo -n 'func svc_process -p' > // enable messages for NFS calls READ, READLINK, READDIR and READDIR+.
nullarbor:~ # echo -n 'format "nfsd: READ" +p' > /dynamic_debug/control + +// enable all messages +nullarbor:~ # echo -n '+p' > /dynamic_debug/control + +// add module, function to all enabled messages +nullarbor:~ # echo -n '+mf' > /dynamic_debug/control + +// boot-args example, with newlines and comments for readability +Kernel command line: ... + // see whats going on in dyndbg=value processing + dynamic_debug.verbose=1 + // enable pr_debugs in 2 builtins, #cmt is stripped + dyndbg="module params +p #cmt ; module sys +p" + // enable pr_debugs in 2 functions in a module loaded later + pc87360.dyndbg="func pc87360_init_device +p; func pc87360_find +p" diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1601e5a8b71..d224225616b7 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -610,7 +610,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ddebug_query= [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot time. See Documentation/dynamic-debug-howto.txt for - details. + details. Deprecated, see dyndbg. debug [KNL] Enable kernel debugging (events log level). @@ -730,6 +730,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. dscc4.setup= [NET] + dyndbg[="val"] [KNL,DYNAMIC_DEBUG] + module.dyndbg[="val"] + Enable debug messages at boot time. See + Documentation/dynamic-debug-howto.txt for details. + earlycon= [KNL] Output early console device and options. uart[8250],io,[,options] uart[8250],mmio,[,options] diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6777153f18f3..ef8192bc0c33 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1205,8 +1205,13 @@ config DYNAMIC_DEBUG otherwise be available at runtime. These messages can then be enabled/disabled based on various levels of scope - per source file, function, module, format string, and line number. 
This mechanism - implicitly enables all pr_debug() and dev_dbg() calls. The impact of - this compile option is a larger kernel text size of about 2%. + implicitly compiles in all pr_debug() and dev_dbg() calls, which + enlarges the kernel text size by about 2%. + + If a source file is compiled with DEBUG flag set, any + pr_debug() calls in it are enabled by default, but can be + disabled at runtime as below. Note that DEBUG flag is + turned on by many CONFIG_*DEBUG* options. Usage: @@ -1223,16 +1228,16 @@ config DYNAMIC_DEBUG lineno : line number of the debug statement module : module that contains the debug statement function : function that contains the debug statement - flags : 'p' means the line is turned 'on' for printing + flags : '=p' means the line is turned 'on' for printing format : the format used for the debug statement From a live system: nullarbor:~ # cat /dynamic_debug/control # filename:lineno [module]function flags format - fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" - fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" - fs/aio.c:1770 [aio]sys_io_cancel - "calling\040cancel\012" + fs/aio.c:222 [aio]__put_ioctx =_ "__put_ioctx:\040freeing\040%p\012" + fs/aio.c:248 [aio]ioctx_alloc =_ "ENOMEM:\040nr_events\040too\040high\012" + fs/aio.c:1770 [aio]sys_io_cancel =_ "calling\040cancel\012" Example usage: -- cgit v1.2.3-70-g09d2 From 3ec5652ab70f6e9a888d9e5f67c858af354323b3 Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Fri, 27 Apr 2012 14:30:42 -0600 Subject: dynamic_debug: init with early_initcall, not arch_initcall 1- Call dynamic_debug_init() from early_initcall, not arch_initcall. 2- Call dynamic_debug_init_debugfs() from fs_initcall, not module_init. RFC: This works for me on a 64 bit desktop and a i586 SBC, but is untested on other arches. I presume there is or was a reason original code used arch_initcall, maybe the constraints have changed. This makes facility available as soon as possible. 
2nd change has a downside when dynamic_debug.verbose=1; all the vpr_info()s called in the proc-fs code are activated, causing voluminous output from dmesg. TBD: I'm unsure of this explanation, but the output is there. This could be fixed by changing those callsites to v2pr_info(if verbose > 1). 1st change is still not early enough to enable pr_debugs in kernel/params, so parsing of boot-args isn't logged. The reparse of those args is however visible after params.dyndbg="+p" is processed. Signed-off-by: Jim Cromie Acked-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index cfd84638b1a4..7ca29a0a3019 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -1042,7 +1042,7 @@ out_err: return 0; } /* Allow early initialization for boot messages via boot param */ -arch_initcall(dynamic_debug_init); +early_initcall(dynamic_debug_init); /* Debugfs setup must be done later */ -module_init(dynamic_debug_init_debugfs); +fs_initcall(dynamic_debug_init_debugfs); -- cgit v1.2.3-70-g09d2 From 9c1c21a0533aa37a475e8e8cce7ee064ed771881 Mon Sep 17 00:00:00 2001 From: Aneesh V Date: Fri, 27 Apr 2012 17:54:03 +0530 Subject: ddr: add LPDDR2 data from JESD209-2 add LPDDR2 data from the JEDEC spec JESD209-2. The data includes: 1. Addressing information for LPDDR2 memories of different densities and types(S2/S4) 2. AC timing data. This data will be useful for memory controller device drivers. Right now this is used by the TI EMIF SDRAM controller driver.
Signed-off-by: Aneesh V Reviewed-by: Santosh Shilimkar Reviewed-by: Benoit Cousson [santosh.shilimkar@ti.com: Moved to drivers/memory from drivers/misc] Signed-off-by: Santosh Shilimkar Tested-by: Lokesh Vutla Signed-off-by: Greg Kroah-Hartman --- include/memory/jedec_ddr.h | 175 +++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig | 8 +++ lib/Makefile | 2 + lib/jedec_ddr_data.c | 135 ++++++++++++++++++++++++++++++++++ 4 files changed, 320 insertions(+) create mode 100644 include/memory/jedec_ddr.h create mode 100644 lib/jedec_ddr_data.c (limited to 'lib') diff --git a/include/memory/jedec_ddr.h b/include/memory/jedec_ddr.h new file mode 100644 index 000000000000..ddad0f870e5d --- /dev/null +++ b/include/memory/jedec_ddr.h @@ -0,0 +1,175 @@ +/* + * Definitions for DDR memories based on JEDEC specs + * + * Copyright (C) 2012 Texas Instruments, Inc. + * + * Aneesh V + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __LINUX_JEDEC_DDR_H +#define __LINUX_JEDEC_DDR_H + +#include + +/* DDR Densities */ +#define DDR_DENSITY_64Mb 1 +#define DDR_DENSITY_128Mb 2 +#define DDR_DENSITY_256Mb 3 +#define DDR_DENSITY_512Mb 4 +#define DDR_DENSITY_1Gb 5 +#define DDR_DENSITY_2Gb 6 +#define DDR_DENSITY_4Gb 7 +#define DDR_DENSITY_8Gb 8 +#define DDR_DENSITY_16Gb 9 +#define DDR_DENSITY_32Gb 10 + +/* DDR type */ +#define DDR_TYPE_DDR2 1 +#define DDR_TYPE_DDR3 2 +#define DDR_TYPE_LPDDR2_S4 3 +#define DDR_TYPE_LPDDR2_S2 4 +#define DDR_TYPE_LPDDR2_NVM 5 + +/* DDR IO width */ +#define DDR_IO_WIDTH_4 1 +#define DDR_IO_WIDTH_8 2 +#define DDR_IO_WIDTH_16 3 +#define DDR_IO_WIDTH_32 4 + +/* Number of Row bits */ +#define R9 9 +#define R10 10 +#define R11 11 +#define R12 12 +#define R13 13 +#define R14 14 +#define R15 15 +#define R16 16 + +/* Number of Column bits */ +#define C7 7 +#define C8 8 +#define C9 9 +#define C10 10 +#define C11 11 +#define C12 12 + +/* Number of Banks */ +#define B1 0 +#define B2 1 +#define B4 2 +#define B8 3 + +/* Refresh rate in nano-seconds */ +#define T_REFI_15_6 15600 +#define T_REFI_7_8 7800 +#define T_REFI_3_9 3900 + +/* tRFC values */ +#define T_RFC_90 90000 +#define T_RFC_110 110000 +#define T_RFC_130 130000 +#define T_RFC_160 160000 +#define T_RFC_210 210000 +#define T_RFC_300 300000 +#define T_RFC_350 350000 + +/* Mode register numbers */ +#define DDR_MR0 0 +#define DDR_MR1 1 +#define DDR_MR2 2 +#define DDR_MR3 3 +#define DDR_MR4 4 +#define DDR_MR5 5 +#define DDR_MR6 6 +#define DDR_MR7 7 +#define DDR_MR8 8 +#define DDR_MR9 9 +#define DDR_MR10 10 +#define DDR_MR11 11 +#define DDR_MR16 16 +#define DDR_MR17 17 +#define DDR_MR18 18 + +/* + * LPDDR2 related defines + */ + +/* MR4 register fields */ +#define MR4_SDRAM_REF_RATE_SHIFT 0 +#define MR4_SDRAM_REF_RATE_MASK 7 +#define MR4_TUF_SHIFT 7 +#define MR4_TUF_MASK (1 << 7) + +/* MR4 SDRAM Refresh Rate field values */ +#define SDRAM_TEMP_NOMINAL 0x3 +#define SDRAM_TEMP_RESERVED_4 0x4 +#define 
SDRAM_TEMP_HIGH_DERATE_REFRESH 0x5 +#define SDRAM_TEMP_HIGH_DERATE_REFRESH_AND_TIMINGS 0x6 +#define SDRAM_TEMP_VERY_HIGH_SHUTDOWN 0x7 + +#define NUM_DDR_ADDR_TABLE_ENTRIES 11 +#define NUM_DDR_TIMING_TABLE_ENTRIES 4 + +/* Structure for DDR addressing info from the JEDEC spec */ +struct lpddr2_addressing { + u32 num_banks; + u32 tREFI_ns; + u32 tRFCab_ps; +}; + +/* + * Structure for timings from the LPDDR2 datasheet + * All parameters are in pico seconds(ps) unless explicitly indicated + * with a suffix like tRAS_max_ns below + */ +struct lpddr2_timings { + u32 max_freq; + u32 min_freq; + u32 tRPab; + u32 tRCD; + u32 tWR; + u32 tRAS_min; + u32 tRRD; + u32 tWTR; + u32 tXP; + u32 tRTP; + u32 tCKESR; + u32 tDQSCK_max; + u32 tDQSCK_max_derated; + u32 tFAW; + u32 tZQCS; + u32 tZQCL; + u32 tZQinit; + u32 tRAS_max_ns; +}; + +/* + * Min value for some parameters in terms of number of tCK cycles(nCK) + * Please set to zero parameters that are not valid for a given memory + * type + */ +struct lpddr2_min_tck { + u32 tRPab; + u32 tRCD; + u32 tWR; + u32 tRASmin; + u32 tRRD; + u32 tWTR; + u32 tXP; + u32 tRTP; + u32 tCKE; + u32 tCKESR; + u32 tFAW; +}; + +extern const struct lpddr2_addressing + lpddr2_jedec_addressing_table[NUM_DDR_ADDR_TABLE_ENTRIES]; +extern const struct lpddr2_timings + lpddr2_jedec_timings[NUM_DDR_TIMING_TABLE_ENTRIES]; +extern const struct lpddr2_min_tck lpddr2_jedec_min_tck; + +#endif /* __LINUX_JEDEC_DDR_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 4a8aba2e5cc0..0e25c03939e3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -353,6 +353,14 @@ config CORDIC This option provides an implementation of the CORDIC algorithm; calculations are in fixed point. Module will be called cordic. +config DDR + bool "JEDEC DDR data" + help + Data from JEDEC specs for DDR SDRAM memories, + particularly the AC timing parameters and addressing + information. This data is useful for drivers handling + DDR SDRAM controllers. 
+ config MPILIB tristate select CLZ_TAB diff --git a/lib/Makefile b/lib/Makefile index 18515f0267c4..74290c9e2864 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,6 +123,8 @@ obj-$(CONFIG_SIGNATURE) += digsig.o obj-$(CONFIG_CLZ_TAB) += clz_tab.o +obj-$(CONFIG_DDR) += jedec_ddr_data.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/jedec_ddr_data.c b/lib/jedec_ddr_data.c new file mode 100644 index 000000000000..6d2cbf1d567f --- /dev/null +++ b/lib/jedec_ddr_data.c @@ -0,0 +1,135 @@ +/* + * DDR addressing details and AC timing parameters from JEDEC specs + * + * Copyright (C) 2012 Texas Instruments, Inc. + * + * Aneesh V + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +/* LPDDR2 addressing details from JESD209-2 section 2.4 */ +const struct lpddr2_addressing + lpddr2_jedec_addressing_table[NUM_DDR_ADDR_TABLE_ENTRIES] = { + {B4, T_REFI_15_6, T_RFC_90}, /* 64M */ + {B4, T_REFI_15_6, T_RFC_90}, /* 128M */ + {B4, T_REFI_7_8, T_RFC_90}, /* 256M */ + {B4, T_REFI_7_8, T_RFC_90}, /* 512M */ + {B8, T_REFI_7_8, T_RFC_130}, /* 1GS4 */ + {B8, T_REFI_3_9, T_RFC_130}, /* 2GS4 */ + {B8, T_REFI_3_9, T_RFC_130}, /* 4G */ + {B8, T_REFI_3_9, T_RFC_210}, /* 8G */ + {B4, T_REFI_7_8, T_RFC_130}, /* 1GS2 */ + {B4, T_REFI_3_9, T_RFC_130}, /* 2GS2 */ +}; +EXPORT_SYMBOL_GPL(lpddr2_jedec_addressing_table); + +/* LPDDR2 AC timing parameters from JESD209-2 section 12 */ +const struct lpddr2_timings + lpddr2_jedec_timings[NUM_DDR_TIMING_TABLE_ENTRIES] = { + /* Speed bin 400(200 MHz) */ + [0] = { + .max_freq = 200000000, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 10000, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, 
+ .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 6000, + }, + /* Speed bin 533(266 MHz) */ + [1] = { + .max_freq = 266666666, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 7500, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, + .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 6000, + }, + /* Speed bin 800(400 MHz) */ + [2] = { + .max_freq = 400000000, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 7500, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, + .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 6000, + }, + /* Speed bin 1066(533 MHz) */ + [3] = { + .max_freq = 533333333, + .min_freq = 10000000, + .tRPab = 21000, + .tRCD = 18000, + .tWR = 15000, + .tRAS_min = 42000, + .tRRD = 10000, + .tWTR = 7500, + .tXP = 7500, + .tRTP = 7500, + .tCKESR = 15000, + .tDQSCK_max = 5500, + .tFAW = 50000, + .tZQCS = 90000, + .tZQCL = 360000, + .tZQinit = 1000000, + .tRAS_max_ns = 70000, + .tDQSCK_max_derated = 5620, + }, +}; +EXPORT_SYMBOL_GPL(lpddr2_jedec_timings); + +const struct lpddr2_min_tck lpddr2_jedec_min_tck = { + .tRPab = 3, + .tRCD = 3, + .tWR = 3, + .tRASmin = 3, + .tRRD = 2, + .tWTR = 2, + .tXP = 2, + .tRTP = 2, + .tCKE = 3, + .tCKESR = 3, + .tFAW = 8 +}; +EXPORT_SYMBOL_GPL(lpddr2_jedec_min_tck); -- cgit v1.2.3-70-g09d2 From 04db6e5fddca55186b6a74339a62c800150648bc Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Thu, 3 May 2012 11:57:39 -0600 Subject: dynamic_debug: remove unneeded includes These arent currently needed, so drop them. Some will probably get re-added when static-branches are added, but include loops prevent that at present. 
Signed-off-by: Jim Cromie Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 7ca29a0a3019..fc5d270751a7 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -14,24 +14,14 @@ #include #include -#include -#include -#include #include -#include #include -#include -#include #include -#include -#include #include #include #include -#include #include #include -#include #include extern struct _ddebug __start___verbose[]; -- cgit v1.2.3-70-g09d2 From fef15d2f3d97c9858694f234af94a4ef40d86679 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 7 May 2012 16:47:32 -0700 Subject: Revert "dynamic_debug: remove unneeded includes" This reverts commit 04db6e5fddca55186b6a74339a62c800150648bc. Odds are, we really don't want to revert all of these, and need to be more careful in the future to make sure we don't break the build of other arches. Reported-by: Stephen Rothwell Cc: Jim Cromie Signed-off-by: Greg Kroah-Hartman --- lib/dynamic_debug.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index fc5d270751a7..7ca29a0a3019 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -14,14 +14,24 @@ #include #include +#include +#include +#include #include +#include #include +#include +#include #include +#include +#include #include #include #include +#include #include #include +#include #include extern struct _ddebug __start___verbose[]; -- cgit v1.2.3-70-g09d2 From 9ff1f838e9c019b16b720dca9b04565f1a6e0316 Mon Sep 17 00:00:00 2001 From: Zhi Yong Wu Date: Mon, 7 May 2012 10:48:25 +0800 Subject: kobject: fix the uncorrect comment Signed-off-by: Zhi Yong Wu Signed-off-by: Greg Kroah-Hartman --- lib/kobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index 38fcc60e661b..e07ee1fcd6f1 100644 
--- a/lib/kobject.c +++ b/lib/kobject.c @@ -632,7 +632,7 @@ struct kobject *kobject_create(void) /** * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs * - * @name: the name for the kset + * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * * This function creates a kobject structure dynamically and registers it -- cgit v1.2.3-70-g09d2 From 649e6ee33f73ba1c4f2492c6de9aff2254b540cb Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 10 May 2012 04:30:45 +0200 Subject: printk() - restore timestamp printing at console output The output of the timestamps got lost with the conversion of the kmsg buffer to records; restore the old behavior. Document, that CONFIG_PRINTK_TIME now only controls the output of the timestamps in the syslog() system call and on the console, and not the recording of the timestamps. Cc: Joe Perches Cc: Linus Torvalds Cc: Sasha Levin Cc: Ingo Molnar Reported-by: Yinghai Lu Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- kernel/printk.c | 43 ++++++++++++++++++++++++++----------------- lib/Kconfig.debug | 16 ++++++++++------ 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/kernel/printk.c b/kernel/printk.c index 301fb0f09fbf..572941d7e5f7 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -786,6 +786,22 @@ static bool printk_time; #endif module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); +static size_t prepend_timestamp(unsigned long long t, char *buf) +{ + unsigned long rem_ns; + + if (!printk_time) + return 0; + + if (!buf) + return 15; + + rem_ns = do_div(t, 1000000000); + + return sprintf(buf, "[%5lu.%06lu] ", + (unsigned long) t, rem_ns / 1000); +} + static int syslog_print_line(u32 idx, char *text, size_t size) { struct log *msg; @@ -800,9 +816,7 @@ static int syslog_print_line(u32 idx, char *text, size_t size) len++; if (msg->level > 99) len++; - - if (printk_time) - len += 15; + len += 
prepend_timestamp(0, NULL); len += msg->text_len; len++; @@ -810,15 +824,7 @@ static int syslog_print_line(u32 idx, char *text, size_t size) } len = sprintf(text, "<%u>", msg->level); - - if (printk_time) { - unsigned long long t = msg->ts_nsec; - unsigned long rem_ns = do_div(t, 1000000000); - - len += sprintf(text + len, "[%5lu.%06lu] ", - (unsigned long) t, rem_ns / 1000); - } - + len += prepend_timestamp(msg->ts_nsec, text + len); if (len + msg->text_len > size) return -EINVAL; memcpy(text + len, log_text(msg), msg->text_len); @@ -1741,7 +1747,7 @@ again: for (;;) { struct log *msg; static char text[LOG_LINE_MAX]; - size_t len; + size_t len, l; int level; raw_spin_lock_irqsave(&logbuf_lock, flags); @@ -1761,10 +1767,13 @@ again: msg = log_from_idx(console_idx); level = msg->level & 7; - len = msg->text_len; - if (len+1 >= sizeof(text)) - len = sizeof(text)-1; - memcpy(text, log_text(msg), len); + + len = prepend_timestamp(msg->ts_nsec, text); + l = msg->text_len; + if (len + l + 1 >= sizeof(text)) + l = sizeof(text) - len - 1; + memcpy(text + len, log_text(msg), l); + len += l; text[len++] = '\n'; console_idx = log_next(console_idx); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ef8192bc0c33..e11934177030 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3,12 +3,16 @@ config PRINTK_TIME bool "Show timing information on printks" depends on PRINTK help - Selecting this option causes timing information to be - included in printk output. This allows you to measure - the interval between kernel operations, including bootup - operations. This is useful for identifying long delays - in kernel startup. Or add printk.time=1 at boot-time. - See Documentation/kernel-parameters.txt + Selecting this option causes time stamps of the printk() + messages to be added to the output of the syslog() system + call and at the console. + + The timestamp is always recorded internally, and exported + to /dev/kmsg. 
This flag just specifies if the timestamp should + be included, not that the timestamp is recorded. + + The behavior is also controlled by the kernel command line + parameter printk.time=1. See Documentation/kernel-parameters.txt config DEFAULT_MESSAGE_LOGLEVEL int "Default message log level (1-7)" -- cgit v1.2.3-70-g09d2 From 6684b5729df289ba60a404d8d095d820dc553cdf Mon Sep 17 00:00:00 2001 From: Oskar Schirmer Date: Wed, 16 May 2012 09:41:19 +0000 Subject: lib: Change mail address of Oskar Schirmer That old mail address doesn't exist any more. This changes all occurrences to my new address. Signed-off-by: Oskar Schirmer Cc: Paul Gortmaker Signed-off-by: Jiri Kosina --- include/linux/rational.h | 2 +- lib/rational.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/include/linux/rational.h b/include/linux/rational.h index 4f532fcd9eea..bfa6a2bcfb32 100644 --- a/include/linux/rational.h +++ b/include/linux/rational.h @@ -1,7 +1,7 @@ /* * rational fractions * - * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer * * helper functions when coping with rational numbers, * e.g. when calculating optimum numerator/denominator pairs for diff --git a/lib/rational.c b/lib/rational.c index d326da3976f5..f0aa21c2a762 100644 --- a/lib/rational.c +++ b/lib/rational.c @@ -1,7 +1,7 @@ /* * rational fractions * - * Copyright (C) 2009 emlix GmbH, Oskar Schirmer + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer * * helper functions when coping with rational numbers */ -- cgit v1.2.3-70-g09d2 From f674ef7b43881b2ac11f98d6ba2dc5d9dd0dd118 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:16 +1000 Subject: lib/raid6: fix test program build drags in headers which are not visible to userspace, thus breaking the build for the test program.
Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/algos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 8b02f60ffc86..f6a0f7899163 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -17,11 +17,11 @@ */ #include -#include #ifndef __KERNEL__ #include #include #else +#include #include #if !RAID6_USE_EMPTY_ZERO_PAGE /* In .bss so it's zeroed */ -- cgit v1.2.3-70-g09d2 From 048a8b8c89dc427dd7a58527c8923224b1e66d83 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:18 +1000 Subject: lib/raid6: Add SSSE3 optimized recovery functions Add SSSE3 optimized recovery functions, as well as a system for selecting the most appropriate recovery functions to use. Originally-by: H. Peter Anvin Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- include/linux/raid/pq.h | 18 ++- lib/raid6/Makefile | 2 +- lib/raid6/algos.c | 37 ++++++ lib/raid6/mktables.c | 25 ++++ lib/raid6/recov.c | 15 ++- lib/raid6/recov_ssse3.c | 335 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 425 insertions(+), 7 deletions(-) create mode 100644 lib/raid6/recov_ssse3.c (limited to 'lib') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 53272e9860a7..640c69ceec96 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -99,8 +99,20 @@ extern const struct raid6_calls raid6_altivec2; extern const struct raid6_calls raid6_altivec4; extern const struct raid6_calls raid6_altivec8; +struct raid6_recov_calls { + void (*data2)(int, size_t, int, int, void **); + void (*datap)(int, size_t, int, void **); + int (*valid)(void); + const char *name; + int priority; +}; + +extern const struct raid6_recov_calls raid6_recov_intx1; +extern const struct raid6_recov_calls raid6_recov_ssse3; + /* Algorithm list */ extern const struct raid6_calls * const raid6_algos[]; +extern const struct raid6_recov_calls *const raid6_recov_algos[]; int 
raid6_select_algo(void); /* Return values from chk_syndrome */ @@ -111,14 +123,16 @@ int raid6_select_algo(void); /* Galois field tables */ extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256))); +extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256))); extern const u8 raid6_gfexp[256] __attribute__((aligned(256))); extern const u8 raid6_gfinv[256] __attribute__((aligned(256))); extern const u8 raid6_gfexi[256] __attribute__((aligned(256))); /* Recovery routines */ -void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, +extern void (*raid6_2data_recov)(int disks, size_t bytes, int faila, int failb, void **ptrs); -void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs); +extern void (*raid6_datap_recov)(int disks, size_t bytes, int faila, + void **ptrs); void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs); diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 8a38102770f3..de06dfe165b8 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o -raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ +raid6_pq-y += algos.o recov.o recov_ssse3.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ altivec8.o mmx.o sse1.o sse2.o hostprogs-y += mktables diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index f6a0f7899163..5a7f8022be13 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -64,6 +64,20 @@ const struct raid6_calls * const raid6_algos[] = { NULL }; +void (*raid6_2data_recov)(int, size_t, int, int, void **); +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +void (*raid6_datap_recov)(int, size_t, int, void **); +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +const struct raid6_recov_calls *const raid6_recov_algos[] = { +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + &raid6_recov_ssse3, +#endif + &raid6_recov_intx1, + NULL +}; + #ifdef __KERNEL__ #define 
RAID6_TIME_JIFFIES_LG2 4 #else @@ -72,6 +86,26 @@ const struct raid6_calls * const raid6_algos[] = { #define time_before(x, y) ((x) < (y)) #endif +static inline void raid6_choose_recov(void) +{ + const struct raid6_recov_calls *const *algo; + const struct raid6_recov_calls *best; + + for (best = NULL, algo = raid6_recov_algos; *algo; algo++) + if (!best || (*algo)->priority > best->priority) + if (!(*algo)->valid || (*algo)->valid()) + best = *algo; + + if (best) { + raid6_2data_recov = best->data2; + raid6_datap_recov = best->datap; + + printk("raid6: using %s recovery algorithm\n", best->name); + } else + printk("raid6: Yikes! No recovery algorithm found!\n"); +} + + /* Try to pick the best algorithm */ /* This code uses the gfmul table as convenient data set to abuse */ @@ -141,6 +175,9 @@ int __init raid6_select_algo(void) free_pages((unsigned long)syndromes, 1); + /* select raid recover functions */ + raid6_choose_recov(); + return best ? 0 : -EINVAL; } diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 8a3780902cec..39787db588b0 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -81,6 +81,31 @@ int main(int argc, char *argv[]) printf("EXPORT_SYMBOL(raid6_gfmul);\n"); printf("#endif\n"); + /* Compute vector multiplication table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_vgfmul[256][32] =\n" + "{\n"); + for (i = 0; i < 256; i++) { + printf("\t{\n"); + for (j = 0; j < 16; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, j + k), + (k == 7) ? '\n' : ' '); + } + for (j = 0; j < 16; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, (j + k) << 4), + (k == 7) ? 
'\n' : ' '); + } + printf("\t},\n"); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_vgfmul);\n"); + printf("#endif\n"); + /* Compute power-of-2 table (exponent) */ v = 1; printf("\nconst u8 __attribute__((aligned(256)))\n" diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index fe275d7b6b36..1805a5cc5daa 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -22,7 +22,7 @@ #include /* Recover two failed data blocks. */ -void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, +void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, void **ptrs) { u8 *p, *q, *dp, *dq; @@ -64,10 +64,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, p++; q++; } } -EXPORT_SYMBOL_GPL(raid6_2data_recov); /* Recover failure of one data block plus the P block */ -void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ @@ -96,7 +95,15 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) q++; dq++; } } -EXPORT_SYMBOL_GPL(raid6_datap_recov); + + +const struct raid6_recov_calls raid6_recov_intx1 = { + .data2 = raid6_2data_recov_intx1, + .datap = raid6_datap_recov_intx1, + .valid = NULL, + .name = "intx1", + .priority = 0, +}; #ifndef __KERNEL__ /* Testing only */ diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c new file mode 100644 index 000000000000..37ae61930559 --- /dev/null +++ b/lib/raid6/recov_ssse3.c @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include +#include "x86.h" + +static int raid6_has_ssse3(void) +{ + return boot_cpu_has(X86_FEATURE_XMM) && + boot_cpu_has(X86_FEATURE_XMM2) && + boot_cpu_has(X86_FEATURE_SSSE3); +} + +void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, + void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + static const u8 __aligned(16) x0f[16] = { + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0])); + +#ifdef CONFIG_X86_64 + asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0])); + asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0])); + asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16])); +#endif + + /* Now do it... 
*/ + while (bytes) { +#ifdef CONFIG_X86_64 + /* xmm6, xmm14, xmm15 */ + + asm volatile("movdqa %0,%%xmm1" : : "m" (q[0])); + asm volatile("movdqa %0,%%xmm9" : : "m" (q[16])); + asm volatile("movdqa %0,%%xmm0" : : "m" (p[0])); + asm volatile("movdqa %0,%%xmm8" : : "m" (p[16])); + asm volatile("pxor %0,%%xmm1" : : "m" (dq[0])); + asm volatile("pxor %0,%%xmm9" : : "m" (dq[16])); + asm volatile("pxor %0,%%xmm0" : : "m" (dp[0])); + asm volatile("pxor %0,%%xmm8" : : "m" (dp[16])); + + /* xmm0/8 = px */ + + asm volatile("movdqa %xmm6,%xmm4"); + asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); + asm volatile("movdqa %xmm6,%xmm12"); + asm volatile("movdqa %xmm5,%xmm13"); + asm volatile("movdqa %xmm1,%xmm3"); + asm volatile("movdqa %xmm9,%xmm11"); + asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */ + asm volatile("movdqa %xmm8,%xmm10"); + asm volatile("psraw $4,%xmm1"); + asm volatile("psraw $4,%xmm9"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm11"); + asm volatile("pand %xmm7,%xmm1"); + asm volatile("pand %xmm7,%xmm9"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm11,%xmm12"); + asm volatile("pshufb %xmm1,%xmm5"); + asm volatile("pshufb %xmm9,%xmm13"); + asm volatile("pxor %xmm4,%xmm5"); + asm volatile("pxor %xmm12,%xmm13"); + + /* xmm5/13 = qx */ + + asm volatile("movdqa %xmm14,%xmm4"); + asm volatile("movdqa %xmm15,%xmm1"); + asm volatile("movdqa %xmm14,%xmm12"); + asm volatile("movdqa %xmm15,%xmm9"); + asm volatile("movdqa %xmm2,%xmm3"); + asm volatile("movdqa %xmm10,%xmm11"); + asm volatile("psraw $4,%xmm2"); + asm volatile("psraw $4,%xmm10"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm11"); + asm volatile("pand %xmm7,%xmm2"); + asm volatile("pand %xmm7,%xmm10"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm11,%xmm12"); + asm volatile("pshufb %xmm2,%xmm1"); + asm volatile("pshufb %xmm10,%xmm9"); + asm volatile("pxor %xmm4,%xmm1"); + asm volatile("pxor %xmm12,%xmm9"); + + /* 
xmm1/9 = pbmul[px] */ + asm volatile("pxor %xmm5,%xmm1"); + asm volatile("pxor %xmm13,%xmm9"); + /* xmm1/9 = db = DQ */ + asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16])); + + asm volatile("pxor %xmm1,%xmm0"); + asm volatile("pxor %xmm9,%xmm8"); + asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0])); + asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16])); + + bytes -= 32; + p += 32; + q += 32; + dp += 32; + dq += 32; +#else + asm volatile("movdqa %0,%%xmm1" : : "m" (*q)); + asm volatile("movdqa %0,%%xmm0" : : "m" (*p)); + asm volatile("pxor %0,%%xmm1" : : "m" (*dq)); + asm volatile("pxor %0,%%xmm0" : : "m" (*dp)); + + /* 1 = dq ^ q + * 0 = dp ^ p + */ + asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0])); + asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16])); + + asm volatile("movdqa %xmm1,%xmm3"); + asm volatile("psraw $4,%xmm1"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm1"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm1,%xmm5"); + asm volatile("pxor %xmm4,%xmm5"); + + asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */ + + /* xmm5 = qx */ + + asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0])); + asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16])); + asm volatile("movdqa %xmm2,%xmm3"); + asm volatile("psraw $4,%xmm2"); + asm volatile("pand %xmm7,%xmm3"); + asm volatile("pand %xmm7,%xmm2"); + asm volatile("pshufb %xmm3,%xmm4"); + asm volatile("pshufb %xmm2,%xmm1"); + asm volatile("pxor %xmm4,%xmm1"); + + /* xmm1 = pbmul[px] */ + asm volatile("pxor %xmm5,%xmm1"); + /* xmm1 = db = DQ */ + asm volatile("movdqa %%xmm1,%0" : "=m" (*dq)); + + asm volatile("pxor %xmm1,%xmm0"); + asm volatile("movdqa %%xmm0,%0" : "=m" (*dp)); + + bytes -= 16; + p += 16; + q += 16; + dp += 16; + dq += 16; +#endif + } + + kernel_fpu_end(); +} + + +void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + static const 
u8 __aligned(16) x0f[16] = { + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f}; + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_fpu_begin(); + + asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0])); + + while (bytes) { +#ifdef CONFIG_X86_64 + asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); + asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16])); + asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); + asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); + + /* xmm3 = q[0] ^ dq[0] */ + + asm volatile("pxor %0, %%xmm4" : : "m" (q[16])); + asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + + /* xmm4 = q[16] ^ dq[16] */ + + asm volatile("movdqa %xmm3, %xmm6"); + asm volatile("movdqa %xmm4, %xmm8"); + + /* xmm4 = xmm8 = q[16] ^ dq[16] */ + + asm volatile("psraw $4, %xmm3"); + asm volatile("pand %xmm7, %xmm6"); + asm volatile("pand %xmm7, %xmm3"); + asm volatile("pshufb %xmm6, %xmm0"); + asm volatile("pshufb %xmm3, %xmm1"); + asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0])); + asm volatile("pxor %xmm0, %xmm1"); + asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16])); + + /* xmm1 = qmul[q[0] ^ dq[0]] */ + + asm volatile("psraw $4, %xmm4"); + asm volatile("pand %xmm7, %xmm8"); + asm volatile("pand %xmm7, %xmm4"); + asm volatile("pshufb %xmm8, %xmm10"); + asm volatile("pshufb %xmm4, %xmm11"); + asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); + asm volatile("pxor %xmm10, %xmm11"); + asm volatile("movdqa %0, %%xmm12" : : "m" (p[16])); + + /* xmm11 = qmul[q[16] ^ dq[16]] */ + 
+ asm volatile("pxor %xmm1, %xmm2"); + + /* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */ + + asm volatile("pxor %xmm11, %xmm12"); + + /* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */ + + asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16])); + + asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); + asm volatile("movdqa %%xmm12, %0" : "=m" (p[16])); + + bytes -= 32; + p += 32; + q += 32; + dq += 32; + +#else + asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0])); + asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0])); + asm volatile("pxor %0, %%xmm3" : : "m" (q[0])); + asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16])); + + /* xmm3 = *q ^ *dq */ + + asm volatile("movdqa %xmm3, %xmm6"); + asm volatile("movdqa %0, %%xmm2" : : "m" (p[0])); + asm volatile("psraw $4, %xmm3"); + asm volatile("pand %xmm7, %xmm6"); + asm volatile("pand %xmm7, %xmm3"); + asm volatile("pshufb %xmm6, %xmm0"); + asm volatile("pshufb %xmm3, %xmm1"); + asm volatile("pxor %xmm0, %xmm1"); + + /* xmm1 = qmul[*q ^ *dq */ + + asm volatile("pxor %xmm1, %xmm2"); + + /* xmm2 = *p ^ qmul[*q ^ *dq] */ + + asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0])); + asm volatile("movdqa %%xmm2, %0" : "=m" (p[0])); + + bytes -= 16; + p += 16; + q += 16; + dq += 16; +#endif + } + + kernel_fpu_end(); +} + +const struct raid6_recov_calls raid6_recov_ssse3 = { + .data2 = raid6_2data_recov_ssse3, + .datap = raid6_datap_recov_ssse3, + .valid = raid6_has_ssse3, +#ifdef CONFIG_X86_64 + .name = "ssse3x2", +#else + .name = "ssse3x1", +#endif + .priority = 1, +}; + +#endif -- cgit v1.2.3-70-g09d2 From 2dbf708448c836754d25fe6108c5bfe1f5697c95 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:23 +1000 Subject: lib/raid6: update test program for recovery functions Test each combination of recovery and syndrome generation functions. 
Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/test/Makefile | 2 +- lib/raid6/test/test.c | 32 +++++++++++++++++++++----------- lib/raid6/x86.h | 15 ++++++++++----- 3 files changed, 32 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index aa651697b6dc..c76151d94764 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -23,7 +23,7 @@ RANLIB = ranlib all: raid6.a raid6test raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ - altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \ + altivec1.o altivec2.o altivec4.o altivec8.o recov.o recov_ssse3.o algos.o \ tables.o rm -f $@ $(AR) cq $@ $^ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 7a930318b17d..5a485b7a7d3c 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -90,25 +90,35 @@ static int test_disks(int i, int j) int main(int argc, char *argv[]) { const struct raid6_calls *const *algo; + const struct raid6_recov_calls *const *ra; int i, j; int err = 0; makedata(); - for (algo = raid6_algos; *algo; algo++) { - if (!(*algo)->valid || (*algo)->valid()) { - raid6_call = **algo; + for (ra = raid6_recov_algos; *ra; ra++) { + if ((*ra)->valid && !(*ra)->valid()) + continue; + raid6_2data_recov = (*ra)->data2; + raid6_datap_recov = (*ra)->datap; - /* Nuke syndromes */ - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + printf("using recovery %s\n", (*ra)->name); - /* Generate assumed good syndrome */ - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, - (void **)&dataptrs); + for (algo = raid6_algos; *algo; algo++) { + if (!(*algo)->valid || (*algo)->valid()) { + raid6_call = **algo; - for (i = 0; i < NDISKS-1; i++) - for (j = i+1; j < NDISKS; j++) - err += test_disks(i, j); + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < 
NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } } printf("\n"); } diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index cb2a8c91c886..d55d63232c55 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -35,24 +35,29 @@ static inline void kernel_fpu_end(void) { } +#define __aligned(x) __attribute__((aligned(x))) + #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions * (fast save and restore) */ #define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ #define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ +#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ /* Should work well enough on modern CPUs for testing */ static inline int boot_cpu_has(int flag) { - u32 eax = (flag >> 5) ? 0x80000001 : 1; - u32 edx; + u32 eax = (flag & 0x20) ? 0x80000001 : 1; + u32 ecx, edx; asm volatile("cpuid" - : "+a" (eax), "=d" (edx) - : : "ecx", "ebx"); + : "+a" (eax), "=d" (edx), "=c" (ecx) + : : "ebx"); - return (edx >> (flag & 31)) & 1; + return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; } #endif /* ndef __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 96e67703e71f4b3cc32b747dbb6158ec74d01e19 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Tue, 22 May 2012 13:54:24 +1000 Subject: lib/raid6: cleanup gen_syndrome function selection Reorders functions in raid6_algos as well as the preference check to reduce the number of functions tested on initialization. Also, creates symmetry between choosing the gen_syndrome functions and choosing the recovery functions. 
Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/algos.c | 104 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 5a7f8022be13..589f5f50ad2e 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -34,10 +34,6 @@ struct raid6_calls raid6_call; EXPORT_SYMBOL_GPL(raid6_call); const struct raid6_calls * const raid6_algos[] = { - &raid6_intx1, - &raid6_intx2, - &raid6_intx4, - &raid6_intx8, #if defined(__ia64__) &raid6_intx16, &raid6_intx32, @@ -61,6 +57,10 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec4, &raid6_altivec8, #endif + &raid6_intx1, + &raid6_intx2, + &raid6_intx4, + &raid6_intx8, NULL }; @@ -86,7 +86,7 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = { #define time_before(x, y) ((x) < (y)) #endif -static inline void raid6_choose_recov(void) +static inline const struct raid6_recov_calls *raid6_choose_recov(void) { const struct raid6_recov_calls *const *algo; const struct raid6_recov_calls *best; @@ -103,62 +103,38 @@ static inline void raid6_choose_recov(void) printk("raid6: using %s recovery algorithm\n", best->name); } else printk("raid6: Yikes! 
No recovery algorithm found!\n"); -} - -/* Try to pick the best algorithm */ -/* This code uses the gfmul table as convenient data set to abuse */ + return best; +} -int __init raid6_select_algo(void) +static inline const struct raid6_calls *raid6_choose_gen( + void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) { - const struct raid6_calls * const * algo; - const struct raid6_calls * best; - char *syndromes; - void *dptrs[(65536/PAGE_SIZE)+2]; - int i, disks; - unsigned long perf, bestperf; - int bestprefer; - unsigned long j0, j1; - - disks = (65536/PAGE_SIZE)+2; - for ( i = 0 ; i < disks-2 ; i++ ) { - dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; - } - - /* Normal code - use a 2-page allocation to avoid D$ conflict */ - syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); - - if ( !syndromes ) { - printk("raid6: Yikes! No memory available.\n"); - return -ENOMEM; - } - - dptrs[disks-2] = syndromes; - dptrs[disks-1] = syndromes + PAGE_SIZE; + unsigned long perf, bestperf, j0, j1; + const struct raid6_calls *const *algo; + const struct raid6_calls *best; - bestperf = 0; bestprefer = 0; best = NULL; + for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + if (!best || (*algo)->prefer >= best->prefer) { + if ((*algo)->valid && !(*algo)->valid()) + continue; - for ( algo = raid6_algos ; *algo ; algo++ ) { - if ( !(*algo)->valid || (*algo)->valid() ) { perf = 0; preempt_disable(); j0 = jiffies; - while ( (j1 = jiffies) == j0 ) + while ((j1 = jiffies) == j0) cpu_relax(); while (time_before(jiffies, j1 + (1<gen_syndrome(disks, PAGE_SIZE, dptrs); + (*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs); perf++; } preempt_enable(); - if ( (*algo)->prefer > bestprefer || - ((*algo)->prefer == bestprefer && - perf > bestperf) ) { - best = *algo; - bestprefer = best->prefer; + if (perf > bestperf) { bestperf = perf; + best = *algo; } printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); @@ -173,12 +149,46 @@ int 
__init raid6_select_algo(void) } else printk("raid6: Yikes! No algorithm found!\n"); - free_pages((unsigned long)syndromes, 1); + return best; +} + + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ + const int disks = (65536/PAGE_SIZE)+2; + + const struct raid6_calls *gen_best; + const struct raid6_recov_calls *rec_best; + char *syndromes; + void *dptrs[(65536/PAGE_SIZE)+2]; + int i; + + for (i = 0; i < disks-2; i++) + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; + + /* Normal code - use a 2-page allocation to avoid D$ conflict */ + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + + if (!syndromes) { + printk("raid6: Yikes! No memory available.\n"); + return -ENOMEM; + } + + dptrs[disks-2] = syndromes; + dptrs[disks-1] = syndromes + PAGE_SIZE; + + /* select raid gen_syndrome function */ + gen_best = raid6_choose_gen(&dptrs, disks); /* select raid recover functions */ - raid6_choose_recov(); + rec_best = raid6_choose_recov(); + + free_pages((unsigned long)syndromes, 1); - return best ? 0 : -EINVAL; + return gen_best && rec_best ? 0 : -EINVAL; } static void raid6_exit(void) -- cgit v1.2.3-70-g09d2 From 2922585b93294d47172a765115e0dbc1bfe1be19 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 May 2012 13:12:28 -0700 Subject: lib: Sparc's strncpy_from_user is generic enough, move under lib/ To use this, an architecture simply needs to: 1) Provide a user_addr_max() implementation via asm/uaccess.h 2) Add "select GENERIC_STRNCPY_FROM_USER" to their arch Kconfig 3) Remove the existing strncpy_from_user() implementation and symbol exports their architecture had. Signed-off-by: David S. 
Miller Acked-by: David Howells --- arch/sparc/Kconfig | 1 + arch/sparc/lib/usercopy.c | 144 --------------------------------------------- lib/Kconfig | 3 + lib/Makefile | 2 + lib/strncpy_from_user.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 144 deletions(-) create mode 100644 lib/strncpy_from_user.c (limited to 'lib') diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 051af37f2b49..22474233205b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -32,6 +32,7 @@ config SPARC select HAVE_NMI_WATCHDOG if SPARC64 select HAVE_BPF_JIT select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER config SPARC32 def_bool !64BIT diff --git a/arch/sparc/lib/usercopy.c b/arch/sparc/lib/usercopy.c index 0b12e91d6ccc..5c4284ce1c03 100644 --- a/arch/sparc/lib/usercopy.c +++ b/arch/sparc/lib/usercopy.c @@ -1,153 +1,9 @@ #include -#include #include -#include #include -#include - void copy_from_user_overflow(void) { WARN(1, "Buffer overflow detected!\n"); } EXPORT_SYMBOL(copy_from_user_overflow); - -static inline long find_zero(unsigned long mask) -{ - long byte = 0; - -#ifdef __BIG_ENDIAN -#ifdef CONFIG_64BIT - if (mask >> 32) - mask >>= 32; - else - byte = 4; -#endif - if (mask >> 16) - mask >>= 16; - else - byte += 2; - return (mask >> 8) ? byte : byte + 1; -#else -#ifdef CONFIG_64BIT - if (!((unsigned int) mask)) { - mask >>= 32; - byte = 4; - } -#endif - if (!(mask & 0xffff)) { - mask >>= 16; - byte += 2; - } - return (mask & 0xff) ? byte : byte + 1; -#endif -} - -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS -#define IS_UNALIGNED(src, dst) 0 -#else -#define IS_UNALIGNED(src, dst) \ - (((long) dst | (long) src) & (sizeof(long) - 1)) -#endif - -/* - * Do a strncpy, return length of string without final '\0'. - * 'count' is the user-supplied count (return 'count' if we - * hit it), 'max' is the address space maximum (and we return - * -EFAULT if we hit it). 
- */ -static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) -{ - const unsigned long high_bits = REPEAT_BYTE(0xfe) + 1; - const unsigned long low_bits = REPEAT_BYTE(0x7f); - long res = 0; - - /* - * Truncate 'max' to the user-specified limit, so that - * we only have one limit we need to check in the loop - */ - if (max > count) - max = count; - - if (IS_UNALIGNED(src, dst)) - goto byte_at_a_time; - - while (max >= sizeof(unsigned long)) { - unsigned long c, v, rhs; - - /* Fall back to byte-at-a-time if we get a page fault */ - if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) - break; - rhs = c | low_bits; - v = (c + high_bits) & ~rhs; - *(unsigned long *)(dst+res) = c; - if (v) { - v = (c & low_bits) + low_bits; - v = ~(v | rhs); - return res + find_zero(v); - } - res += sizeof(unsigned long); - max -= sizeof(unsigned long); - } - -byte_at_a_time: - while (max) { - char c; - - if (unlikely(__get_user(c,src+res))) - return -EFAULT; - dst[res] = c; - if (!c) - return res; - res++; - max--; - } - - /* - * Uhhuh. We hit 'max'. But was that the user-specified maximum - * too? If so, that's ok - we got as much as the user asked for. - */ - if (res >= count) - return res; - - /* - * Nope: we hit the address space limit, and we still had more - * characters the caller would have wanted. That's an EFAULT. - */ - return -EFAULT; -} - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). 
- * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long strncpy_from_user(char *dst, const char __user *src, long count) -{ - unsigned long max_addr, src_addr; - - if (unlikely(count <= 0)) - return 0; - - max_addr = user_addr_max(); - src_addr = (unsigned long)src; - if (likely(src_addr < max_addr)) { - unsigned long max = max_addr - src_addr; - return do_strncpy_from_user(dst, src, count, max); - } - return -EFAULT; -} -EXPORT_SYMBOL(strncpy_from_user); diff --git a/lib/Kconfig b/lib/Kconfig index 4a8aba2e5cc0..49cb46337db9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -16,6 +16,9 @@ config BITREVERSE config RATIONAL boolean +config GENERIC_STRNCPY_FROM_USER + bool + config GENERIC_FIND_FIRST_BIT bool diff --git a/lib/Makefile b/lib/Makefile index 18515f0267c4..57538b4d01d2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,6 +123,8 @@ obj-$(CONFIG_SIGNATURE) += digsig.o obj-$(CONFIG_CLZ_TAB) += clz_tab.o +obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c new file mode 100644 index 000000000000..c4c09b0e96ba --- /dev/null +++ b/lib/strncpy_from_user.c @@ -0,0 +1,146 @@ +#include +#include +#include +#include + +#include + +static inline long find_zero(unsigned long mask) +{ + long byte = 0; + +#ifdef __BIG_ENDIAN +#ifdef CONFIG_64BIT + if (mask >> 32) + mask >>= 32; + else + byte = 4; +#endif + if (mask >> 16) + mask >>= 16; + else + byte += 2; + return (mask >> 8) ? byte : byte + 1; +#else +#ifdef CONFIG_64BIT + if (!((unsigned int) mask)) { + mask >>= 32; + byte = 4; + } +#endif + if (!(mask & 0xffff)) { + mask >>= 16; + byte += 2; + } + return (mask & 0xff) ? 
byte : byte + 1; +#endif +} + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#define IS_UNALIGNED(src, dst) 0 +#else +#define IS_UNALIGNED(src, dst) \ + (((long) dst | (long) src) & (sizeof(long) - 1)) +#endif + +/* + * Do a strncpy, return length of string without final '\0'. + * 'count' is the user-supplied count (return 'count' if we + * hit it), 'max' is the address space maximum (and we return + * -EFAULT if we hit it). + */ +static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) +{ + const unsigned long high_bits = REPEAT_BYTE(0xfe) + 1; + const unsigned long low_bits = REPEAT_BYTE(0x7f); + long res = 0; + + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + + if (IS_UNALIGNED(src, dst)) + goto byte_at_a_time; + + while (max >= sizeof(unsigned long)) { + unsigned long c, v, rhs; + + /* Fall back to byte-at-a-time if we get a page fault */ + if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + break; + rhs = c | low_bits; + v = (c + high_bits) & ~rhs; + *(unsigned long *)(dst+res) = c; + if (v) { + v = (c & low_bits) + low_bits; + v = ~(v | rhs); + return res + find_zero(v); + } + res += sizeof(unsigned long); + max -= sizeof(unsigned long); + } + +byte_at_a_time: + while (max) { + char c; + + if (unlikely(__get_user(c,src+res))) + return -EFAULT; + dst[res] = c; + if (!c) + return res; + res++; + max--; + } + + /* + * Uhhuh. We hit 'max'. But was that the user-specified maximum + * too? If so, that's ok - we got as much as the user asked for. + */ + if (res >= count) + return res; + + /* + * Nope: we hit the address space limit, and we still had more + * characters the caller would have wanted. That's an EFAULT. + */ + return -EFAULT; +} + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. 
This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long strncpy_from_user(char *dst, const char __user *src, long count) +{ + unsigned long max_addr, src_addr; + + if (unlikely(count <= 0)) + return 0; + + max_addr = user_addr_max(); + src_addr = (unsigned long)src; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strncpy_from_user(dst, src, count, max); + } + return -EFAULT; +} +EXPORT_SYMBOL(strncpy_from_user); -- cgit v1.2.3-70-g09d2 From 36126f8f2ed8168eb13aa0662b9b9585cba100a9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 May 2012 10:43:17 -0700 Subject: word-at-a-time: make the interfaces truly generic This changes the interfaces in <asm/word-at-a-time.h> to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized <asm/word-at-a-time.h> file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. 
And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an *exact* mask of which byte had the first zero. This is run directly *outside* the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly *which* byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. - The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. 
Signed-off-by: Linus Torvalds --- arch/openrisc/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/x86/include/asm/word-at-a-time.h | 32 +++++++++++++++++++-- fs/namei.c | 22 ++++++++------- include/asm-generic/word-at-a-time.h | 52 +++++++++++++++++++++++++++++++++++ lib/strncpy_from_user.c | 47 +++++-------------------------- 6 files changed, 102 insertions(+), 53 deletions(-) create mode 100644 include/asm-generic/word-at-a-time.h (limited to 'lib') diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index c936483bc8e2..3f35c38d7b64 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -66,3 +66,4 @@ generic-y += topology.h generic-y += types.h generic-y += ucontext.h generic-y += user.h +generic-y += word-at-a-time.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 2c2e38821f60..67f83e0a0d68 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -21,3 +21,4 @@ generic-y += div64.h generic-y += local64.h generic-y += irq_regs.h generic-y += local.h +generic-y += word-at-a-time.h diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index ae03facfadd6..5b238981542a 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -10,6 +10,11 @@ * bit count instruction, that might be better than the multiply * and shift, for example. 
*/ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } #ifdef CONFIG_64BIT @@ -37,10 +42,31 @@ static inline long count_masked_bytes(long mask) #endif -/* Return the high bit set in the first byte that is a zero */ -static inline unsigned long has_zero(unsigned long a) +/* Return nonzero if it has a zero */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +/* The mask we created is directly usable as a bytemask */ +#define zero_bytemask(mask) (mask) + +static inline unsigned long find_zero(unsigned long mask) { - return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); + return count_masked_bytes(mask); } /* diff --git a/fs/namei.c b/fs/namei.c index 93ff12b1a1de..c651f02c9fec 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1452,7 +1452,8 @@ EXPORT_SYMBOL(full_name_hash); */ static inline unsigned long hash_name(const char *name, unsigned int *hashp) { - unsigned long a, mask, hash, len; + unsigned long a, b, adata, bdata, mask, hash, len; + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; hash = a = 0; len = -sizeof(unsigned long); @@ -1460,17 +1461,18 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) hash = (hash + a) * 9; len += sizeof(unsigned long); a = load_unaligned_zeropad(name+len); - /* Do we have any NUL or '/' bytes in this word? 
*/ - mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/')); - } while (!mask); - - /* The mask *below* the first high bit set */ - mask = (mask - 1) & ~mask; - mask >>= 7; - hash += a & mask; + b = a ^ REPEAT_BYTE('/'); + } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); + + adata = prep_zero_mask(a, adata, &constants); + bdata = prep_zero_mask(b, bdata, &constants); + + mask = create_zero_mask(adata | bdata); + + hash += a & zero_bytemask(mask); *hashp = fold_hash(hash); - return len + count_masked_bytes(mask); + return len + find_zero(mask); } #else diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h new file mode 100644 index 000000000000..3f21f1b72e45 --- /dev/null +++ b/include/asm-generic/word-at-a-time.h @@ -0,0 +1,52 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * This says "generic", but it's actually big-endian only. + * Little-endian can use more efficient versions of these + * interfaces, see for example + * arch/x86/include/asm/word-at-a-time.h + * for those. + */ + +#include + +struct word_at_a_time { + const unsigned long high_bits, low_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) } + +/* Bit set in the bytes that have a zero */ +static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c) +{ + unsigned long mask = (val & c->low_bits) + c->low_bits; + return ~(mask | rhs); +} + +#define create_zero_mask(mask) (mask) + +static inline long find_zero(unsigned long mask) +{ + long byte = 0; +#ifdef CONFIG_64BIT + if (mask >> 32) + mask >>= 32; + else + byte = 4; +#endif + if (mask >> 16) + mask >>= 16; + else + byte += 2; + return (mask >> 8) ? 
byte : byte + 1; +} + +static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +{ + unsigned long rhs = val | c->low_bits; + *data = rhs; + return (val + c->high_bits) & ~rhs; +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index c4c09b0e96ba..bb2b201d6ad0 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -4,37 +4,7 @@ #include #include - -static inline long find_zero(unsigned long mask) -{ - long byte = 0; - -#ifdef __BIG_ENDIAN -#ifdef CONFIG_64BIT - if (mask >> 32) - mask >>= 32; - else - byte = 4; -#endif - if (mask >> 16) - mask >>= 16; - else - byte += 2; - return (mask >> 8) ? byte : byte + 1; -#else -#ifdef CONFIG_64BIT - if (!((unsigned int) mask)) { - mask >>= 32; - byte = 4; - } -#endif - if (!(mask & 0xffff)) { - mask >>= 16; - byte += 2; - } - return (mask & 0xff) ? byte : byte + 1; -#endif -} +#include #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS #define IS_UNALIGNED(src, dst) 0 @@ -51,8 +21,7 @@ static inline long find_zero(unsigned long mask) */ static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) { - const unsigned long high_bits = REPEAT_BYTE(0xfe) + 1; - const unsigned long low_bits = REPEAT_BYTE(0x7f); + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; long res = 0; /* @@ -66,18 +35,16 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long goto byte_at_a_time; while (max >= sizeof(unsigned long)) { - unsigned long c, v, rhs; + unsigned long c, data; /* Fall back to byte-at-a-time if we get a page fault */ if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) break; - rhs = c | low_bits; - v = (c + high_bits) & ~rhs; *(unsigned long *)(dst+res) = c; - if (v) { - v = (c & low_bits) + low_bits; - v = ~(v | rhs); - return res + find_zero(v); + if (has_zero(c, &data, &constants)) { + data = prep_zero_mask(c, data, &constants); 
+ data = create_zero_mask(data); + return res + find_zero(data); } res += sizeof(unsigned long); max -= sizeof(unsigned long); -- cgit v1.2.3-70-g09d2 From a08c5356a3aaf638c41897ae4169de18db89595e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 May 2012 11:06:38 -0700 Subject: lib: add generic strnlen_user() function This adds a new generic optimized strnlen_user() function that uses the infrastructure to portably do efficient string handling. In many ways, strnlen is much simpler than strncpy, and in particular we can always pre-align the words we load from memory. That means that all the worries about alignment etc are a non-issue, so this one can easily be used on any architecture. You obviously do have to do the appropriate word-at-a-time.h macros. Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 ++ lib/Makefile | 1 + lib/strnlen_user.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 lib/strnlen_user.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 98230ac3db29..64ddc44d0b81 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -19,6 +19,9 @@ config RATIONAL config GENERIC_STRNCPY_FROM_USER bool +config GENERIC_STRNLEN_USER + bool + config GENERIC_FIND_FIRST_BIT bool diff --git a/lib/Makefile b/lib/Makefile index b98df505f335..77937a7dd5ce 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -126,6 +126,7 @@ obj-$(CONFIG_CLZ_TAB) += clz_tab.o obj-$(CONFIG_DDR) += jedec_ddr_data.o obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o +obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c new file mode 100644 index 000000000000..90900ecfeb54 --- /dev/null +++ b/lib/strnlen_user.c @@ -0,0 +1,138 @@ +#include +#include +#include + +#include + +/* Set bits in the first 'n' bytes when loaded from memory */ +#ifdef __LITTLE_ENDIAN +# define aligned_byte_mask(n) ((1ul << 
8*(n))-1) +#else +# define aligned_byte_mask(n) (~0xfful << 8*(7-(n))) +#endif + +/* + * Do a strnlen, return length of string *with* final '\0'. + * 'count' is the user-supplied count, while 'max' is the + * address space maximum. + * + * Return 0 for exceptions (which includes hitting the address + * space maximum), or 'count+1' if hitting the user-supplied + * maximum count. + * + * NOTE! We can sometimes overshoot the user-supplied maximum + * if it fits in a aligned 'long'. The caller needs to check + * the return value against "> max". + */ +static inline long do_strnlen_user(const char __user *src, unsigned long count, unsigned long max) +{ + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + long align, res = 0; + unsigned long c; + + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + + /* + * Do everything aligned. But that means that we + * need to also expand the maximum.. + */ + align = (sizeof(long) - 1) & (unsigned long)src; + src -= align; + max += align; + + if (unlikely(__get_user(c,(unsigned long __user *)src))) + return 0; + c |= aligned_byte_mask(align); + + for (;;) { + unsigned long data; + if (has_zero(c, &data, &constants)) { + data = prep_zero_mask(c, data, &constants); + data = create_zero_mask(data); + return res + find_zero(data) + 1 - align; + } + res += sizeof(unsigned long); + if (unlikely(max < sizeof(unsigned long))) + break; + max -= sizeof(unsigned long); + if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + return 0; + } + res -= align; + + /* + * Uhhuh. We hit 'max'. But was that the user-specified maximum + * too? If so, return the marker for "too long". + */ + if (res >= count) + return count+1; + + /* + * Nope: we hit the address space limit, and we still had more + * characters the caller would have wanted. That's 0. 
+ */ + return 0; +} + +/** + * strnlen_user: - Get the size of a user string INCLUDING final NUL. + * @str: The string to measure. + * @count: Maximum count (including NUL character) + * + * Context: User context only. This function may sleep. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * If the string is too long, returns 'count+1'. + * On exception (or invalid count), returns 0. + */ +long strnlen_user(const char __user *str, long count) +{ + unsigned long max_addr, src_addr; + + if (unlikely(count <= 0)) + return 0; + + max_addr = user_addr_max(); + src_addr = (unsigned long)str; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strnlen_user(str, count, max); + } + return 0; +} +EXPORT_SYMBOL(strnlen_user); + +/** + * strlen_user: - Get the size of a user string INCLUDING final NUL. + * @str: The string to measure. + * + * Context: User context only. This function may sleep. + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * + * If there is a limit on the length of a valid string, you may wish to + * consider using strnlen_user() instead. + */ +long strlen_user(const char __user *str) +{ + unsigned long max_addr, src_addr; + + max_addr = user_addr_max(); + src_addr = (unsigned long)str; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strnlen_user(str, ~0ul, max); + } + return 0; +} +EXPORT_SYMBOL(strlen_user); -- cgit v1.2.3-70-g09d2 From 69ea6405980f217557b6a58f70ff60d8d88519a5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 28 May 2012 12:59:56 +1000 Subject: lib: Fix generic strnlen_user for 32-bit big-endian machines The aligned_byte_mask() definition is wrong for 32-bit big-endian machines: the "7-(n)" part of the definition assumes a long is 8 bytes. 
This fixes it by using BITS_PER_LONG - 8 instead of 8*7. Tested on 32-bit and 64-bit PowerPC. Signed-off-by: Paul Mackerras Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- lib/strnlen_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 90900ecfeb54..a28df5206d95 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -8,7 +8,7 @@ #ifdef __LITTLE_ENDIAN # define aligned_byte_mask(n) ((1ul << 8*(n))-1) #else -# define aligned_byte_mask(n) (~0xfful << 8*(7-(n))) +# define aligned_byte_mask(n) (~0xfful << (BITS_PER_LONG - 8 - 8*(n))) #endif /* -- cgit v1.2.3-70-g09d2 From 2aa4ee2a8805ec0260dde971e9e6699917c868a7 Mon Sep 17 00:00:00 2001 From: Jim Kukunas Date: Mon, 28 May 2012 14:10:22 +1000 Subject: lib/raid6: fix sparse warnings in recovery functions Make the recovery functions static to fix the following sparse warnings: lib/raid6/recov.c:25:6: warning: symbol 'raid6_2data_recov_intx1' was not declared. Should it be static? lib/raid6/recov.c:69:6: warning: symbol 'raid6_datap_recov_intx1' was not declared. Should it be static? lib/raid6/recov_ssse3.c:22:6: warning: symbol 'raid6_2data_recov_ssse3' was not declared. Should it be static? lib/raid6/recov_ssse3.c:197:6: warning: symbol 'raid6_datap_recov_ssse3' was not declared. Should it be static? Reported-by: Fengguang Wu Signed-off-by: Jim Kukunas Signed-off-by: NeilBrown --- lib/raid6/recov.c | 7 ++++--- lib/raid6/recov_ssse3.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index 1805a5cc5daa..a95bccb8497d 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -22,8 +22,8 @@ #include /* Recover two failed data blocks. 
*/ -void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, - void **ptrs) +static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, + int failb, void **ptrs) { u8 *p, *q, *dp, *dq; u8 px, qx, db; @@ -66,7 +66,8 @@ void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, } /* Recover failure of one data block plus the P block */ -void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs) +static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, + void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index 37ae61930559..ecb710c0b4d9 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -19,8 +19,8 @@ static int raid6_has_ssse3(void) boot_cpu_has(X86_FEATURE_SSSE3); } -void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, - void **ptrs) +static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, + int failb, void **ptrs) { u8 *p, *q, *dp, *dq; const u8 *pbmul; /* P multiplier table for B data */ @@ -194,7 +194,8 @@ void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, } -void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) +static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, + void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ -- cgit v1.2.3-70-g09d2 From 3af684c7c5b3dddf7c5d83b8ad431380cdc6f164 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 29 May 2012 15:06:29 -0700 Subject: swiotlb: print physical addresses consistently with other parts of kernel Print swiotlb info in a style consistent with the %pR style used elsewhere in the kernel. 
For example: -Placing 64MB software IO TLB between ffff88007a662000 - ffff88007e662000 -software IO TLB at phys 0x7a662000 - 0x7e662000 +software IO TLB [mem 0x7a662000-0x7e661fff] (64MB) mapped at [ffff88007a662000-ffff88007e661fff] Signed-off-by: Bjorn Helgaas Cc: Yinghai Lu Cc: Konrad Rzeszutek Wilk Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/swiotlb.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 414f46ed1dcd..45bc1f83a5ad 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -130,11 +130,9 @@ void swiotlb_print_info(void) pstart = virt_to_phys(io_tlb_start); pend = virt_to_phys(io_tlb_end); - printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n", - bytes >> 20, io_tlb_start, io_tlb_end); - printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n", - (unsigned long long)pstart, - (unsigned long long)pend); + printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", + (unsigned long long)pstart, (unsigned long long)pend - 1, + bytes >> 20, io_tlb_start, io_tlb_end - 1); } void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) -- cgit v1.2.3-70-g09d2 From 17a801f4bfeb8d55df1b05fa7adb16ada504e765 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 29 May 2012 15:07:31 -0700 Subject: list_debug: WARN for adding something already in the list We were bitten by this at one point and added an additional sanity test for DEBUG_LIST. You can't validly add a list_head to a list where either prev or next is the same as the thing you're adding. 
Signed-off-by: Chris Metcalf Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/list_debug.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/list_debug.c b/lib/list_debug.c index 3810b481f940..23a5e031cd8b 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -31,6 +31,9 @@ void __list_add(struct list_head *new, "list_add corruption. prev->next should be " "next (%p), but was %p. (prev=%p).\n", next, prev->next, prev); + WARN(new == prev || new == next, + "list_add double add: new=%p, prev=%p, next=%p.\n", + new, prev, next); next->prev = new; new->next = next; new->prev = prev; -- cgit v1.2.3-70-g09d2 From 26d7b99b835294ab21e2a2b4b3bdf04b03b0028d Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 29 May 2012 15:07:31 -0700 Subject: lib/test-kstrtox.c: mark const init data with __initconst instead of __initdata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As long as there is no other non-const variable marked __initdata in the same compilation unit it doesn't hurt. If there were one however compilation would fail with error: $variablename causes a section type conflict because a section containing const variables is marked read only and so cannot contain non-const variables. 
Signed-off-by: Uwe Kleine-König Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test-kstrtox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test-kstrtox.c b/lib/test-kstrtox.c index d55769d63cb8..bea3f3fa3f02 100644 --- a/lib/test-kstrtox.c +++ b/lib/test-kstrtox.c @@ -11,7 +11,7 @@ struct test_fail { }; #define DEFINE_TEST_FAIL(test) \ - const struct test_fail test[] __initdata + const struct test_fail test[] __initconst #define DECLARE_TEST_OK(type, test_type) \ test_type { \ @@ -21,7 +21,7 @@ struct test_fail { } #define DEFINE_TEST_OK(type, test) \ - const type test[] __initdata + const type test[] __initconst #define TEST_FAIL(fn, type, fmt, test) \ { \ -- cgit v1.2.3-70-g09d2 From 68aecfb97978fe6730615f92f53c11149e929052 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 29 May 2012 15:07:32 -0700 Subject: lib/string_helpers.c: make arrays static Moving these arrays into static storage shrinks the kernel a bit: text data bss dec hex filename 723 112 64 899 383 lib/string_helpers.o 516 272 64 852 354 lib/string_helpers.o Cc: James Bottomley Cc: "Aneesh Kumar K.V" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/string_helpers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/string_helpers.c b/lib/string_helpers.c index dd4ece372699..1cffc223bff5 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -23,15 +23,15 @@ int string_get_size(u64 size, const enum string_size_units units, char *buf, int len) { - const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", + static const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL}; - const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", + static const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", NULL }; - const char **units_str[] = { + static const char **units_str[] = { 
[STRING_UNITS_10] = units_10, [STRING_UNITS_2] = units_2, }; - const unsigned int divisor[] = { + static const unsigned int divisor[] = { [STRING_UNITS_10] = 1000, [STRING_UNITS_2] = 1024, }; -- cgit v1.2.3-70-g09d2 From 05a6c8a9226599f921bd0b6e439dbc04df96a6fc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 29 May 2012 15:07:32 -0700 Subject: lib/bitmap.c: fix documentation for scnprintf() functions The code comments for bscnl_emit() and bitmap_scnlistprintf() are describing snprintf() return semantics, but these functions use scnprintf() return semantics. Fix that, and document the bitmap_scnprintf() return value as well. Cc: Ryota Ozaki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index b5a8b6ad2454..06fdfa1aeba7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -369,7 +369,8 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area); * @nmaskbits: size of bitmap, in bits * * Exactly @nmaskbits bits are displayed. Hex digits are grouped into - * comma-separated sets of eight digits per set. + * comma-separated sets of eight digits per set. Returns the number of + * characters which were written to *buf, excluding the trailing \0. */ int bitmap_scnprintf(char *buf, unsigned int buflen, const unsigned long *maskp, int nmaskbits) @@ -517,8 +518,8 @@ EXPORT_SYMBOL(bitmap_parse_user); * * Helper routine for bitmap_scnlistprintf(). Write decimal number * or range to buf, suppressing output past buf+buflen, with optional - * comma-prefix. Return len of what would be written to buf, if it - * all fit. + * comma-prefix. Return len of what was written to *buf, excluding the + * trailing \0. */ static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) { @@ -544,9 +545,8 @@ static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) * the range. 
Output format is compatible with the format * accepted as input by bitmap_parselist(). * - * The return value is the number of characters which would be - * generated for the given input, excluding the trailing '\0', as - * per ISO C99. + * The return value is the number of characters which were written to *buf + * excluding the trailing '\0', as per ISO C99's scnprintf. */ int bitmap_scnlistprintf(char *buf, unsigned int buflen, const unsigned long *maskp, int nmaskbits) -- cgit v1.2.3-70-g09d2 From 4796dd200db943e36f876e7029552212e5bbdf33 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 29 May 2012 15:07:33 -0700 Subject: vsprintf: fix %ps on non symbols when using kallsyms Using %ps in a printk format will sometimes fail silently and print the empty string if the address passed in does not match a symbol that kallsyms knows about. But using %pS will fall back to printing the full address if kallsyms can't find the symbol. Make %ps act the same as %pS by falling back to printing the address. While we're here also make %ps print the module that a symbol comes from so that it matches what %pS already does. 
Take this simple function for example (in a module): static void test_printk(void) { int test; pr_info("with pS: %pS\n", &test); pr_info("with ps: %ps\n", &test); } Before this patch: with pS: 0xdff7df44 with ps: After this patch: with pS: 0xdff7df44 with ps: 0xdff7df44 Signed-off-by: Stephen Boyd Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 7 +++++++ kernel/kallsyms.c | 32 ++++++++++++++++++++++++-------- lib/vsprintf.c | 2 +- 3 files changed, 32 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 387571959dd9..6883e197acb9 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -36,6 +36,7 @@ const char *kallsyms_lookup(unsigned long addr, /* Look up a kernel symbol and return it in a text buffer. */ extern int sprint_symbol(char *buffer, unsigned long address); +extern int sprint_symbol_no_offset(char *buffer, unsigned long address); extern int sprint_backtrace(char *buffer, unsigned long address); /* Look up a kernel symbol and print it to the kernel messages. */ @@ -80,6 +81,12 @@ static inline int sprint_symbol(char *buffer, unsigned long addr) return 0; } +static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr) +{ + *buffer = '\0'; + return 0; +} + static inline int sprint_backtrace(char *buffer, unsigned long addr) { *buffer = '\0'; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 079f1d39a8b8..2169feeba529 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -343,7 +343,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, /* Look up a kernel symbol and return it in a text buffer. 
*/ static int __sprint_symbol(char *buffer, unsigned long address, - int symbol_offset) + int symbol_offset, int add_offset) { char *modname; const char *name; @@ -358,13 +358,13 @@ static int __sprint_symbol(char *buffer, unsigned long address, if (name != buffer) strcpy(buffer, name); len = strlen(buffer); - buffer += len; offset -= symbol_offset; + if (add_offset) + len += sprintf(buffer + len, "+%#lx/%#lx", offset, size); + if (modname) - len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname); - else - len += sprintf(buffer, "+%#lx/%#lx", offset, size); + len += sprintf(buffer + len, " [%s]", modname); return len; } @@ -382,11 +382,27 @@ static int __sprint_symbol(char *buffer, unsigned long address, */ int sprint_symbol(char *buffer, unsigned long address) { - return __sprint_symbol(buffer, address, 0); + return __sprint_symbol(buffer, address, 0, 1); } - EXPORT_SYMBOL_GPL(sprint_symbol); +/** + * sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer + * @buffer: buffer to be stored + * @address: address to lookup + * + * This function looks up a kernel symbol with @address and stores its name + * and module name to @buffer if possible. If no symbol was found, just saves + * its @address as is. + * + * This function returns the number of bytes stored in @buffer. + */ +int sprint_symbol_no_offset(char *buffer, unsigned long address) +{ + return __sprint_symbol(buffer, address, 0, 0); +} +EXPORT_SYMBOL_GPL(sprint_symbol_no_offset); + /** * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer * @buffer: buffer to be stored @@ -403,7 +419,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol); */ int sprint_backtrace(char *buffer, unsigned long address) { - return __sprint_symbol(buffer, address, -1); + return __sprint_symbol(buffer, address, -1, 1); } /* Look up a kernel symbol and print it to the kernel messages. 
*/ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index abbabec9720a..f5dfe0ca34f6 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -436,7 +436,7 @@ char *symbol_string(char *buf, char *end, void *ptr, else if (ext != 'f' && ext != 's') sprint_symbol(sym, value); else - kallsyms_lookup(value, NULL, NULL, NULL, sym); + sprint_symbol_no_offset(sym, value); return string(buf, end, sym, spec); #else -- cgit v1.2.3-70-g09d2 From fd0a37355c4d39affa39d5cd75168fb94b292318 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 29 May 2012 15:07:34 -0700 Subject: spinlock_debug: print kallsyms name for lock When a spinlock warning is printed we usually get BUG: spinlock bad magic on CPU#0, modprobe/111 lock: 0xdff09f38, .magic: 00000000, .owner: /0, .owner_cpu: 0 but it's nicer to print the symbol for the lock if we have it so that we can avoid 'grep dff09f38 /proc/kallsyms' to find out which lock it was. Use kallsyms to print the symbol name so we get something a bit easier to read BUG: spinlock bad magic on CPU#0, modprobe/112 lock: test_lock, .magic: 00000000, .owner: /-1, .owner_cpu: 0 If the lock is not in kallsyms %ps will fall back to printing the address directly. Signed-off-by: Stephen Boyd Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/spinlock_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index 525d160d44f0..d0ec4f3d1593 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -58,7 +58,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", msg, raw_smp_processor_id(), current->comm, task_pid_nr(current)); - printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " + printk(KERN_EMERG " lock: %ps, .magic: %08x, .owner: %s/%d, " ".owner_cpu: %d\n", lock, lock->magic, owner ? 
owner->comm : "<none>", -- cgit v1.2.3-70-g09d2 From 5536805292e64393f57054de66578f17eb1ea994 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 29 May 2012 15:07:34 -0700 Subject: radix-tree: fix preload vector size We are not preallocating a sufficient number of nodes. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 86516f5588e3..d7c878cc006c 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -72,12 +72,25 @@ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; */ static struct kmem_cache *radix_tree_node_cachep; +/* + * The radix tree is variable-height, so an insert operation not only has + * to build the branch to its corresponding item, it also has to build the + * branch to existing items if the size has to be increased (by + * radix_tree_extend). + * + * The worst case is a zero height tree with just a single item at index 0, + * and then inserting an item at index ULONG_MAX. This requires 2 new branches + * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared. + * Hence: + */ +#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) + /* * Per-cpu pool of preloaded nodes */ struct radix_tree_preload { int nr; - struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; + struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE]; }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; -- cgit v1.2.3-70-g09d2 From 7c20342230ff370c397fc4a9c4c1e7a91964bb66 Mon Sep 17 00:00:00 2001 From: Pierre Carrier Date: Tue, 29 May 2012 15:07:35 -0700 Subject: lib/vsprintf.c: "%#o",0 becomes '0' instead of '00' number()'s behaviour is slightly changed: 0 becomes "0" instead of "00" when using the flag SPECIAL and base 8.
Before: Number\Format %o %#o %x %#x 0 0 00 0 0x0 1 1 01 1 0x1 16 20 020 10 0x10 After: Number\Format %o %#o %x %#x 0 0 0 0 0x0 1 1 01 1 0x1 16 20 020 10 0x10 Signed-off-by: Pierre Carrier Acked-by: Stephen Rothwell Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index f5dfe0ca34f6..5391299c1e78 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -284,6 +284,7 @@ char *number(char *buf, char *end, unsigned long long num, char locase; int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); int i; + bool is_zero = num == 0LL; /* locase = 0 or 0x20. ORing digits or letters with 'locase' * produces same digits or (maybe lowercased) letters */ @@ -305,8 +306,9 @@ char *number(char *buf, char *end, unsigned long long num, } } if (need_pfx) { - spec.field_width--; if (spec.base == 16) + spec.field_width -= 2; + else if (!is_zero) spec.field_width--; } @@ -353,9 +355,11 @@ char *number(char *buf, char *end, unsigned long long num, } /* "0x" / "0" prefix */ if (need_pfx) { - if (buf < end) - *buf = '0'; - ++buf; + if (spec.base == 16 || !is_zero) { + if (buf < end) + *buf = '0'; + ++buf; + } if (spec.base == 16) { if (buf < end) *buf = ('X' | locase); -- cgit v1.2.3-70-g09d2 From 0cfd32b736ae0c36b42697584811042726c07cba Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Wed, 30 May 2012 12:24:39 +0000 Subject: bql: Fix POSDIFF() to integer overflow aware. POSDIFF() fails to take into account integer overflow case. Signed-off-by: Hiroaki SHIMODA Cc: Tom Herbert Cc: Eric Dumazet Cc: Denys Fedoryshchenko Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- lib/dynamic_queue_limits.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index 6ab4587d052b..c87eb76f2fd4 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -10,7 +10,7 @@ #include #include -#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0) +#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0) /* Records completed count and recalculates the queue limit */ void dql_completed(struct dql *dql, unsigned int count) -- cgit v1.2.3-70-g09d2 From 25426b794efdc70dde7fd3134dc56fac3e7d562d Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Wed, 30 May 2012 12:25:19 +0000 Subject: bql: Avoid unneeded limit decrement. When below pattern is observed, TIME dql_queued() dql_completed() | a) initial state | | b) X bytes queued V c) Y bytes queued d) X bytes completed e) Z bytes queued f) Y bytes completed a) dql->limit has already some value and there is no in-flight packet. b) X bytes queued. c) Y bytes queued and excess limit. d) X bytes completed and dql->prev_ovlimit is set and also dql->prev_num_queued is set Y. e) Z bytes queued. f) Y bytes completed. inprogress and prev_inprogress are true. At f), according to the comment, all_prev_completed becomes true and limit should be increased. But POSDIFF() ignores (completed == dql->prev_num_queued) case, so limit is decreased. Signed-off-by: Hiroaki SHIMODA Cc: Tom Herbert Cc: Eric Dumazet Cc: Denys Fedoryshchenko Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/dynamic_queue_limits.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index c87eb76f2fd4..0fafa77f4036 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -11,12 +11,14 @@ #include #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? 
(A) - (B) : 0) +#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) /* Records completed count and recalculates the queue limit */ void dql_completed(struct dql *dql, unsigned int count) { unsigned int inprogress, prev_inprogress, limit; - unsigned int ovlimit, all_prev_completed, completed; + unsigned int ovlimit, completed; + bool all_prev_completed; /* Can't complete more than what's in queue */ BUG_ON(count > dql->num_queued - dql->num_completed); @@ -26,7 +28,7 @@ void dql_completed(struct dql *dql, unsigned int count) ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit); inprogress = dql->num_queued - completed; prev_inprogress = dql->prev_num_queued - dql->num_completed; - all_prev_completed = POSDIFF(completed, dql->prev_num_queued); + all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued); if ((ovlimit && !inprogress) || (dql->prev_ovlimit && all_prev_completed)) { -- cgit v1.2.3-70-g09d2 From 914bec1011a25f65cdc94988a6f974bfb9a3c10d Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Wed, 30 May 2012 12:25:37 +0000 Subject: bql: Avoid possible inconsistent calculation. dql->num_queued could change while processing dql_completed(). To provide consistent calculation, added an on stack variable. Signed-off-by: Hiroaki SHIMODA Cc: Tom Herbert Cc: Eric Dumazet Cc: Denys Fedoryshchenko Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- lib/dynamic_queue_limits.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index 0fafa77f4036..0777c5a45fa0 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -17,16 +17,18 @@ void dql_completed(struct dql *dql, unsigned int count) { unsigned int inprogress, prev_inprogress, limit; - unsigned int ovlimit, completed; + unsigned int ovlimit, completed, num_queued; bool all_prev_completed; + num_queued = ACCESS_ONCE(dql->num_queued); + /* Can't complete more than what's in queue */ - BUG_ON(count > dql->num_queued - dql->num_completed); + BUG_ON(count > num_queued - dql->num_completed); completed = dql->num_completed + count; limit = dql->limit; - ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit); - inprogress = dql->num_queued - completed; + ovlimit = POSDIFF(num_queued - dql->num_completed, limit); + inprogress = num_queued - completed; prev_inprogress = dql->prev_num_queued - dql->num_completed; all_prev_completed = AFTER_EQ(completed, dql->prev_num_queued); @@ -106,7 +108,7 @@ void dql_completed(struct dql *dql, unsigned int count) dql->prev_ovlimit = ovlimit; dql->prev_last_obj_cnt = dql->last_obj_cnt; dql->num_completed = completed; - dql->prev_num_queued = dql->num_queued; + dql->prev_num_queued = num_queued; } EXPORT_SYMBOL(dql_completed); -- cgit v1.2.3-70-g09d2 From 725fe002d315c2501c110b7245d3eb4f4535f4d6 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 31 May 2012 16:26:08 -0700 Subject: vsprintf: correctly handle width when '#' flag used in %#p format The '%p' output of the kernel's vsprintf() uses spec.field_width to determine how many digits to output based on 2 * sizeof(void*) so that all digits of a pointer are shown. ie. a pointer will be output as "001A2B3C" instead of "1A2B3C". 
However, if the '#' flag is used in the format (%#p), then the code doesn't take into account the width of the '0x' prefix and will end up outputting "0x1A2B3C" instead of "0x001A2B3C". This patch reworks the "pointer()" format hook to include 2 characters for the '0x' prefix if the '#' flag is included. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Grant Likely Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 5391299c1e78..b8fbd275bc46 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -870,13 +870,15 @@ static noinline_for_stack char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct printf_spec spec) { + int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0); + if (!ptr && *fmt != 'K') { /* * Print (null) with the same width as a pointer so it makes * tabular output look nice. */ if (spec.field_width == -1) - spec.field_width = 2 * sizeof(void *); + spec.field_width = default_width; return string(buf, end, "(null)", spec); } @@ -931,7 +933,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, */ if (in_irq() || in_serving_softirq() || in_nmi()) { if (spec.field_width == -1) - spec.field_width = 2 * sizeof(void *); + spec.field_width = default_width; return string(buf, end, "pK-error", spec); } if (!((kptr_restrict == 0) || @@ -948,7 +950,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } spec.flags |= SMALL; if (spec.field_width == -1) { - spec.field_width = 2 * sizeof(void *); + spec.field_width = default_width; spec.flags |= ZEROPAD; } spec.base = 16; -- cgit v1.2.3-70-g09d2 From 133fd9f5cda2d86904126f4b9fa4e8f4330c9569 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 31 May 2012 16:26:08 -0700 Subject: vsprintf: further optimize decimal conversion Previous code was using optimizations which were developed to work
well even on narrow-word CPUs (by today's standards). But Linux runs only on 32-bit and wider CPUs. We can use that. First: using 32x32->64 multiply and trivial 32-bit shift, we can correctly divide by 10 much larger numbers, and thus we can print groups of 9 digits instead of groups of 5 digits. Next: there are two algorithms to print larger numbers. One is generic: divide by 1000000000 and repeatedly print groups of (up to) 9 digits. It's conceptually simple, but requires an (unsigned long long) / 1000000000 division. Second algorithm splits 64-bit unsigned long long into 16-bit chunks, manipulates them cleverly and generates groups of 4 decimal digits. It so happens that it does NOT require long long division. If long is > 32 bits, division of 64-bit values is relatively easy, and we will use the first algorithm. If long long is > 64 bits (strange architecture with VERY large long long), second algorithm can't be used, and we again use the first one. Else (if long is 32 bits and long long is 64 bits) we use second one. And third: there is a simple optimization which takes fast path not only for zero as was done before, but for all one-digit numbers. In all tested cases new code is faster than old one, in many cases by 30%, in few cases by more than 50% (for example, on x86-32, conversion of 12345678). Code growth is ~0 in 32-bit case and ~130 bytes in 64-bit case. This patch is based upon an original from Michal Nazarewicz. 
[akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Michal Nazarewicz Signed-off-by: Denys Vlasenko Cc: Douglas W Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/bitsperlong.h | 4 + lib/vsprintf.c | 281 ++++++++++++++++++++++++++------------ 2 files changed, 194 insertions(+), 91 deletions(-) (limited to 'lib') diff --git a/include/asm-generic/bitsperlong.h b/include/asm-generic/bitsperlong.h index 4ae54e07de83..a7b0914348fd 100644 --- a/include/asm-generic/bitsperlong.h +++ b/include/asm-generic/bitsperlong.h @@ -28,5 +28,9 @@ #error Inconsistent word size. Check asm/bitsperlong.h #endif +#ifndef BITS_PER_LONG_LONG +#define BITS_PER_LONG_LONG 64 +#endif + #endif /* __KERNEL__ */ #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b8fbd275bc46..c3f36d415bdf 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -112,106 +112,199 @@ int skip_atoi(const char **s) /* Decimal conversion is by far the most typical, and is used * for /proc and /sys data. This directly impacts e.g. top performance * with many processes running. We optimize it for speed - * using code from - * http://www.cs.uiowa.edu/~jones/bcd/decimal.html - * (with permission from the author, Douglas W. Jones). */ + * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/decimal.html> + * (with permission from the author, Douglas W. Jones). + */ -/* Formats correctly any integer in [0,99999]. - * Outputs from one to five digits depending on input. - * On i386 gcc 4.1.2 -O2: ~250 bytes of code.
*/ +#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 +/* Formats correctly any integer in [0, 999999999] */ static noinline_for_stack -char *put_dec_trunc(char *buf, unsigned q) +char *put_dec_full9(char *buf, unsigned q) { - unsigned d3, d2, d1, d0; - d1 = (q>>4) & 0xf; - d2 = (q>>8) & 0xf; - d3 = (q>>12); - - d0 = 6*(d3 + d2 + d1) + (q & 0xf); - q = (d0 * 0xcd) >> 11; - d0 = d0 - 10*q; - *buf++ = d0 + '0'; /* least significant digit */ - d1 = q + 9*d3 + 5*d2 + d1; - if (d1 != 0) { - q = (d1 * 0xcd) >> 11; - d1 = d1 - 10*q; - *buf++ = d1 + '0'; /* next digit */ - - d2 = q + 2*d2; - if ((d2 != 0) || (d3 != 0)) { - q = (d2 * 0xd) >> 7; - d2 = d2 - 10*q; - *buf++ = d2 + '0'; /* next digit */ - - d3 = q + 4*d3; - if (d3 != 0) { - q = (d3 * 0xcd) >> 11; - d3 = d3 - 10*q; - *buf++ = d3 + '0'; /* next digit */ - if (q != 0) - *buf++ = q + '0'; /* most sign. digit */ - } - } - } + unsigned r; + /* + * Possible ways to approx. divide by 10 + * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) + * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) + * (x * 0x6667) >> 18 x < 43699 + * (x * 0x3334) >> 17 x < 16389 + * (x * 0x199a) >> 16 x < 16389 + * (x * 0x0ccd) >> 15 x < 16389 + * (x * 0x0667) >> 14 x < 2739 + * (x * 0x0334) >> 13 x < 1029 + * (x * 0x019a) >> 12 x < 1029 + * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) + * (x * 0x0067) >> 10 x < 179 + * (x * 0x0034) >> 9 x < 69 same + * (x * 0x001a) >> 8 x < 69 same + * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) + * (x * 0x0007) >> 6 x < 19 + * See + */ + r = (q * (uint64_t)0x1999999a) >> 32; + *buf++ = (q - 10 * r) + '0'; /* 1 */ + q = (r * (uint64_t)0x1999999a) >> 32; + *buf++ = (r - 10 * q) + '0'; /* 2 */ + r = (q * (uint64_t)0x1999999a) >> 32; + *buf++ = (q - 10 * r) + '0'; /* 3 */ + q = (r * (uint64_t)0x1999999a) >> 32; + *buf++ = (r - 10 * q) + '0'; /* 4 */ + r = (q * (uint64_t)0x1999999a) >> 32; + *buf++ = (q - 10 * r) + '0'; /* 5 */ + /* Now value is under 10000, 
can avoid 64-bit multiply */ + q = (r * 0x199a) >> 16; + *buf++ = (r - 10 * q) + '0'; /* 6 */ + r = (q * 0xcd) >> 11; + *buf++ = (q - 10 * r) + '0'; /* 7 */ + q = (r * 0xcd) >> 11; + *buf++ = (r - 10 * q) + '0'; /* 8 */ + *buf++ = q + '0'; /* 9 */ return buf; } -/* Same with if's removed. Always emits five digits */ +#endif + +/* Similar to above but do not pad with zeros. + * Code can be easily arranged to print 9 digits too, but our callers + * always call put_dec_full9() instead when the number has 9 decimal digits. + */ static noinline_for_stack -char *put_dec_full(char *buf, unsigned q) +char *put_dec_trunc8(char *buf, unsigned r) { - /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ - /* but anyway, gcc produces better code with full-sized ints */ - unsigned d3, d2, d1, d0; - d1 = (q>>4) & 0xf; - d2 = (q>>8) & 0xf; - d3 = (q>>12); + unsigned q; + + /* Copy of previous function's body with added early returns */ + q = (r * (uint64_t)0x1999999a) >> 32; + *buf++ = (r - 10 * q) + '0'; /* 2 */ + if (q == 0) + return buf; + r = (q * (uint64_t)0x1999999a) >> 32; + *buf++ = (q - 10 * r) + '0'; /* 3 */ + if (r == 0) + return buf; + q = (r * (uint64_t)0x1999999a) >> 32; + *buf++ = (r - 10 * q) + '0'; /* 4 */ + if (q == 0) + return buf; + r = (q * (uint64_t)0x1999999a) >> 32; + *buf++ = (q - 10 * r) + '0'; /* 5 */ + if (r == 0) + return buf; + q = (r * 0x199a) >> 16; + *buf++ = (r - 10 * q) + '0'; /* 6 */ + if (q == 0) + return buf; + r = (q * 0xcd) >> 11; + *buf++ = (q - 10 * r) + '0'; /* 7 */ + if (r == 0) + return buf; + q = (r * 0xcd) >> 11; + *buf++ = (r - 10 * q) + '0'; /* 8 */ + if (q == 0) + return buf; + *buf++ = q + '0'; /* 9 */ + return buf; +} - /* - * Possible ways to approx. 
divide by 10 - * gcc -O2 replaces multiply with shifts and adds - * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386) - * (x * 0x67) >> 10: 1100111 - * (x * 0x34) >> 9: 110100 - same - * (x * 0x1a) >> 8: 11010 - same - * (x * 0x0d) >> 7: 1101 - same, shortest code (on i386) - */ - d0 = 6*(d3 + d2 + d1) + (q & 0xf); - q = (d0 * 0xcd) >> 11; - d0 = d0 - 10*q; - *buf++ = d0 + '0'; - d1 = q + 9*d3 + 5*d2 + d1; - q = (d1 * 0xcd) >> 11; - d1 = d1 - 10*q; - *buf++ = d1 + '0'; - - d2 = q + 2*d2; - q = (d2 * 0xd) >> 7; - d2 = d2 - 10*q; - *buf++ = d2 + '0'; - - d3 = q + 4*d3; - q = (d3 * 0xcd) >> 11; /* - shorter code */ - /* q = (d3 * 0x67) >> 10; - would also work */ - d3 = d3 - 10*q; - *buf++ = d3 + '0'; - *buf++ = q + '0'; +/* There are two algorithms to print larger numbers. + * One is generic: divide by 1000000000 and repeatedly print + * groups of (up to) 9 digits. It's conceptually simple, + * but requires a (unsigned long long) / 1000000000 division. + * + * Second algorithm splits 64-bit unsigned long long into 16-bit chunks, + * manipulates them cleverly and generates groups of 4 decimal digits. + * It so happens that it does NOT require long long division. + * + * If long is > 32 bits, division of 64-bit values is relatively easy, + * and we will use the first algorithm. + * If long long is > 64 bits (strange architecture with VERY large long long), + * second algorithm can't be used, and we again use the first one. + * + * Else (if long is 32 bits and long long is 64 bits) we use second one. 
+ */ - return buf; +#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 + +/* First algorithm: generic */ + +static +char *put_dec(char *buf, unsigned long long n) +{ + if (n >= 100*1000*1000) { + while (n >= 1000*1000*1000) + buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); + if (n >= 100*1000*1000) + return put_dec_full9(buf, n); + } + return put_dec_trunc8(buf, n); } -/* No inlining helps gcc to use registers better */ + +#else + +/* Second algorithm: valid only for 64-bit long longs */ + static noinline_for_stack -char *put_dec(char *buf, unsigned long long num) +char *put_dec_full4(char *buf, unsigned q) { - while (1) { - unsigned rem; - if (num < 100000) - return put_dec_trunc(buf, num); - rem = do_div(num, 100000); - buf = put_dec_full(buf, rem); - } + unsigned r; + r = (q * 0xcccd) >> 19; + *buf++ = (q - 10 * r) + '0'; + q = (r * 0x199a) >> 16; + *buf++ = (r - 10 * q) + '0'; + r = (q * 0xcd) >> 11; + *buf++ = (q - 10 * r) + '0'; + *buf++ = r + '0'; + return buf; +} + +/* Based on code by Douglas W. Jones found at + * + * (with permission from the author). + * Performs no 64-bit division and hence should be fast on 32-bit machines. 
+ */ +static +char *put_dec(char *buf, unsigned long long n) +{ + uint32_t d3, d2, d1, q, h; + + if (n < 100*1000*1000) + return put_dec_trunc8(buf, n); + + d1 = ((uint32_t)n >> 16); /* implicit "& 0xffff" */ + h = (n >> 32); + d2 = (h ) & 0xffff; + d3 = (h >> 16); /* implicit "& 0xffff" */ + + q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); + + buf = put_dec_full4(buf, q % 10000); + q = q / 10000; + + d1 = q + 7671 * d3 + 9496 * d2 + 6 * d1; + buf = put_dec_full4(buf, d1 % 10000); + q = d1 / 10000; + + d2 = q + 4749 * d3 + 42 * d2; + buf = put_dec_full4(buf, d2 % 10000); + q = d2 / 10000; + + d3 = q + 281 * d3; + if (!d3) + goto done; + buf = put_dec_full4(buf, d3 % 10000); + q = d3 / 10000; + if (!q) + goto done; + buf = put_dec_full4(buf, q); + done: + while (buf[-1] == '0') + --buf; + + return buf; } +#endif + /* * Convert passed number to decimal string. * Returns the length of string. On buffer overflow, returns 0. @@ -220,16 +313,22 @@ char *put_dec(char *buf, unsigned long long num) */ int num_to_str(char *buf, int size, unsigned long long num) { - char tmp[21]; /* Enough for 2^64 in decimal */ + char tmp[sizeof(num) * 3]; int idx, len; - len = put_dec(tmp, num) - tmp; + /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ + if (num <= 9) { + tmp[0] = '0' + num; + len = 1; + } else { + len = put_dec(tmp, num) - tmp; + } if (len > size) return 0; for (idx = 0; idx < len; ++idx) buf[idx] = tmp[len - idx - 1]; - return len; + return len; } #define ZEROPAD 1 /* pad with zero */ @@ -314,8 +413,8 @@ char *number(char *buf, char *end, unsigned long long num, /* generate full string in tmp[], in reverse order */ i = 0; - if (num == 0) - tmp[i++] = '0'; + if (num < spec.base) + tmp[i++] = digits[num] | locase; /* Generic code, for any base: else do { tmp[i++] = (digits[do_div(num,base)] | locase); @@ -611,7 +710,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt) } for (i = 0; i < 4; i++) { char temp[3]; /* hold each IP 
quad in reverse order */ - int digits = put_dec_trunc(temp, addr[index]) - temp; + int digits = put_dec_trunc8(temp, addr[index]) - temp; if (leading_zeros) { if (digits < 3) *p++ = '0'; -- cgit v1.2.3-70-g09d2 From fffaee365fded09f9ebf2db19066065fa54323c3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 5 Jun 2012 21:36:33 +0400 Subject: radix-tree: fix contiguous iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes bug in macro radix_tree_for_each_contig(). If radix_tree_next_slot() sees NULL in next slot it returns NULL, but following radix_tree_next_chunk() switches iterating into next chunk. As result iterating becomes non-contiguous and breaks vfs "splice" and all its users. Signed-off-by: Konstantin Khlebnikov Reported-and-bisected-by: Hans de Bruin Reported-and-bisected-by: Ondrej Zary Reported-bisected-and-tested-by: Toralf Förster Link: https://lkml.org/lkml/2012/6/5/64 Cc: stable # 3.4.x Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 5 ++++- lib/radix-tree.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0d04cd69ab9b..ffc444c38b0a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index++; if (likely(*slot)) return slot; - if (flags & RADIX_TREE_ITER_CONTIG) + if (flags & RADIX_TREE_ITER_CONTIG) { + /* forbid switching to the next chunk */ + iter->next_index = 0; break; + } } } return NULL; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d7c878cc006c..e7964296fd50 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -686,6 +686,9 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, * during iterating; it can be zero only at the beginning. 
* And we cannot overflow iter->next_index in a single step, * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. + * + * This condition is also used by radix_tree_next_slot() to stop + * contiguous iterating, and forbid switching to the next chunk. */ index = iter->next_index; if (!index && iter->index) -- cgit v1.2.3-70-g09d2 From cbf8ae32f66a9ceb8907ad9e16663c2a29e48990 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 7 Jun 2012 14:21:13 -0700 Subject: btree: fix tree corruption in btree_get_prev() The memory the parameter __key points to is used as an iterator in btree_get_prev(), so if we save off a bkey() pointer in retry_key and then assign that to __key, we'll end up corrupting the btree internals when we do e.g. longcpy(__key, bkey(geo, node, i), geo->keylen); to return the key value. What we should do instead is use longcpy() to copy the key value that retry_key points to into __key. This can cause a btree to get corrupted by seemingly read-only operations such as btree_for_each_safe. [akpm@linux-foundation.org: avoid the double longcpy()] Signed-off-by: Roland Dreier Acked-by: Joern Engel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/btree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/btree.c b/lib/btree.c index e5ec1e9c1aa5..5cf9e74ec3f3 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, if (head->height == 0) return NULL; -retry: longcpy(key, __key, geo->keylen); +retry: dec_key(geo, key); node = head->node; @@ -351,7 +351,7 @@ retry: } miss: if (retry_key) { - __key = retry_key; + longcpy(key, retry_key, geo->keylen); retry_key = NULL; goto retry; } -- cgit v1.2.3-70-g09d2 From 39caa0916ef27cf1da5026eb708a2b8413156f75 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Thu, 7 Jun 2012 14:21:14 -0700 Subject: btree: catch NULL value before it does harm Storing NULL values in the btree is illegal and can lead to
memory corruption and possible other fun as well. Catch it on insert, instead of waiting for the inevitable. Signed-off-by: Joern Engel Signed-off-by: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/btree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/btree.c b/lib/btree.c index 5cf9e74ec3f3..f9a484676cb6 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -509,6 +509,7 @@ retry: int btree_insert(struct btree_head *head, struct btree_geo *geo, unsigned long *key, void *val, gfp_t gfp) { + BUG_ON(!val); return btree_insert_level(head, geo, key, val, 1, gfp); } EXPORT_SYMBOL_GPL(btree_insert); -- cgit v1.2.3-70-g09d2