From 7f7ace0cda64c99599c23785f8979a072e118058 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: cpumask: update irq_desc to use cpumask_var_t Impact: reduce memory usage, use new cpumask API. Replace the affinity and pending_masks with cpumask_var_t's. This adds to the significant size reduction done with the SPARSE_IRQS changes. The added functions (init_alloc_desc_masks & init_copy_desc_masks) are in the include file so they can be inlined (and optimized out for the !CONFIG_CPUMASKS_OFFSTACK case.) [Naming chosen to be consistent with the other init*irq functions, as well as the backwards arg declaration of "from, to" instead of the more common "to, from" standard.] Includes a slight change to the declaration of struct irq_desc to embed the pending_mask within ifdef(CONFIG_SMP) to be consistent with other references, and some small changes to Xen. Tested: sparse/non-sparse/cpumask_offstack/non-cpumask_offstack/nonuma/nosmp on x86_64 Signed-off-by: Mike Travis Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: KOSAKI Motohiro Cc: Venkatesh Pallipadi Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Yinghai Lu --- kernel/irq/chip.c | 5 ++++- kernel/irq/handle.c | 26 ++++++++++++++------------ kernel/irq/manage.c | 12 ++++++------ kernel/irq/migration.c | 12 ++++++------ kernel/irq/numa_migrate.c | 12 +++++++++++- kernel/irq/proc.c | 4 ++-- 6 files changed, 43 insertions(+), 28 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f63c706d25e1..c248eba98b43 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -46,7 +46,10 @@ void dynamic_irq_init(unsigned int irq) desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpumask_setall(&desc->affinity); + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif #endif spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c20db0be9173..b8fa1354f01c 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -64,9 +64,6 @@ static struct irq_desc irq_desc_init = { .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif }; void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) @@ -88,6 +85,8 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) { + int node = cpu_to_node(cpu); + memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); @@ -101,6 +100,10 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); + BUG_ON(1); + } arch_init_chip_data(desc, cpu); } @@ -119,9 +122,6 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif } }; @@ -141,7 +141,7 @@ int __init early_irq_init(void) desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy[i]; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - + init_alloc_desc_masks(&desc[i], 0, true); irq_desc_ptrs[i] = desc + i; } @@ -188,6 +188,10 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); + BUG_ON(1); + } init_one_irq_desc(irq, desc, cpu); irq_desc_ptrs[irq] = desc; @@ -207,9 +211,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif } }; @@ -222,9 +223,10 @@ int __init early_irq_init(void) desc = irq_desc; count = ARRAY_SIZE(irq_desc); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { desc[i].irq = i; - + init_alloc_desc_masks(&desc[i], 0, true); + } return arch_early_irq_init(); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index cd0cd8dcb345..b98739af4558 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -98,14 +98,14 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - cpumask_copy(&desc->affinity, cpumask); + cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, cpumask); + cpumask_copy(desc->pending_mask, cpumask); } #else - cpumask_copy(&desc->affinity, cpumask); + cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@ -127,16 +127,16 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) * one of the targets is online. */ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpumask_any_and(&desc->affinity, cpu_online_mask) + if (cpumask_any_and(desc->affinity, cpu_online_mask) < nr_cpu_ids) goto set_affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity); + cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity); set_affinity: - desc->chip->set_affinity(irq, &desc->affinity); + desc->chip->set_affinity(irq, desc->affinity); return 0; } diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index bd72329e630c..e05ad9be43b7 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -18,7 +18,7 @@ void move_masked_irq(int irq) desc->status &= ~IRQ_MOVE_PENDING; - if (unlikely(cpumask_empty(&desc->pending_mask))) + if (unlikely(cpumask_empty(desc->pending_mask))) return; if (!desc->chip->set_affinity) @@ -38,13 +38,13 @@ void move_masked_irq(int irq) * For correct operation this depends on the caller * masking the irqs. */ - if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask) + if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids)) { - cpumask_and(&desc->affinity, - &desc->pending_mask, cpu_online_mask); - desc->chip->set_affinity(irq, &desc->affinity); + cpumask_and(desc->affinity, + desc->pending_mask, cpu_online_mask); + desc->chip->set_affinity(irq, desc->affinity); } - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); } void move_native_irq(int irq) diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index ecf765c6a77a..f001a4ea6414 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -46,6 +46,7 @@ static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, desc->cpu = cpu; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + init_copy_desc_masks(old_desc, desc); arch_init_copy_chip_data(old_desc, desc, cpu); } @@ -76,11 +77,20 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); if (!desc) { - printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq); + printk(KERN_ERR "irq %d: can not get new irq_desc " + "for migration.\n", irq); /* still use old one */ desc = old_desc; goto out_unlock; } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " + "for migration.\n", irq); + /* still use old one */ + kfree(desc); + desc = old_desc; + goto out_unlock; + } init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index aae3f742bcec..692363dd591f 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -20,11 +20,11 @@ static struct proc_dir_entry *root_irq_dir; static int irq_affinity_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - const struct cpumask *mask = &desc->affinity; + const struct cpumask *mask = desc->affinity; #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PENDING) - mask = &desc->pending_mask; + mask = desc->pending_mask; #endif seq_cpumask(m, mask); seq_putc(m, '\n'); -- cgit v1.2.3-70-g09d2 From 802bf931f2688ad125b73db597ce63cc842fb27a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: cpumask: fix bug in use cpumask_var_t in irq_desc Impact: fix bug where new irq_desc uses old cpumask pointers which are freed. As Yinghai pointed out, init_copy_one_irq_desc() copies the old desc to the new desc overwriting the cpumask pointers. Since the old_desc and the cpumask pointers are freed, then memory corruption will occur if these old pointers are used. Move the allocation of these pointers to after the copy. Signed-off-by: Mike Travis Cc: Yinghai Lu --- include/linux/irq.h | 9 +++++++-- kernel/irq/handle.c | 8 +------- kernel/irq/numa_migrate.c | 13 ++++++++----- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'kernel/irq') diff --git a/include/linux/irq.h b/include/linux/irq.h index fa27210f1dfd..27a67536511e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -426,15 +426,18 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); /** * init_alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct + * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { + int node; + if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); cpumask_setall(desc->affinity); @@ -446,6 +449,8 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, return true; } + node = cpu_to_node(cpu); + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; cpumask_setall(desc->affinity); @@ -484,7 +489,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b8fa1354f01c..f01c0a30cb42 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -85,8 +85,6 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) { - int node = cpu_to_node(cpu); - memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); @@ -100,7 +98,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, node, false)) { + if (!init_alloc_desc_masks(desc, cpu, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } @@ -188,10 +186,6 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, node, false)) { - printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); - BUG_ON(1); - } init_one_irq_desc(irq, desc, cpu); irq_desc_ptrs[irq] = desc; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index f001a4ea6414..666260e4c065 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -38,16 +38,22 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->kstat_irqs = NULL; } -static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, +static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, struct irq_desc *desc, int cpu) { memcpy(desc, old_desc, sizeof(struct irq_desc)); + if (!init_alloc_desc_masks(desc, cpu, false)) { + printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " + "for migration.\n", irq); + return false; + } spin_lock_init(&desc->lock); desc->cpu = cpu; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); init_copy_desc_masks(old_desc, desc); arch_init_copy_chip_data(old_desc, desc, cpu); + return true; } static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) @@ -83,15 +89,12 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = old_desc; goto out_unlock; } - if (!init_alloc_desc_masks(desc, node, false)) { - printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " - "for migration.\n", irq); + if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { /* still use old one */ kfree(desc); desc = old_desc; goto out_unlock; } - init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; -- cgit v1.2.3-70-g09d2 From 9594949b060efe86ecaa1a66839232a3b9800bc9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: irq: change references from NR_IRQS to nr_irqs Impact: preparation, cleanup, add KERN_INFO printk Modify references from NR_IRQS to nr_irqs as the later will become variable-sized based on nr_cpu_ids when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- arch/x86/kernel/io_apic.c | 2 +- kernel/irq/handle.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'kernel/irq') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1337eab60ecc..ae80638012de 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3183,7 +3183,7 @@ unsigned int create_irq_nr(unsigned int irq_want) irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new < NR_IRQS; new++) { + for (new = irq_want; new < nr_irqs; new++) { if (platform_legacy_irq(new)) continue; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index f01c0a30cb42..790c5fa7ea39 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -132,6 +132,8 @@ int __init early_irq_init(void) int legacy_count; int i; + printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); + desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); @@ -143,7 +145,7 @@ int __init early_irq_init(void) irq_desc_ptrs[i] = desc + i; } - for (i = legacy_count; i < NR_IRQS; i++) + for (i = legacy_count; i < nr_irqs; i++) irq_desc_ptrs[i] = NULL; return arch_early_irq_init(); @@ -151,7 +153,7 @@ int __init early_irq_init(void) struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL; + return (irq < nr_irqs) ? irq_desc_ptrs[irq] : NULL; } struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) @@ -160,9 +162,9 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) unsigned long flags; int node; - if (irq >= NR_IRQS) { - printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", - irq, NR_IRQS); + if (irq >= nr_irqs) { + printk(KERN_WARNING "irq >= nr_irqs in irq_to_desc_alloc: %d %d\n", + irq, nr_irqs); WARN_ON(1); return NULL; } @@ -214,6 +216,8 @@ int __init early_irq_init(void) int count; int i; + printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); + desc = irq_desc; count = ARRAY_SIZE(irq_desc); -- cgit v1.2.3-70-g09d2 From e2f4d06545ec1f29b0e838ee34cbf3500ea5b9a4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: irq: use WARN() instead of WARN_ON(). Impact: cleanup WARN msg. Ingo requested: > While at it, could you please also convert this to a WARN() construct > instead? (in a separate commit) ... and it shall be done. ;-) Signed-off-by: Mike Travis --- kernel/irq/handle.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 790c5fa7ea39..fd1ef16252f4 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -163,9 +163,8 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) int node; if (irq >= nr_irqs) { - printk(KERN_WARNING "irq >= nr_irqs in irq_to_desc_alloc: %d %d\n", - irq, nr_irqs); - WARN_ON(1); + WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", + irq, nr_irqs); return NULL; } -- cgit v1.2.3-70-g09d2 From 0fa0ebbf15addc1be8f73325d809c8547a9de304 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: irq: allocate irq_desc_ptrs array based on nr_irqs Impact: allocate irq_desc_ptrs in preparation for making it variable-sized. This addresses this memory usage bump when NR_CPUS bumped from 128 to 4096: 34816 +229376 264192 +658% irq_desc_ptrs(.data.read_mostly) The patch is split into two parts, the first simply allocates the irq_desc_ptrs array. Then next will deal with making it variable. This is only when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- kernel/irq/handle.c | 11 +++++++++-- kernel/irq/internals.h | 7 +++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index fd1ef16252f4..d0b8f7e72790 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "internals.h" @@ -110,7 +111,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) */ DEFINE_SPINLOCK(sparse_irq_lock); -struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; +struct irq_desc **irq_desc_ptrs __read_mostly; static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { [0 ... NR_IRQS_LEGACY-1] = { @@ -137,6 +138,9 @@ int __init early_irq_init(void) desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); + /* allocate irq_desc_ptrs array based on nr_irqs */ + irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); + for (i = 0; i < legacy_count; i++) { desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy[i]; @@ -153,7 +157,10 @@ int __init early_irq_init(void) struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < nr_irqs) ? irq_desc_ptrs[irq] : NULL; + if (irq_desc_ptrs && irq < nr_irqs) + return irq_desc_ptrs[irq]; + + return NULL; } struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index e6d0a43cc125..40416a81a0f5 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -16,7 +16,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, extern struct lock_class_key irq_desc_lock_class; extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); extern spinlock_t sparse_irq_lock; + +#ifdef CONFIG_SPARSE_IRQ +/* irq_desc_ptrs allocated at boot time */ +extern struct irq_desc **irq_desc_ptrs; +#else +/* irq_desc_ptrs is a fixed size array */ extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; +#endif #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); -- cgit v1.2.3-70-g09d2 From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: irq: initialize nr_irqs based on nr_cpu_ids Impact: Reduce memory usage. This is the second half of the changes to make the irq_desc_ptrs be variable sized based on nr_cpu_ids. This is done by adding a new "max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids. This necessitated moving the define of MAX_IO_APICS to a separate file (asm/apicnum.h) so it could be included without the baggage of the other asm/apicdef.h declarations. Signed-off-by: Mike Travis --- arch/x86/include/asm/apicdef.h | 8 ++------ arch/x86/include/asm/apicnum.h | 12 ++++++++++++ arch/x86/include/asm/irq_vectors.h | 16 +++++++++++----- include/linux/irqnr.h | 7 +++++++ kernel/irq/handle.c | 3 +++ 5 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 arch/x86/include/asm/apicnum.h (limited to 'kernel/irq') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 63134e31e8b9..1a6454ef7f6c 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,12 +132,8 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif +/* get MAX_IO_APICS */ +#include /* * All x86-64 systems are xAPIC compatible. diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h new file mode 100644 index 000000000000..82f613c607ce --- /dev/null +++ b/arch/x86/include/asm/apicnum.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_APICNUM_H +#define _ASM_X86_APICNUM_H + +/* define MAX_IO_APICS */ +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index f7ff65032b9d..602361ad0e74 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -105,6 +105,8 @@ #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#include /* need MAX_IO_APICS */ + #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) @@ -112,11 +114,15 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else -# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) -# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif + +/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ +# define max_nr_irqs(nr_cpus) \ + ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ + (NR_VECTORS + (8 * NR_CPUS)) : \ + (NR_VECTORS + (32 * MAX_IO_APICS))) \ + +# define NR_IRQS max_nr_irqs(NR_CPUS) + #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 86af92e9e84c..de66e4e10406 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -20,11 +20,18 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) + #else /* CONFIG_GENERIC_HARDIRQS */ +#include /* need possible max_nr_irqs() */ + extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); +# ifndef max_nr_irqs +# define max_nr_irqs(nr_cpus) NR_IRQS +# endif + # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d0b8f7e72790..ebba7a116f14 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -133,6 +133,9 @@ int __init early_irq_init(void) int legacy_count; int i; + /* initialize nr_irqs based on nr_cpu_ids */ + nr_irqs = max_nr_irqs(nr_cpu_ids); + printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); desc = irq_desc_legacy; -- cgit v1.2.3-70-g09d2 From 542d865bbed4ce1f050f586e53cf1cfadda93766 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: kstat: modify kstat_irqs_legacy to be variable sized Impact: reduce memory usage. Allocate kstat_irqs_legacy based on nr_cpu_ids to deal with this memory usage bump when NR_CPUS bumped from 128 to 4096: 8192 +253952 262144 +3100% kstat_irqs_legacy(.bss) This is only when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- kernel/irq/handle.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel/irq') diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index ebba7a116f14..b39f32ac8f80 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -124,8 +124,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm } }; -/* FIXME: use bootmem alloc ...*/ -static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; +static unsigned int *kstat_irqs_legacy; int __init early_irq_init(void) { @@ -144,9 +143,14 @@ int __init early_irq_init(void) /* allocate irq_desc_ptrs array based on nr_irqs */ irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); + /* allocate based on nr_cpu_ids */ + /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */ + kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids * + sizeof(int)); + for (i = 0; i < legacy_count; i++) { desc[i].irq = i; - desc[i].kstat_irqs = kstat_irqs_legacy[i]; + desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); init_alloc_desc_masks(&desc[i], 0, true); irq_desc_ptrs[i] = desc + i; -- cgit v1.2.3-70-g09d2 From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 11 Jan 2009 09:22:58 -0800 Subject: cpumask, irq: non-x86 build failures Ingo Molnar wrote: > All non-x86 architectures fail to build: > > In file included from /home/mingo/tip/include/linux/random.h:11, > from /home/mingo/tip/include/linux/stackprotector.h:6, > from /home/mingo/tip/init/main.c:17: > /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory Do not include asm/irq_vectors.h in generic code - it's not available on all architectures. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 8 ++++++-- include/linux/irqnr.h | 6 ------ kernel/irq/handle.c | 5 +++++ 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'kernel/irq') diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 1a6454ef7f6c..63134e31e8b9 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,8 +132,12 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -/* get MAX_IO_APICS */ -#include +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif /* * All x86-64 systems are xAPIC compatible. diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index de66e4e10406..887477bc2ab0 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -23,15 +23,9 @@ #else /* CONFIG_GENERIC_HARDIRQS */ -#include /* need possible max_nr_irqs() */ - extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); -# ifndef max_nr_irqs -# define max_nr_irqs(nr_cpus) NR_IRQS -# endif - # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b39f32ac8f80..04d3e46031e5 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -58,6 +58,11 @@ int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); #ifdef CONFIG_SPARSE_IRQ + +#ifndef max_nr_irqs +#define max_nr_irqs(nr_cpus) NR_IRQS +#endif + static struct irq_desc irq_desc_init = { .irq = -1, .status = IRQ_DISABLED, -- cgit v1.2.3-70-g09d2 From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 17:39:24 -0800 Subject: x86: arch_probe_nr_irqs Impact: save RAM with large NR_CPUS, get smaller nr_irqs Signed-off-by: Yinghai Lu Signed-off-by: Mike Travis --- arch/x86/include/asm/irq_vectors.h | 7 ++----- arch/x86/kernel/io_apic.c | 16 ++++++++++++++++ include/linux/interrupt.h | 1 + kernel/irq/handle.c | 9 ++------- kernel/softirq.c | 5 +++++ 5 files changed, 26 insertions(+), 12 deletions(-) (limited to 'kernel/irq') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 602361ad0e74..a16a2ab2b429 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -115,14 +115,11 @@ # endif #else -/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ -# define max_nr_irqs(nr_cpus) \ - ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ +# define NR_IRQS \ + ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ (NR_VECTORS + (32 * MAX_IO_APICS))) \ -# define NR_IRQS max_nr_irqs(NR_CPUS) - #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ae80638012de..157986916cd1 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3850,6 +3850,22 @@ void __init probe_nr_irqs_gsi(void) nr_irqs_gsi = nr; } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? + (NR_VECTORS + (8 * nr_cpu_ids)) : + (NR_VECTORS + (32 * nr_ioapics))); + + if (nr < nr_irqs && nr > nr_irqs_gsi) + nr_irqs = nr; + + return 0; +} +#endif + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9127f6b51a39..472f11765f60 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -467,6 +467,7 @@ int show_interrupts(struct seq_file *p, void *v); struct irq_desc; extern int early_irq_init(void); +extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); extern int arch_init_chip_data(struct irq_desc *desc, int cpu); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 04d3e46031e5..375d68cd5bf0 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -59,10 +59,6 @@ EXPORT_SYMBOL_GPL(nr_irqs); #ifdef CONFIG_SPARSE_IRQ -#ifndef max_nr_irqs -#define max_nr_irqs(nr_cpus) NR_IRQS -#endif - static struct irq_desc irq_desc_init = { .irq = -1, .status = IRQ_DISABLED, @@ -137,9 +133,8 @@ int __init early_irq_init(void) int legacy_count; int i; - /* initialize nr_irqs based on nr_cpu_ids */ - nr_irqs = max_nr_irqs(nr_cpu_ids); - + /* initialize nr_irqs based on nr_cpu_ids */ + arch_probe_nr_irqs(); printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); desc = irq_desc_legacy; diff --git a/kernel/softirq.c b/kernel/softirq.c index bdbe9de9cd8d..0365b4899a3d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -795,6 +795,11 @@ int __init __weak early_irq_init(void) return 0; } +int __init __weak arch_probe_nr_irqs(void) +{ + return 0; +} + int __init __weak arch_early_irq_init(void) { return 0; -- cgit v1.2.3-70-g09d2