diff options
author | Marc Zyngier <maz@kernel.org> | 2024-08-26 09:06:18 +0100 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2024-08-27 13:54:15 +0200 |
commit | 64b6d1d7a84538de34c22a6fc92a7dcc2b196b64 (patch) | |
tree | 954e57573070b44ed7169e34aca0916e6dff31d2 /kernel/irq/manage.c | |
parent | 17e28a9aeae40d2de3c1ea3b94819ed94bfd6392 (diff) |
genirq: Get rid of global lock in irq_do_set_affinity()
Kunkun Jiang reports that for a workload involving the simultaneous startup
of a large number of VMs (for a total of about 200 vcpus), a lot of CPU
time gets spent on spinning on the tmp_mask_lock that exists as a static
raw spinlock in irq_do_set_affinity(). This lock protects a global cpumask
(tmp_mask) that is used as a temporary variable to compute the resulting
affinity.
While this is triggered by KVM issuing a irq_set_affinity() call each time
a vcpu is about to execute, it is obvious that having a single global
resource is not very scalable.
Since a cpumask can be a fairly large structure on systems with a high core
count, a stack allocation is not really appropriate. Instead, turn the
global cpumask into a per-CPU variable, removing the need for locking
altogether as the code is executed with preemption and interrupts disabled.
[ tglx: Moved the per CPU variable declaration outside of the function ]
Reported-by: Kunkun Jiang <jiangkunkun@huawei.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kunkun Jiang <jiangkunkun@huawei.com>
Link: https://lore.kernel.org/all/20240826080618.3886694-1-maz@kernel.org
Link: https://lore.kernel.org/all/a7fc58e4-64c2-77fc-c1dc-f5eb78dbbb01@huawei.com
Diffstat (limited to 'kernel/irq/manage.c')
-rw-r--r-- | kernel/irq/manage.c | 21 |
1 files changed, 9 insertions, 12 deletions
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dd53298ef1a5..f0803d6bd296 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -218,21 +218,20 @@ static void irq_validate_effective_affinity(struct irq_data *data) static inline void irq_validate_effective_affinity(struct irq_data *data) { } #endif +static DEFINE_PER_CPU(struct cpumask, __tmp_mask); + int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { + struct cpumask *tmp_mask = this_cpu_ptr(&__tmp_mask); struct irq_desc *desc = irq_data_to_desc(data); struct irq_chip *chip = irq_data_get_irq_chip(data); const struct cpumask *prog_mask; int ret; - static DEFINE_RAW_SPINLOCK(tmp_mask_lock); - static struct cpumask tmp_mask; - if (!chip || !chip->irq_set_affinity) return -EINVAL; - raw_spin_lock(&tmp_mask_lock); /* * If this is a managed interrupt and housekeeping is enabled on * it check whether the requested affinity mask intersects with @@ -258,11 +257,11 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ); - cpumask_and(&tmp_mask, mask, hk_mask); - if (!cpumask_intersects(&tmp_mask, cpu_online_mask)) + cpumask_and(tmp_mask, mask, hk_mask); + if (!cpumask_intersects(tmp_mask, cpu_online_mask)) prog_mask = mask; else - prog_mask = &tmp_mask; + prog_mask = tmp_mask; } else { prog_mask = mask; } @@ -272,16 +271,14 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, * unless we are being asked to force the affinity (in which * case we do as we are told). */ - cpumask_and(&tmp_mask, prog_mask, cpu_online_mask); - if (!force && !cpumask_empty(&tmp_mask)) - ret = chip->irq_set_affinity(data, &tmp_mask, force); + cpumask_and(tmp_mask, prog_mask, cpu_online_mask); + if (!force && !cpumask_empty(tmp_mask)) + ret = chip->irq_set_affinity(data, tmp_mask, force); else if (force) ret = chip->irq_set_affinity(data, mask, force); else ret = -EINVAL; - raw_spin_unlock(&tmp_mask_lock); - switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: |