From 098faf5805c80f951ce5e8b4a6842382ad793c38 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 24 Oct 2013 09:06:45 +0100 Subject: percpu_counter: make APIs irq safe In my usage, the percpu_counter APIs are sometimes called with irqs disabled and sometimes not, so lockdep complains about a potential deadlock. Let's always take the percpu_counter lock in an irq-safe way. There should be no performance penalty, as all of these are slow code paths. Cc: Andrew Morton Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- lib/percpu_counter.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 93c5d5ecff4e..7473ee3b4ee7 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -60,14 +60,15 @@ static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { int cpu; + unsigned long flags; - raw_spin_lock(&fbc->lock); + raw_spin_lock_irqsave(&fbc->lock, flags); for_each_possible_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); *pcount = 0; } fbc->count = amount; - raw_spin_unlock(&fbc->lock); + raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_set); @@ -78,9 +79,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) preempt_disable(); count = __this_cpu_read(*fbc->counters) + amount; if (count >= batch || count <= -batch) { - raw_spin_lock(&fbc->lock); + unsigned long flags; + raw_spin_lock_irqsave(&fbc->lock, flags); fbc->count += count; - raw_spin_unlock(&fbc->lock); + raw_spin_unlock_irqrestore(&fbc->lock, flags); __this_cpu_write(*fbc->counters, 0); } else { __this_cpu_write(*fbc->counters, count); @@ -97,14 +99,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; + unsigned long flags; - raw_spin_lock(&fbc->lock); + raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; for_each_online_cpu(cpu) { s32 *pcount =
per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } - raw_spin_unlock(&fbc->lock); + raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } EXPORT_SYMBOL(__percpu_counter_sum); -- cgit v1.2.3-70-g09d2 From e26b53d0b287056646a0dffce8bc6b0f053f3823 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 15 Oct 2013 09:05:01 +0800 Subject: percpu_ida: make percpu_ida percpu size/batch configurable Make the percpu_ida percpu freelist size and batch size configurable. The block-mq-tag will use it. After block-mq uses percpu_ida to manage tags, performance is improved. My test was run on a 2-socket machine with 12 processes spread across the 2 sockets, so any lock contention or IPI overhead should be stressed heavily. Testing was done with null-blk. Results (hw_queue_depth: nopatch iops -> patch iops): 64: ~800k/s -> ~1470k/s; 2048: ~4470k/s -> ~4340k/s. Cc: Andrew Morton Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/percpu_ida.h | 18 +++++++++++++++++- lib/percpu_ida.c | 28 +++++++++++----------------- 2 files changed, 28 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h index 0b23edbee309..56c14033e7e7 100644 --- a/include/linux/percpu_ida.h +++ b/include/linux/percpu_ida.h @@ -16,6 +16,8 @@ struct percpu_ida { * percpu_ida_init() */ unsigned nr_tags; + unsigned percpu_max_size; + unsigned percpu_batch_size; struct percpu_ida_cpu __percpu *tag_cpu; @@ -51,10 +53,24 @@ struct percpu_ida { } ____cacheline_aligned_in_smp; }; +/* + * Number of tags we move between the percpu freelist and the global freelist at + * a time + */ +#define IDA_DEFAULT_PCPU_BATCH_MOVE 32U +/* Max size of percpu freelist, */ +#define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2) + int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); void percpu_ida_free(struct percpu_ida *pool, unsigned tag); void percpu_ida_destroy(struct percpu_ida *pool); -int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags); +int __percpu_ida_init(struct
percpu_ida *pool, unsigned long nr_tags, + unsigned long max_size, unsigned long batch_size); +static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) +{ + return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE, + IDA_DEFAULT_PCPU_BATCH_MOVE); +} #endif /* __PERCPU_IDA_H__ */ diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index bab1ba2a4c71..a601d4259e13 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -30,15 +30,6 @@ #include #include -/* - * Number of tags we move between the percpu freelist and the global freelist at - * a time - */ -#define IDA_PCPU_BATCH_MOVE 32U - -/* Max size of percpu freelist, */ -#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2) - struct percpu_ida_cpu { /* * Even though this is percpu, we need a lock for tag stealing by remote @@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool, struct percpu_ida_cpu *remote; for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); - cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; + cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; cpus_have_tags--) { cpu = cpumask_next(cpu, &pool->cpus_have_tags); @@ -123,7 +114,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool, { move_tags(tags->freelist, &tags->nr_free, pool->freelist, &pool->nr_free, - min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); + min(pool->nr_free, pool->percpu_batch_size)); } static inline unsigned alloc_local_tag(struct percpu_ida *pool, @@ -245,17 +236,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) wake_up(&pool->wait); } - if (nr_free == IDA_PCPU_SIZE) { + if (nr_free == pool->percpu_max_size) { spin_lock(&pool->lock); /* * Global lock held and irqs disabled, don't need percpu * lock */ - if (tags->nr_free == IDA_PCPU_SIZE) { + if (tags->nr_free == pool->percpu_max_size) { move_tags(pool->freelist, &pool->nr_free, tags->freelist, &tags->nr_free, - IDA_PCPU_BATCH_MOVE); + pool->percpu_batch_size); wake_up(&pool->wait); } @@ -292,7 
+283,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy); * Allocation is percpu, but sharding is limited by nr_tags - for best * performance, the workload should not span more cpus than nr_tags / 128. */ -int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) +int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, + unsigned long max_size, unsigned long batch_size) { unsigned i, cpu, order; @@ -301,6 +293,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) init_waitqueue_head(&pool->wait); spin_lock_init(&pool->lock); pool->nr_tags = nr_tags; + pool->percpu_max_size = max_size; + pool->percpu_batch_size = batch_size; /* Guard against overflow */ if (nr_tags > (unsigned) INT_MAX + 1) { @@ -319,7 +313,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) pool->nr_free = nr_tags; pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + - IDA_PCPU_SIZE * sizeof(unsigned), + pool->percpu_max_size * sizeof(unsigned), sizeof(unsigned)); if (!pool->tag_cpu) goto err; @@ -332,4 +326,4 @@ err: percpu_ida_destroy(pool); return -ENOMEM; } -EXPORT_SYMBOL_GPL(percpu_ida_init); +EXPORT_SYMBOL_GPL(__percpu_ida_init); -- cgit v1.2.3-70-g09d2 From 7fc2ba17e8bf9f218cac10cc2a3de613d9d9086d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 15 Oct 2013 09:05:02 +0800 Subject: percpu_ida: add percpu_ida_for_each_free Add a new API to iterate over free ids. blk-mq-tag will use it. Note that this is not guaranteed to iterate over exactly the set of free ids; callers should be aware of this. blk-mq uses it as a sanity check for request timeouts, so it can tolerate the limitation.
Cc: Andrew Morton Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/percpu_ida.h | 4 ++++ lib/percpu_ida.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'lib') diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h index 56c14033e7e7..63510ae6f933 100644 --- a/include/linux/percpu_ida.h +++ b/include/linux/percpu_ida.h @@ -73,4 +73,8 @@ static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags IDA_DEFAULT_PCPU_BATCH_MOVE); } +typedef int (*percpu_ida_cb)(unsigned, void *); +int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, + void *data); + #endif /* __PERCPU_IDA_H__ */ diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index a601d4259e13..0f51c1b556cf 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -327,3 +327,47 @@ err: return -ENOMEM; } EXPORT_SYMBOL_GPL(__percpu_ida_init); + +/** + * percpu_ida_for_each_free - iterate free ids of a pool + * @pool: pool to iterate + * @fn: iteration callback function + * @data: parameter for @fn + * + * Note, this is not guaranteed to iterate over exactly the free ids. Some free + * ids might be missed, some might be visited more than once, and some might + * no longer be free by the time they are visited.
+ */ +int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, + void *data) +{ + unsigned long flags; + struct percpu_ida_cpu *remote; + unsigned cpu, i, err = 0; + + local_irq_save(flags); + for_each_possible_cpu(cpu) { + remote = per_cpu_ptr(pool->tag_cpu, cpu); + spin_lock(&remote->lock); + for (i = 0; i < remote->nr_free; i++) { + err = fn(remote->freelist[i], data); + if (err) + break; + } + spin_unlock(&remote->lock); + if (err) + goto out; + } + + spin_lock(&pool->lock); + for (i = 0; i < pool->nr_free; i++) { + err = fn(pool->freelist[i], data); + if (err) + break; + } + spin_unlock(&pool->lock); +out: + local_irq_restore(flags); + return err; +} +EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); -- cgit v1.2.3-70-g09d2 From 1dddc01af0d42b21058e0cb9c1ca9e8d5204d9b0 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 15 Oct 2013 09:05:03 +0800 Subject: percpu_ida: add an API to return free tags Add an API to return the number of free tags; blk-mq-tag will use it. Note that this just returns a snapshot of the number of free tags. blk-mq-tag uses it in two ways: one is informational output for diagnosis, the other is a quick check for free tags during request dispatch. Neither requires a precise value.
Cc: Andrew Morton Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/percpu_ida.h | 1 + lib/percpu_ida.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'lib') diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h index 63510ae6f933..1900bd0fa639 100644 --- a/include/linux/percpu_ida.h +++ b/include/linux/percpu_ida.h @@ -77,4 +77,5 @@ typedef int (*percpu_ida_cb)(unsigned, void *); int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, void *data); +unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu); #endif /* __PERCPU_IDA_H__ */ diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 0f51c1b556cf..b0698ea972c6 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -371,3 +371,20 @@ out: return err; } EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); + +/** + * percpu_ida_free_tags - return free tags number of a specific cpu or global pool + * @pool: pool related + * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids + * + * Note: this just returns a snapshot of free tags number. + */ +unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu) +{ + struct percpu_ida_cpu *remote; + if (cpu == nr_cpu_ids) + return pool->nr_free; + remote = per_cpu_ptr(pool->tag_cpu, cpu); + return remote->nr_free; +} +EXPORT_SYMBOL_GPL(percpu_ida_free_tags); -- cgit v1.2.3-70-g09d2