From d62e26b3ffd28f16ddae85a1babd0303a1a6dfb6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Jun 2017 21:55:08 -0600 Subject: block: pass in queue to inflight accounting No functional change in this patch, just in preparation for basing the inflight mechanism on the queue in question. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/genhd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block/genhd.c') diff --git a/block/genhd.c b/block/genhd.c index 7f520fa25d16..f735af67a0c9 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1217,7 +1217,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { cpu = part_stat_lock(); - part_round_stats(cpu, hd); + part_round_stats(gp->queue, cpu, hd); part_stat_unlock(); seq_printf(seqf, "%4d %7d %s %lu %lu %lu " "%u %lu %lu %lu %u %u %u %u\n", @@ -1231,7 +1231,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) part_stat_read(hd, merges[WRITE]), part_stat_read(hd, sectors[WRITE]), jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), - part_in_flight(hd), + part_in_flight(gp->queue, hd), jiffies_to_msecs(part_stat_read(hd, io_ticks)), jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); -- cgit v1.2.3-70-g09d2 From 0609e0efc5e15195ecf8c6d2f2e890d98760e337 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Aug 2017 17:49:47 -0600 Subject: block: make part_in_flight() take an array of two ints Instead of returning the count that matches the partition, pass in an array of two ints. Index 0 will be filled with the inflight count for the partition in question, and index 1 will filled with the root inflight count, if the partition passed in is not the root. This is in preparation for being able to calculate both in one go. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-core.c | 8 ++++---- block/genhd.c | 4 +++- block/partition-generic.c | 4 +++- include/linux/genhd.h | 13 ++++++++++--- 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'block/genhd.c') diff --git a/block/blk-core.c b/block/blk-core.c index 8ee954c12e9d..6ad2b8602c1d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1472,15 +1472,15 @@ static void add_acct_request(struct request_queue *q, struct request *rq, static void part_round_stats_single(struct request_queue *q, int cpu, struct hd_struct *part, unsigned long now) { - int inflight; + int inflight[2]; if (now == part->stamp) return; - inflight = part_in_flight(q, part); - if (inflight) { + part_in_flight(q, part, inflight); + if (inflight[0]) { __part_stat_add(cpu, part, time_in_queue, - inflight * (now - part->stamp)); + inflight[0] * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; diff --git a/block/genhd.c b/block/genhd.c index f735af67a0c9..822f65f95e2a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1204,6 +1204,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) struct disk_part_iter piter; struct hd_struct *hd; char buf[BDEVNAME_SIZE]; + unsigned int inflight[2]; int cpu; /* @@ -1219,6 +1220,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) cpu = part_stat_lock(); part_round_stats(gp->queue, cpu, hd); part_stat_unlock(); + part_in_flight(gp->queue, hd, inflight); seq_printf(seqf, "%4d %7d %s %lu %lu %lu " "%u %lu %lu %lu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), @@ -1231,7 +1233,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) part_stat_read(hd, merges[WRITE]), part_stat_read(hd, sectors[WRITE]), jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), - part_in_flight(gp->queue, hd), + inflight[0], jiffies_to_msecs(part_stat_read(hd, io_ticks)), jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); diff --git a/block/partition-generic.c b/block/partition-generic.c index d1bdd61e1d71..fa5049a4d99b 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -113,11 +113,13 @@ ssize_t part_stat_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); struct request_queue *q = dev_to_disk(dev)->queue; + unsigned int inflight[2]; int cpu; cpu = part_stat_lock(); part_round_stats(q, cpu, p); part_stat_unlock(); + part_in_flight(q, p, inflight); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -131,7 +133,7 @@ ssize_t part_stat_show(struct device *dev, part_stat_read(p, merges[WRITE]), (unsigned long long)part_stat_read(p, sectors[WRITE]), jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), - part_in_flight(q, p), + inflight[0], jiffies_to_msecs(part_stat_read(p, io_ticks)), jiffies_to_msecs(part_stat_read(p, time_in_queue))); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 7f7427e00f9c..f2a3a26cdda1 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -378,11 +378,18 @@ static inline void part_dec_in_flight(struct request_queue *q, atomic_dec(&part_to_disk(part)->part0.in_flight[rw]); } -static inline int part_in_flight(struct request_queue *q, - struct hd_struct *part) +static inline void part_in_flight(struct request_queue *q, + struct hd_struct *part, + unsigned int inflight[2]) { - return atomic_read(&part->in_flight[0]) + + inflight[0] = atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]); + if (part->partno) { + part = &part_to_disk(part)->part0; + inflight[1] = atomic_read(&part->in_flight[0]) + + atomic_read(&part->in_flight[1]); + } else + inflight[1] = 0; } static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) -- cgit v1.2.3-70-g09d2 From f299b7c7a9dee64425e5965bd4f56dc024c1befc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Aug 2017 17:51:45 -0600 Subject: blk-mq: provide internal in-flight variant We don't have to inc/dec some counter, since we can just iterate the tags. That makes inc/dec a noop, but means we have to iterate busy tags to get an in-flight count. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 31 +++++++++++++++++++++++++++++++ block/blk-mq.h | 3 +++ block/genhd.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/genhd.h | 35 ++++++----------------------------- 4 files changed, 77 insertions(+), 29 deletions(-) (limited to 'block/genhd.c') diff --git a/block/blk-mq.c b/block/blk-mq.c index a5d369dc7622..0dfc7a9984b6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -83,6 +83,37 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw); } +struct mq_inflight { + struct hd_struct *part; + unsigned int *inflight; +}; + +static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, + bool reserved) +{ + struct mq_inflight *mi = priv; + + if (test_bit(REQ_ATOM_STARTED, &rq->atomic_flags) && + !test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) { + /* + * Count as inflight if it either matches the partition we + * asked for, or if it's the root + */ + if (rq->part == mi->part || mi->part->partno) + mi->inflight[0]++; + } +} + +void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]) +{ + struct mq_inflight mi = { .part = part, .inflight = inflight, }; + + inflight[0] = 0; + blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); +} + void blk_freeze_queue_start(struct request_queue *q) { int freeze_depth; diff --git a/block/blk-mq.h b/block/blk-mq.h index 60b01c0309bc..98252b79b80b 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -133,4 +133,7 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) return hctx->nr_ctx && hctx->tags; } +void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]); + #endif diff --git a/block/genhd.c b/block/genhd.c index 822f65f95e2a..3dc4d115480f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -45,6 +45,43 @@ static void disk_add_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk); static void disk_release_events(struct gendisk *disk); +void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw) +{ + if (q->mq_ops) + return; + + atomic_inc(&part->in_flight[rw]); + if (part->partno) + atomic_inc(&part_to_disk(part)->part0.in_flight[rw]); +} + +void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw) +{ + if (q->mq_ops) + return; + + atomic_dec(&part->in_flight[rw]); + if (part->partno) + atomic_dec(&part_to_disk(part)->part0.in_flight[rw]); +} + +void part_in_flight(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]) +{ + if (q->mq_ops) { + blk_mq_in_flight(q, part, inflight); + return; + } + + inflight[0] = atomic_read(&part->in_flight[0]) + + atomic_read(&part->in_flight[1]); + if (part->partno) { + part = &part_to_disk(part)->part0; + inflight[1] = atomic_read(&part->in_flight[0]) + + atomic_read(&part->in_flight[1]); + } +} + /** * disk_get_part - get partition * @disk: disk to look partition from diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f2a3a26cdda1..ea652bfcd675 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -362,35 +362,12 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(cpu, gendiskp, field, subnd) \ part_stat_add(cpu, gendiskp, field, -subnd) -static inline void part_inc_in_flight(struct request_queue *q, - struct hd_struct *part, int rw) -{ - atomic_inc(&part->in_flight[rw]); - if (part->partno) - atomic_inc(&part_to_disk(part)->part0.in_flight[rw]); -} - -static inline void part_dec_in_flight(struct request_queue *q, - struct hd_struct *part, int rw) -{ - atomic_dec(&part->in_flight[rw]); - if (part->partno) - atomic_dec(&part_to_disk(part)->part0.in_flight[rw]); -} - -static inline void part_in_flight(struct request_queue *q, - struct hd_struct *part, - unsigned int inflight[2]) -{ - inflight[0] = atomic_read(&part->in_flight[0]) + - atomic_read(&part->in_flight[1]); - if (part->partno) { - part = &part_to_disk(part)->part0; - inflight[1] = atomic_read(&part->in_flight[0]) + - atomic_read(&part->in_flight[1]); - } else - inflight[1] = 0; -} +void part_in_flight(struct request_queue *q, struct hd_struct *part, + unsigned int inflight[2]); +void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, + int rw); +void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, + int rw); static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) { -- cgit v1.2.3-70-g09d2 From 6d2cf6f2b446c4ace6f57402713ddbc09b11b0a9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 17 Aug 2017 16:23:06 -0700 Subject: genhd: Annotate all part and part_tbl pointer dereferences Annotate gendisk.part_tbl and disk_part_tbl.part dereferences with rcu_dereference_protected(). This patch does not change the behavior of the modified code but ensures that sparse does not complain about disk->part_tbl manipulations nor about part_tbl->part accesses. Additionally, improve documentation of the locking requirements of the modified functions. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Cc: Tejun Heo Cc: Jan Kara Cc: Dan Williams Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 15 ++++++++++----- block/partition-generic.c | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'block/genhd.c') diff --git a/block/genhd.c b/block/genhd.c index 3dc4d115480f..2367087cdb7c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1127,12 +1127,13 @@ static const struct attribute_group *disk_attr_groups[] = { * original ptbl is freed using RCU callback. * * LOCKING: - * Matching bd_mutx locked. + * Matching bd_mutex locked or the caller is the only user of @disk. */ static void disk_replace_part_tbl(struct gendisk *disk, struct disk_part_tbl *new_ptbl) { - struct disk_part_tbl *old_ptbl = disk->part_tbl; + struct disk_part_tbl *old_ptbl = + rcu_dereference_protected(disk->part_tbl, 1); rcu_assign_pointer(disk->part_tbl, new_ptbl); @@ -1151,14 +1152,16 @@ static void disk_replace_part_tbl(struct gendisk *disk, * uses RCU to allow unlocked dereferencing for stats and other stuff. * * LOCKING: - * Matching bd_mutex locked, might sleep. + * Matching bd_mutex locked or the caller is the only user of @disk. + * Might sleep. * * RETURNS: * 0 on success, -errno on failure. */ int disk_expand_part_tbl(struct gendisk *disk, int partno) { - struct disk_part_tbl *old_ptbl = disk->part_tbl; + struct disk_part_tbl *old_ptbl = + rcu_dereference_protected(disk->part_tbl, 1); struct disk_part_tbl *new_ptbl; int len = old_ptbl ? old_ptbl->len : 0; int i, target; @@ -1352,6 +1355,7 @@ EXPORT_SYMBOL(alloc_disk); struct gendisk *alloc_disk_node(int minors, int node_id) { struct gendisk *disk; + struct disk_part_tbl *ptbl; disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); if (disk) { @@ -1365,7 +1369,8 @@ struct gendisk *alloc_disk_node(int minors, int node_id) kfree(disk); return NULL; } - disk->part_tbl->part[0] = &disk->part0; + ptbl = rcu_dereference_protected(disk->part_tbl, 1); + rcu_assign_pointer(ptbl->part[0], &disk->part0); /* * set_capacity() and get_capacity() currently don't use diff --git a/block/partition-generic.c b/block/partition-generic.c index fa5049a4d99b..1745a9659517 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -252,15 +252,20 @@ void __delete_partition(struct percpu_ref *ref) call_rcu(&part->rcu_head, delete_partition_rcu_cb); } +/* + * Must be called either with bd_mutex held, before a disk can be opened or + * after all disk users are gone. + */ void delete_partition(struct gendisk *disk, int partno) { - struct disk_part_tbl *ptbl = disk->part_tbl; + struct disk_part_tbl *ptbl = + rcu_dereference_protected(disk->part_tbl, 1); struct hd_struct *part; if (partno >= ptbl->len) return; - part = ptbl->part[partno]; + part = rcu_dereference_protected(ptbl->part[partno], 1); if (!part) return; @@ -280,6 +285,10 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); +/* + * Must be called either with bd_mutex held, before a disk can be opened or + * after all disk users are gone. + */ struct hd_struct *add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, struct partition_meta_info *info) @@ -295,7 +304,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, err = disk_expand_part_tbl(disk, partno); if (err) return ERR_PTR(err); - ptbl = disk->part_tbl; + ptbl = rcu_dereference_protected(disk->part_tbl, 1); if (ptbl->part[partno]) return ERR_PTR(-EBUSY); -- cgit v1.2.3-70-g09d2 From de65b0123216a8e1dbe3ca1eb20a45572b9e71d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Aug 2017 19:10:29 +0200 Subject: block: reject attempts to allocate more than DISK_MAX_PARTS partitions Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'block/genhd.c') diff --git a/block/genhd.c b/block/genhd.c index 2367087cdb7c..3380a1e73ea0 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1357,6 +1357,13 @@ struct gendisk *alloc_disk_node(int minors, int node_id) struct gendisk *disk; struct disk_part_tbl *ptbl; + if (minors > DISK_MAX_PARTS) { + printk(KERN_ERR + "block: can't allocated more than %d partitions\n", + DISK_MAX_PARTS); + minors = DISK_MAX_PARTS; + } + disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); if (disk) { if (!init_part_stats(&disk->part0)) { -- cgit v1.2.3-70-g09d2 From 807d4af2f64ed79fdbb28e582e330be3dbe10d23 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Aug 2017 19:10:30 +0200 Subject: block: add a __disk_get_part helper This helper allows looking up a partion under RCU protection without grabbing a reference to it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk.h | 2 ++ block/genhd.c | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'block/genhd.c') diff --git a/block/blk.h b/block/blk.h index 3a3d715bd725..fde8b351c166 100644 --- a/block/blk.h +++ b/block/blk.h @@ -204,6 +204,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq e->type->ops.sq.elevator_deactivate_req_fn(q, rq); } +struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); + #ifdef CONFIG_FAIL_IO_TIMEOUT int blk_should_fake_timeout(struct request_queue *); ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); diff --git a/block/genhd.c b/block/genhd.c index 3380a1e73ea0..713b7d4fe7a1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -82,6 +82,15 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part, } } +struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) +{ + struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); + + if (unlikely(partno < 0 || partno >= ptbl->len)) + return NULL; + return rcu_dereference(ptbl->part[partno]); +} + /** * disk_get_part - get partition * @disk: disk to look partition from @@ -98,21 +107,12 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part, */ struct hd_struct *disk_get_part(struct gendisk *disk, int partno) { - struct hd_struct *part = NULL; - struct disk_part_tbl *ptbl; - - if (unlikely(partno < 0)) - return NULL; + struct hd_struct *part; rcu_read_lock(); - - ptbl = rcu_dereference(disk->part_tbl); - if (likely(partno < ptbl->len)) { - part = rcu_dereference(ptbl->part[partno]); - if (part) - get_device(part_to_dev(part)); - } - + part = __disk_get_part(disk, partno); + if (part) + get_device(part_to_dev(part)); rcu_read_unlock(); return part; -- cgit v1.2.3-70-g09d2