diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bdev.c | 23 | ||||
-rw-r--r-- | block/bfq-iosched.c | 12 | ||||
-rw-r--r-- | block/blk-core.c | 4 | ||||
-rw-r--r-- | block/blk-iocost.c | 12 | ||||
-rw-r--r-- | block/blk-mq.c | 118 | ||||
-rw-r--r-- | block/fops.c | 12 | ||||
-rw-r--r-- | block/mq-deadline.c | 1 | ||||
-rw-r--r-- | block/partitions/core.c | 2 |
8 files changed, 106 insertions, 78 deletions
diff --git a/block/bdev.c b/block/bdev.c index 13de871fa816..5fe06c1f2def 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -200,6 +200,13 @@ int sync_blockdev(struct block_device *bdev) } EXPORT_SYMBOL(sync_blockdev); +int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend) +{ + return filemap_write_and_wait_range(bdev->bd_inode->i_mapping, + lstart, lend); +} +EXPORT_SYMBOL(sync_blockdev_range); + /* * Write out and wait upon all dirty data associated with this * device. Filesystem data as well as the underlying block @@ -673,17 +680,17 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode) } } - if (!bdev->bd_openers) + if (!atomic_read(&bdev->bd_openers)) set_init_blocksize(bdev); if (test_bit(GD_NEED_PART_SCAN, &disk->state)) bdev_disk_changed(disk, false); - bdev->bd_openers++; + atomic_inc(&bdev->bd_openers); return 0; } static void blkdev_put_whole(struct block_device *bdev, fmode_t mode) { - if (!--bdev->bd_openers) + if (atomic_dec_and_test(&bdev->bd_openers)) blkdev_flush_mapping(bdev); if (bdev->bd_disk->fops->release) bdev->bd_disk->fops->release(bdev->bd_disk, mode); @@ -694,7 +701,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode) struct gendisk *disk = part->bd_disk; int ret; - if (part->bd_openers) + if (atomic_read(&part->bd_openers)) goto done; ret = blkdev_get_whole(bdev_whole(part), mode); @@ -708,7 +715,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode) disk->open_partitions++; set_init_blocksize(part); done: - part->bd_openers++; + atomic_inc(&part->bd_openers); return 0; out_blkdev_put: @@ -720,7 +727,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode) { struct block_device *whole = bdev_whole(part); - if (--part->bd_openers) + if (!atomic_dec_and_test(&part->bd_openers)) return; blkdev_flush_mapping(part); whole->bd_disk->open_partitions--; @@ -899,7 +906,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * of the world and we 
want to avoid long (could be several minute) * syncs while holding the mutex. */ - if (bdev->bd_openers == 1) + if (atomic_read(&bdev->bd_openers) == 1) sync_blockdev(bdev); mutex_lock(&disk->open_mutex); @@ -1044,7 +1051,7 @@ void sync_bdevs(bool wait) bdev = I_BDEV(inode); mutex_lock(&bdev->bd_disk->open_mutex); - if (!bdev->bd_openers) { + if (!atomic_read(&bdev->bd_openers)) { ; /* skip */ } else if (wait) { /* diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 038e075e05b8..0d46cb728bbf 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -571,7 +571,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) struct bfq_entity *entity = &bfqq->entity; struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH]; struct bfq_entity **entities = inline_entities; - int depth, level; + int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH; int class_idx = bfqq->ioprio_class - 1; struct bfq_sched_data *sched_data; unsigned long wsum; @@ -580,15 +580,21 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) if (!entity->on_st_or_in_serv) return false; +retry: + spin_lock_irq(&bfqd->lock); /* +1 for bfqq entity, root cgroup not included */ depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1; - if (depth > BFQ_LIMIT_INLINE_DEPTH) { + if (depth > alloc_depth) { + spin_unlock_irq(&bfqd->lock); + if (entities != inline_entities) + kfree(entities); entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO); if (!entities) return false; + alloc_depth = depth; + goto retry; } - spin_lock_irq(&bfqd->lock); sched_data = entity->sched_data; /* Gather our ancestors as we need to traverse them in reverse order */ level = 0; diff --git a/block/blk-core.c b/block/blk-core.c index f002fed8e5a5..06ff5bbfe8f6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -50,7 +50,6 @@ #include "blk-pm.h" #include "blk-cgroup.h" #include "blk-throttle.h" -#include "blk-rq-qos.h" struct dentry *blk_debugfs_root; @@ -315,9 
+314,6 @@ void blk_cleanup_queue(struct request_queue *q) */ blk_freeze_queue(q); - /* cleanup rq qos structures for queue without disk */ - rq_qos_exit(q); - blk_queue_flag_set(QUEUE_FLAG_DEAD, q); blk_sync_queue(q); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 8132d49df37b..33a11ba971ea 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2315,7 +2315,17 @@ static void ioc_timer_fn(struct timer_list *timer) iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); - } else { + } else if (!iocg->abs_vdebt) { + /* + * @iocg doesn't have enough to donate. Reset + * its inuse to active. + * + * Don't reset debtors as their inuse's are + * owned by debt handling. This shouldn't affect + * donation calculation in any meaningful way + * as @iocg doesn't have a meaningful amount of + * share anyway. + */ TRACE_IOCG_PATH(inuse_shortage, iocg, &now, iocg->inuse, iocg->active, iocg->hweight_inuse, new_hwi); diff --git a/block/blk-mq.c b/block/blk-mq.c index 30f4565623a8..30e4bdcd8d7f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1132,14 +1132,7 @@ void blk_mq_start_request(struct request *rq) trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { - u64 start_time; -#ifdef CONFIG_BLK_CGROUP - if (rq->bio) - start_time = bio_issue_time(&rq->bio->bi_issue); - else -#endif - start_time = ktime_get_ns(); - rq->io_start_time_ns = start_time; + rq->io_start_time_ns = ktime_get_ns(); rq->stats_sectors = blk_rq_sectors(rq); rq->rq_flags |= RQF_STATS; rq_qos_issue(q, rq); @@ -1177,6 +1170,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error) complete(waiting); } +/* + * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple + * queues. This is important for md arrays to benefit from merging + * requests. 
+ */ +static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) +{ + if (plug->multiple_queues) + return BLK_MAX_REQUEST_COUNT * 2; + return BLK_MAX_REQUEST_COUNT; +} + +static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) +{ + struct request *last = rq_list_peek(&plug->mq_list); + + if (!plug->rq_count) { + trace_block_plug(rq->q); + } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || + (!blk_queue_nomerges(rq->q) && + blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { + blk_mq_flush_plug_list(plug, false); + trace_block_plug(rq->q); + } + + if (!plug->multiple_queues && last && last->q != rq->q) + plug->multiple_queues = true; + if (!plug->has_elevator && (rq->rq_flags & RQF_ELV)) + plug->has_elevator = true; + rq->rq_next = NULL; + rq_list_add(&plug->mq_list, rq); + plug->rq_count++; +} + +static void __blk_execute_rq_nowait(struct request *rq, bool at_head, + rq_end_io_fn *done, bool use_plug) +{ + WARN_ON(irqs_disabled()); + WARN_ON(!blk_rq_is_passthrough(rq)); + + rq->end_io = done; + + blk_account_io_start(rq); + + if (use_plug && current->plug) { + blk_add_rq_to_plug(current->plug, rq); + return; + } + /* + * don't check dying flag for MQ because the request won't + * be reused after dying flag is set + */ + blk_mq_sched_insert_request(rq, at_head, true, false); +} + + /** * blk_execute_rq_nowait - insert a request to I/O scheduler for execution * @rq: request to insert @@ -1192,18 +1241,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error) */ void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done) { - WARN_ON(irqs_disabled()); - WARN_ON(!blk_rq_is_passthrough(rq)); - - rq->end_io = done; + __blk_execute_rq_nowait(rq, at_head, done, true); - blk_account_io_start(rq); - - /* - * don't check dying flag for MQ because the request won't - * be reused after dying flag is set - */ - blk_mq_sched_insert_request(rq, at_head, true, false); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); 
@@ -1241,8 +1280,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head) DECLARE_COMPLETION_ONSTACK(wait); unsigned long hang_check; + /* + * iopoll requires request to be submitted to driver, so can't + * use plug + */ rq->end_io_data = &wait; - blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq); + __blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq, + !blk_rq_is_poll(rq)); /* Prevent hang_check timer from firing at us during very long I/O */ hang_check = sysctl_hung_task_timeout_secs; @@ -2683,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, hctx->queue->mq_ops->commit_rqs(hctx); } -/* - * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple - * queues. This is important for md arrays to benefit from merging - * requests. - */ -static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) -{ - if (plug->multiple_queues) - return BLK_MAX_REQUEST_COUNT * 2; - return BLK_MAX_REQUEST_COUNT; -} - -static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) -{ - struct request *last = rq_list_peek(&plug->mq_list); - - if (!plug->rq_count) { - trace_block_plug(rq->q); - } else if (plug->rq_count >= blk_plug_max_rq_count(plug) || - (!blk_queue_nomerges(rq->q) && - blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { - blk_mq_flush_plug_list(plug, false); - trace_block_plug(rq->q); - } - - if (!plug->multiple_queues && last && last->q != rq->q) - plug->multiple_queues = true; - if (!plug->has_elevator && (rq->rq_flags & RQF_ELV)) - plug->has_elevator = true; - rq->rq_next = NULL; - rq_list_add(&plug->mq_list, rq); - plug->rq_count++; -} - static bool blk_mq_attempt_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { diff --git a/block/fops.c b/block/fops.c index b9b83030e0df..d6b3276a6c68 100644 --- a/block/fops.c +++ b/block/fops.c @@ -372,9 +372,9 @@ static int blkdev_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, 
blkdev_get_block, wbc); } -static int blkdev_readpage(struct file * file, struct page * page) +static int blkdev_read_folio(struct file *file, struct folio *folio) { - return block_read_full_page(page, blkdev_get_block); + return block_read_full_folio(folio, blkdev_get_block); } static void blkdev_readahead(struct readahead_control *rac) @@ -383,11 +383,9 @@ static void blkdev_readahead(struct readahead_control *rac) } static int blkdev_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, struct page **pagep, - void **fsdata) + loff_t pos, unsigned len, struct page **pagep, void **fsdata) { - return block_write_begin(mapping, pos, len, flags, pagep, - blkdev_get_block); + return block_write_begin(mapping, pos, len, pagep, blkdev_get_block); } static int blkdev_write_end(struct file *file, struct address_space *mapping, @@ -412,7 +410,7 @@ static int blkdev_writepages(struct address_space *mapping, const struct address_space_operations def_blk_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, - .readpage = blkdev_readpage, + .read_folio = blkdev_read_folio, .readahead = blkdev_readahead, .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 3ed5eaf3446a..6ed602b2f80a 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, if (at_head) { list_add(&rq->queuelist, &per_prio->dispatch); + rq->fifo_time = jiffies; } else { deadline_add_rq_rb(per_prio, rq); diff --git a/block/partitions/core.c b/block/partitions/core.c index 70dec1c78521..8a0ec929023b 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -478,7 +478,7 @@ int bdev_del_partition(struct gendisk *disk, int partno) goto out_unlock; ret = -EBUSY; - if (part->bd_openers) + if (atomic_read(&part->bd_openers)) goto out_unlock; 
delete_partition(part); |