diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-28 13:19:59 -0800 |
commit | 0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch) | |
tree | 2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /block/blk-iolatency.c | |
parent | b12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff) | |
parent | 00203ba40d40d7f33857416adfb18adaf0e40123 (diff) |
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block/storage for 4.21.
Larger than usual, it was a busy round with lots of goodies queued up.
Most notable is the removal of the old IO stack, which has been a long
time coming. No new features for a while, everything coming in this
week has all been fixes for things that were previously merged.
This contains:
- Use atomic counters instead of semaphores for mtip32xx (Arnd)
- Cleanup of the mtip32xx request setup (Christoph)
- Fix for circular locking dependency in loop (Jan, Tetsuo)
- bcache (Coly, Guoju, Shenghui)
* Optimizations for writeback caching
* Various fixes and improvements
- nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
* host and target support for NVMe over TCP
* Error log page support
* Support for separate read/write/poll queues
* Much improved polling
* discard OOM fallback
* Tracepoint improvements
- lightnvm (Hans, Hua, Igor, Matias, Javier)
* Igor added packed metadata to pblk. Now drives without metadata
per LBA can be used as well.
* Fix from Geert on uninitialized value on chunk metadata reads.
* Fixes from Hans and Javier to pblk recovery and write path.
* Fix from Hua Su to fix a race condition in the pblk recovery
code.
* Scan optimization added to pblk recovery from Zhoujie.
* Small geometry cleanup from me.
- Conversion of the last few drivers that used the legacy path to
blk-mq (me)
- Removal of legacy IO path in SCSI (me, Christoph)
- Removal of legacy IO stack and schedulers (me)
- Support for much better polling, now without interrupts at all.
blk-mq adds support for multiple queue maps, which enables us to
have a map per type. This in turn enables nvme to have separate
completion queues for polling, which can then be interrupt-less.
Also means we're ready for async polled IO, which is hopefully
coming in the next release.
- Killing of (now) unused block exports (Christoph)
- Unification of the blk-rq-qos and blk-wbt wait handling (Josef)
- Support for zoned testing with null_blk (Masato)
- sx8 conversion to per-host tag sets (Christoph)
- IO priority improvements (Damien)
- mq-deadline zoned fix (Damien)
- Ref count blkcg series (Dennis)
- Lots of blk-mq improvements and speedups (me)
- sbitmap scalability improvements (me)
- Make core inflight IO accounting per-cpu (Mikulas)
- Export timeout setting in sysfs (Weiping)
- Cleanup the direct issue path (Jianchao)
- Export blk-wbt internals in block debugfs for easier debugging
(Ming)
- Lots of other fixes and improvements"
* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
kyber: use sbitmap add_wait_queue/list_del wait helpers
sbitmap: add helpers for add/del wait queue handling
block: save irq state in blkg_lookup_create()
dm: don't reuse bio for flushes
nvme-pci: trace SQ status on completions
nvme-rdma: implement polling queue map
nvme-fabrics: allow user to pass in nr_poll_queues
nvme-fabrics: allow nvmf_connect_io_queue to poll
nvme-core: optionally poll sync commands
block: make request_to_qc_t public
nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
nvme-tcp: fix endianess annotations
nvmet-tcp: fix endianess annotations
nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
nvme-pci: only set nr_maps to 2 if poll queues are supported
nvmet: use a macro for default error location
nvmet: fix comparison of a u16 with -1
blk-mq: enable IO poll if .nr_queues of type poll > 0
blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
blk-mq: skip zero-queue maps in blk_mq_map_swqueue
...
Diffstat (limited to 'block/blk-iolatency.c')
-rw-r--r-- | block/blk-iolatency.c | 75 |
1 files changed, 14 insertions, 61 deletions
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 38c35c32aff2..fc714ef402a6 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -262,29 +262,25 @@ static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
 				      stat->rqs.mean);
 }
 
-static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
-				       wait_queue_entry_t *wait,
-				       bool first_block)
+static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
 {
-	struct rq_wait *rqw = &iolat->rq_wait;
+	atomic_dec(&rqw->inflight);
+	wake_up(&rqw->wait);
+}
 
-	if (first_block && waitqueue_active(&rqw->wait) &&
-	    rqw->wait.head.next != &wait->entry)
-		return false;
+static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
+{
+	struct iolatency_grp *iolat = private_data;
 	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
 }
 
 static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
 				       struct iolatency_grp *iolat,
-				       spinlock_t *lock, bool issue_as_root,
+				       bool issue_as_root,
 				       bool use_memdelay)
-	__releases(lock)
-	__acquires(lock)
 {
 	struct rq_wait *rqw = &iolat->rq_wait;
 	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
-	DEFINE_WAIT(wait);
-	bool first_block = true;
 
 	if (use_delay)
 		blkcg_schedule_throttle(rqos->q, use_memdelay);
@@ -301,27 +297,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
 		return;
 	}
 
-	if (iolatency_may_queue(iolat, &wait, first_block))
-		return;
-
-	do {
-		prepare_to_wait_exclusive(&rqw->wait, &wait,
-					  TASK_UNINTERRUPTIBLE);
-
-		if (iolatency_may_queue(iolat, &wait, first_block))
-			break;
-		first_block = false;
-
-		if (lock) {
-			spin_unlock_irq(lock);
-			io_schedule();
-			spin_lock_irq(lock);
-		} else {
-			io_schedule();
-		}
-	} while (1);
-
-	finish_wait(&rqw->wait, &wait);
+	rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
 }
 
 #define SCALE_DOWN_FACTOR	2
@@ -478,38 +454,15 @@ static void check_scale_change(struct iolatency_grp *iolat)
 	scale_change(iolat, direction > 0);
 }
 
-static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
-				     spinlock_t *lock)
+static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
 {
 	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
-	struct blkcg *blkcg;
-	struct blkcg_gq *blkg;
-	struct request_queue *q = rqos->q;
+	struct blkcg_gq *blkg = bio->bi_blkg;
 	bool issue_as_root = bio_issue_as_root_blkg(bio);
 
 	if (!blk_iolatency_enabled(blkiolat))
 		return;
 
-	rcu_read_lock();
-	blkcg = bio_blkcg(bio);
-	bio_associate_blkcg(bio, &blkcg->css);
-	blkg = blkg_lookup(blkcg, q);
-	if (unlikely(!blkg)) {
-		if (!lock)
-			spin_lock_irq(q->queue_lock);
-		blkg = blkg_lookup_create(blkcg, q);
-		if (IS_ERR(blkg))
-			blkg = NULL;
-		if (!lock)
-			spin_unlock_irq(q->queue_lock);
-	}
-	if (!blkg)
-		goto out;
-
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-	bio_associate_blkg(bio, blkg);
-out:
-	rcu_read_unlock();
 	while (blkg && blkg->parent) {
 		struct iolatency_grp *iolat = blkg_to_lat(blkg);
 		if (!iolat) {
@@ -518,7 +471,7 @@ out:
 		}
 
 		check_scale_change(iolat);
-		__blkcg_iolatency_throttle(rqos, iolat, lock, issue_as_root,
+		__blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
 				     (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
 		blkg = blkg->parent;
 	}
@@ -640,7 +593,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
 	bool enabled = false;
 
 	blkg = bio->bi_blkg;
-	if (!blkg)
+	if (!blkg || !bio_flagged(bio, BIO_TRACKED))
 		return;
 
 	iolat = blkg_to_lat(bio->bi_blkg);
@@ -730,7 +683,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
 		 * We could be exiting, don't access the pd unless we have a
 		 * ref on the blkg.
 		 */
-		if (!blkg_try_get(blkg))
+		if (!blkg_tryget(blkg))
 			continue;
 
 		iolat = blkg_to_lat(blkg);