summaryrefslogtreecommitdiff
path: root/block/blk-iolatency.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
commit0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /block/blk-iolatency.c
parentb12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff)
parent00203ba40d40d7f33857416adfb18adaf0e40123 (diff)
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block/storage for 4.21. Larger than usual, it was a busy round with lots of goodies queued up. Most notable is the removal of the old IO stack, which has been a long time coming. No new features for a while, everything coming in this week has all been fixes for things that were previously merged. This contains: - Use atomic counters instead of semaphores for mtip32xx (Arnd) - Cleanup of the mtip32xx request setup (Christoph) - Fix for circular locking dependency in loop (Jan, Tetsuo) - bcache (Coly, Guoju, Shenghui) * Optimizations for writeback caching * Various fixes and improvements - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith) * host and target support for NVMe over TCP * Error log page support * Support for separate read/write/poll queues * Much improved polling * discard OOM fallback * Tracepoint improvements - lightnvm (Hans, Hua, Igor, Matias, Javier) * Igor added packed metadata to pblk. Now drives without metadata per LBA can be used as well. * Fix from Geert on uninitialized value on chunk metadata reads. * Fixes from Hans and Javier to pblk recovery and write path. * Fix from Hua Su to fix a race condition in the pblk recovery code. * Scan optimization added to pblk recovery from Zhoujie. * Small geometry cleanup from me. - Conversion of the last few drivers that used the legacy path to blk-mq (me) - Removal of legacy IO path in SCSI (me, Christoph) - Removal of legacy IO stack and schedulers (me) - Support for much better polling, now without interrupts at all. blk-mq adds support for multiple queue maps, which enables us to have a map per type. This in turn enables nvme to have separate completion queues for polling, which can then be interrupt-less. Also means we're ready for async polled IO, which is hopefully coming in the next release. - Killing of (now) unused block exports (Christoph) - Unification of the blk-rq-qos and blk-wbt wait handling (Josef) - Support for zoned testing with null_blk (Masato) - sx8 conversion to per-host tag sets (Christoph) - IO priority improvements (Damien) - mq-deadline zoned fix (Damien) - Ref count blkcg series (Dennis) - Lots of blk-mq improvements and speedups (me) - sbitmap scalability improvements (me) - Make core inflight IO accounting per-cpu (Mikulas) - Export timeout setting in sysfs (Weiping) - Cleanup the direct issue path (Jianchao) - Export blk-wbt internals in block debugfs for easier debugging (Ming) - Lots of other fixes and improvements" * tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits) kyber: use sbitmap add_wait_queue/list_del wait helpers sbitmap: add helpers for add/del wait queue handling block: save irq state in blkg_lookup_create() dm: don't reuse bio for flushes nvme-pci: trace SQ status on completions nvme-rdma: implement polling queue map nvme-fabrics: allow user to pass in nr_poll_queues nvme-fabrics: allow nvmf_connect_io_queue to poll nvme-core: optionally poll sync commands block: make request_to_qc_t public nvme-tcp: fix spelling mistake "attepmpt" -> "attempt" nvme-tcp: fix endianess annotations nvmet-tcp: fix endianess annotations nvme-pci: refactor nvme_poll_irqdisable to make sparse happy nvme-pci: only set nr_maps to 2 if poll queues are supported nvmet: use a macro for default error location nvmet: fix comparison of a u16 with -1 blk-mq: enable IO poll if .nr_queues of type poll > 0 blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight() blk-mq: skip zero-queue maps in blk_mq_map_swqueue ...
Diffstat (limited to 'block/blk-iolatency.c')
-rw-r--r--block/blk-iolatency.c75
1 files changed, 14 insertions, 61 deletions
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 38c35c32aff2..fc714ef402a6 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -262,29 +262,25 @@ static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
stat->rqs.mean);
}
-static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
- wait_queue_entry_t *wait,
- bool first_block)
+static void iolat_cleanup_cb(struct rq_wait *rqw, void *private_data)
{
- struct rq_wait *rqw = &iolat->rq_wait;
+ atomic_dec(&rqw->inflight);
+ wake_up(&rqw->wait);
+}
- if (first_block && waitqueue_active(&rqw->wait) &&
- rqw->wait.head.next != &wait->entry)
- return false;
+static bool iolat_acquire_inflight(struct rq_wait *rqw, void *private_data)
+{
+ struct iolatency_grp *iolat = private_data;
return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
}
static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
struct iolatency_grp *iolat,
- spinlock_t *lock, bool issue_as_root,
+ bool issue_as_root,
bool use_memdelay)
- __releases(lock)
- __acquires(lock)
{
struct rq_wait *rqw = &iolat->rq_wait;
unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
- DEFINE_WAIT(wait);
- bool first_block = true;
if (use_delay)
blkcg_schedule_throttle(rqos->q, use_memdelay);
@@ -301,27 +297,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
return;
}
- if (iolatency_may_queue(iolat, &wait, first_block))
- return;
-
- do {
- prepare_to_wait_exclusive(&rqw->wait, &wait,
- TASK_UNINTERRUPTIBLE);
-
- if (iolatency_may_queue(iolat, &wait, first_block))
- break;
- first_block = false;
-
- if (lock) {
- spin_unlock_irq(lock);
- io_schedule();
- spin_lock_irq(lock);
- } else {
- io_schedule();
- }
- } while (1);
-
- finish_wait(&rqw->wait, &wait);
+ rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb);
}
#define SCALE_DOWN_FACTOR 2
@@ -478,38 +454,15 @@ static void check_scale_change(struct iolatency_grp *iolat)
scale_change(iolat, direction > 0);
}
-static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
- spinlock_t *lock)
+static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
{
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
- struct blkcg *blkcg;
- struct blkcg_gq *blkg;
- struct request_queue *q = rqos->q;
+ struct blkcg_gq *blkg = bio->bi_blkg;
bool issue_as_root = bio_issue_as_root_blkg(bio);
if (!blk_iolatency_enabled(blkiolat))
return;
- rcu_read_lock();
- blkcg = bio_blkcg(bio);
- bio_associate_blkcg(bio, &blkcg->css);
- blkg = blkg_lookup(blkcg, q);
- if (unlikely(!blkg)) {
- if (!lock)
- spin_lock_irq(q->queue_lock);
- blkg = blkg_lookup_create(blkcg, q);
- if (IS_ERR(blkg))
- blkg = NULL;
- if (!lock)
- spin_unlock_irq(q->queue_lock);
- }
- if (!blkg)
- goto out;
-
- bio_issue_init(&bio->bi_issue, bio_sectors(bio));
- bio_associate_blkg(bio, blkg);
-out:
- rcu_read_unlock();
while (blkg && blkg->parent) {
struct iolatency_grp *iolat = blkg_to_lat(blkg);
if (!iolat) {
@@ -518,7 +471,7 @@ out:
}
check_scale_change(iolat);
- __blkcg_iolatency_throttle(rqos, iolat, lock, issue_as_root,
+ __blkcg_iolatency_throttle(rqos, iolat, issue_as_root,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
blkg = blkg->parent;
}
@@ -640,7 +593,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
bool enabled = false;
blkg = bio->bi_blkg;
- if (!blkg)
+ if (!blkg || !bio_flagged(bio, BIO_TRACKED))
return;
iolat = blkg_to_lat(bio->bi_blkg);
@@ -730,7 +683,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
* We could be exiting, don't access the pd unless we have a
* ref on the blkg.
*/
- if (!blkg_try_get(blkg))
+ if (!blkg_tryget(blkg))
continue;
iolat = blkg_to_lat(blkg);