summaryrefslogtreecommitdiff
path: root/block/blk-wbt.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 13:19:59 -0800
commit0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /block/blk-wbt.c
parentb12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff)
parent00203ba40d40d7f33857416adfb18adaf0e40123 (diff)
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block/storage for 4.21. Larger than usual, it was a busy round with lots of goodies queued up. Most notable is the removal of the old IO stack, which has been a long time coming. No new features for a while, everything coming in this week has all been fixes for things that were previously merged. This contains: - Use atomic counters instead of semaphores for mtip32xx (Arnd) - Cleanup of the mtip32xx request setup (Christoph) - Fix for circular locking dependency in loop (Jan, Tetsuo) - bcache (Coly, Guoju, Shenghui) * Optimizations for writeback caching * Various fixes and improvements - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith) * host and target support for NVMe over TCP * Error log page support * Support for separate read/write/poll queues * Much improved polling * discard OOM fallback * Tracepoint improvements - lightnvm (Hans, Hua, Igor, Matias, Javier) * Igor added packed metadata to pblk. Now drives without metadata per LBA can be used as well. * Fix from Geert on uninitialized value on chunk metadata reads. * Fixes from Hans and Javier to pblk recovery and write path. * Fix from Hua Su to fix a race condition in the pblk recovery code. * Scan optimization added to pblk recovery from Zhoujie. * Small geometry cleanup from me. - Conversion of the last few drivers that used the legacy path to blk-mq (me) - Removal of legacy IO path in SCSI (me, Christoph) - Removal of legacy IO stack and schedulers (me) - Support for much better polling, now without interrupts at all. blk-mq adds support for multiple queue maps, which enables us to have a map per type. This in turn enables nvme to have separate completion queues for polling, which can then be interrupt-less. Also means we're ready for async polled IO, which is hopefully coming in the next release. - Killing of (now) unused block exports (Christoph) - Unification of the blk-rq-qos and blk-wbt wait handling (Josef) - Support for zoned testing with null_blk (Masato) - sx8 conversion to per-host tag sets (Christoph) - IO priority improvements (Damien) - mq-deadline zoned fix (Damien) - Ref count blkcg series (Dennis) - Lots of blk-mq improvements and speedups (me) - sbitmap scalability improvements (me) - Make core inflight IO accounting per-cpu (Mikulas) - Export timeout setting in sysfs (Weiping) - Cleanup the direct issue path (Jianchao) - Export blk-wbt internals in block debugfs for easier debugging (Ming) - Lots of other fixes and improvements" * tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits) kyber: use sbitmap add_wait_queue/list_del wait helpers sbitmap: add helpers for add/del wait queue handling block: save irq state in blkg_lookup_create() dm: don't reuse bio for flushes nvme-pci: trace SQ status on completions nvme-rdma: implement polling queue map nvme-fabrics: allow user to pass in nr_poll_queues nvme-fabrics: allow nvmf_connect_io_queue to poll nvme-core: optionally poll sync commands block: make request_to_qc_t public nvme-tcp: fix spelling mistake "attepmpt" -> "attempt" nvme-tcp: fix endianess annotations nvmet-tcp: fix endianess annotations nvme-pci: refactor nvme_poll_irqdisable to make sparse happy nvme-pci: only set nr_maps to 2 if poll queues are supported nvmet: use a macro for default error location nvmet: fix comparison of a u16 with -1 blk-mq: enable IO poll if .nr_queues of type poll > 0 blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight() blk-mq: skip zero-queue maps in blk_mq_map_swqueue ...
Diffstat (limited to 'block/blk-wbt.c')
-rw-r--r--block/blk-wbt.c176
1 files changed, 108 insertions, 68 deletions
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8ac93fcbaa2e..f0c56649775f 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -489,31 +489,21 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
}
struct wbt_wait_data {
- struct wait_queue_entry wq;
- struct task_struct *task;
struct rq_wb *rwb;
- struct rq_wait *rqw;
+ enum wbt_flags wb_acct;
unsigned long rw;
- bool got_token;
};
-static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode,
- int wake_flags, void *key)
+static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data)
{
- struct wbt_wait_data *data = container_of(curr, struct wbt_wait_data,
- wq);
-
- /*
- * If we fail to get a budget, return -1 to interrupt the wake up
- * loop in __wake_up_common.
- */
- if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
- return -1;
+ struct wbt_wait_data *data = private_data;
+ return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw));
+}
- data->got_token = true;
- list_del_init(&curr->entry);
- wake_up_process(data->task);
- return 1;
+static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data)
+{
+ struct wbt_wait_data *data = private_data;
+ wbt_rqw_done(data->rwb, rqw, data->wb_acct);
}
/*
@@ -521,57 +511,16 @@ static int wbt_wake_function(struct wait_queue_entry *curr, unsigned int mode,
* the timer to kick off queuing again.
*/
static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
- unsigned long rw, spinlock_t *lock)
- __releases(lock)
- __acquires(lock)
+ unsigned long rw)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
struct wbt_wait_data data = {
- .wq = {
- .func = wbt_wake_function,
- .entry = LIST_HEAD_INIT(data.wq.entry),
- },
- .task = current,
.rwb = rwb,
- .rqw = rqw,
+ .wb_acct = wb_acct,
.rw = rw,
};
- bool has_sleeper;
-
- has_sleeper = wq_has_sleeper(&rqw->wait);
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
- return;
- prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
- do {
- if (data.got_token)
- break;
-
- if (!has_sleeper &&
- rq_wait_inc_below(rqw, get_limit(rwb, rw))) {
- finish_wait(&rqw->wait, &data.wq);
-
- /*
- * We raced with wbt_wake_function() getting a token,
- * which means we now have two. Put our local token
- * and wake anyone else potentially waiting for one.
- */
- if (data.got_token)
- wbt_rqw_done(rwb, rqw, wb_acct);
- break;
- }
-
- if (lock) {
- spin_unlock_irq(lock);
- io_schedule();
- spin_lock_irq(lock);
- } else
- io_schedule();
-
- has_sleeper = false;
- } while (1);
-
- finish_wait(&rqw->wait, &data.wq);
+ rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb);
}
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
@@ -624,7 +573,7 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
* in an irq held spinlock, if it holds one when calling this function.
* If we do sleep, we'll release and re-grab it.
*/
-static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
+static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
{
struct rq_wb *rwb = RQWB(rqos);
enum wbt_flags flags;
@@ -636,7 +585,7 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
return;
}
- __wbt_wait(rwb, flags, bio->bi_opf, lock);
+ __wbt_wait(rwb, flags, bio->bi_opf);
if (!blk_stat_is_active(rwb->cb))
rwb_arm_timer(rwb);
@@ -709,8 +658,7 @@ void wbt_enable_default(struct request_queue *q)
if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
return;
- if ((q->mq_ops && IS_ENABLED(CONFIG_BLK_WBT_MQ)) ||
- (q->request_fn && IS_ENABLED(CONFIG_BLK_WBT_SQ)))
+ if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
wbt_init(q);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);
@@ -760,11 +708,100 @@ void wbt_disable_default(struct request_queue *q)
if (!rqos)
return;
rwb = RQWB(rqos);
- if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
+ if (rwb->enable_state == WBT_STATE_ON_DEFAULT) {
+ blk_stat_deactivate(rwb->cb);
rwb->wb_normal = 0;
+ }
}
EXPORT_SYMBOL_GPL(wbt_disable_default);
+#ifdef CONFIG_BLK_DEBUG_FS
+static int wbt_curr_win_nsec_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%llu\n", rwb->cur_win_nsec);
+ return 0;
+}
+
+static int wbt_enabled_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%d\n", rwb->enable_state);
+ return 0;
+}
+
+static int wbt_id_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+
+ seq_printf(m, "%u\n", rqos->id);
+ return 0;
+}
+
+static int wbt_inflight_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+ int i;
+
+ for (i = 0; i < WBT_NUM_RWQ; i++)
+ seq_printf(m, "%d: inflight %d\n", i,
+ atomic_read(&rwb->rq_wait[i].inflight));
+ return 0;
+}
+
+static int wbt_min_lat_nsec_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%lu\n", rwb->min_lat_nsec);
+ return 0;
+}
+
+static int wbt_unknown_cnt_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%u\n", rwb->unknown_cnt);
+ return 0;
+}
+
+static int wbt_normal_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%u\n", rwb->wb_normal);
+ return 0;
+}
+
+static int wbt_background_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct rq_wb *rwb = RQWB(rqos);
+
+ seq_printf(m, "%u\n", rwb->wb_background);
+ return 0;
+}
+
+static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
+ {"curr_win_nsec", 0400, wbt_curr_win_nsec_show},
+ {"enabled", 0400, wbt_enabled_show},
+ {"id", 0400, wbt_id_show},
+ {"inflight", 0400, wbt_inflight_show},
+ {"min_lat_nsec", 0400, wbt_min_lat_nsec_show},
+ {"unknown_cnt", 0400, wbt_unknown_cnt_show},
+ {"wb_normal", 0400, wbt_normal_show},
+ {"wb_background", 0400, wbt_background_show},
+ {},
+};
+#endif
static struct rq_qos_ops wbt_rqos_ops = {
.throttle = wbt_wait,
@@ -774,6 +811,9 @@ static struct rq_qos_ops wbt_rqos_ops = {
.done = wbt_done,
.cleanup = wbt_cleanup,
.exit = wbt_exit,
+#ifdef CONFIG_BLK_DEBUG_FS
+ .debugfs_attrs = wbt_debugfs_attrs,
+#endif
};
int wbt_init(struct request_queue *q)