summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2013-11-08 09:08:12 -0700
committerJens Axboe <axboe@kernel.dk>2013-11-08 09:08:12 -0700
commite37459b8e2c7db6735e39e019e448b76e5e77647 (patch)
treea3f0944db87a8ae0d41e5acbbbabc1e7ef534d1b /include
parentc7d1ba417c7cb7297d14dd47a390ec90ce548d5c (diff)
parente7e245000110a7794de8f925b9edc06a9c852f80 (diff)
Merge branch 'blk-mq/core' into for-3.13/core
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflicts: block/blk-timeout.c
Diffstat (limited to 'include')
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk-mq.h183
-rw-r--r--include/linux/blk_types.h68
-rw-r--r--include/linux/blkdev.h60
-rw-r--r--include/linux/percpu_ida.h23
5 files changed, 291 insertions, 45 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 162036aca741..060ff695085c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -420,6 +420,8 @@ static inline void bio_list_init(struct bio_list *bl)
bl->head = bl->tail = NULL;
}
+#define BIO_EMPTY_LIST { NULL, NULL }
+
#define bio_list_for_each(bio, bl) \
for (bio = (bl)->head; bio; bio = bio->bi_next)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
new file mode 100644
index 000000000000..ab0e9b2025b3
--- /dev/null
+++ b/include/linux/blk-mq.h
@@ -0,0 +1,183 @@
+#ifndef BLK_MQ_H
+#define BLK_MQ_H
+
+#include <linux/blkdev.h>
+
+struct blk_mq_tags;
+
+struct blk_mq_cpu_notifier {
+ struct list_head list;
+ void *data;
+ void (*notify)(void *data, unsigned long action, unsigned int cpu);
+};
+
+struct blk_mq_hw_ctx {
+ struct {
+ spinlock_t lock;
+ struct list_head dispatch;
+ } ____cacheline_aligned_in_smp;
+
+ unsigned long state; /* BLK_MQ_S_* flags */
+ struct delayed_work delayed_work;
+
+ unsigned long flags; /* BLK_MQ_F_* flags */
+
+ struct request_queue *queue;
+ unsigned int queue_num;
+
+ void *driver_data;
+
+ unsigned int nr_ctx;
+ struct blk_mq_ctx **ctxs;
+ unsigned int nr_ctx_map;
+ unsigned long *ctx_map;
+
+ struct request **rqs;
+ struct list_head page_list;
+ struct blk_mq_tags *tags;
+
+ unsigned long queued;
+ unsigned long run;
+#define BLK_MQ_MAX_DISPATCH_ORDER 10
+ unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
+
+ unsigned int queue_depth;
+ unsigned int numa_node;
+ unsigned int cmd_size; /* per-request extra data */
+
+ struct blk_mq_cpu_notifier cpu_notifier;
+ struct kobject kobj;
+};
+
+struct blk_mq_reg {
+ struct blk_mq_ops *ops;
+ unsigned int nr_hw_queues;
+ unsigned int queue_depth;
+ unsigned int reserved_tags;
+ unsigned int cmd_size; /* per-request extra data */
+ int numa_node;
+ unsigned int timeout;
+ unsigned int flags; /* BLK_MQ_F_* */
+};
+
+typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
+typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
+typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int);
+typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
+typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
+
+struct blk_mq_ops {
+ /*
+ * Queue request
+ */
+ queue_rq_fn *queue_rq;
+
+ /*
+ * Map to specific hardware queue
+ */
+ map_queue_fn *map_queue;
+
+ /*
+ * Called on request timeout
+ */
+ rq_timed_out_fn *timeout;
+
+ /*
+ * Override for hctx allocations (should probably go)
+ */
+ alloc_hctx_fn *alloc_hctx;
+ free_hctx_fn *free_hctx;
+
+ /*
+ * Called when the block layer side of a hardware queue has been
+ * set up, allowing the driver to allocate/init matching structures.
+ * Ditto for exit/teardown.
+ */
+ init_hctx_fn *init_hctx;
+ exit_hctx_fn *exit_hctx;
+};
+
+enum {
+ BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */
+ BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */
+ BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */
+
+ BLK_MQ_F_SHOULD_MERGE = 1 << 0,
+ BLK_MQ_F_SHOULD_SORT = 1 << 1,
+ BLK_MQ_F_SHOULD_IPI = 1 << 2,
+
+ BLK_MQ_S_STOPPED = 1 << 0,
+
+ BLK_MQ_MAX_DEPTH = 2048,
+};
+
+struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
+void blk_mq_free_queue(struct request_queue *);
+int blk_mq_register_disk(struct gendisk *);
+void blk_mq_unregister_disk(struct gendisk *);
+void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
+
+void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
+
+void blk_mq_insert_request(struct request_queue *, struct request *, bool);
+void blk_mq_run_queues(struct request_queue *q, bool async);
+void blk_mq_free_request(struct request *rq);
+bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved);
+struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
+struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
+
+struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int);
+void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
+
+void blk_mq_end_io(struct request *rq, int error);
+
+void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
+void blk_mq_stop_hw_queues(struct request_queue *q);
+void blk_mq_start_stopped_hw_queues(struct request_queue *q);
+
+/*
+ * Driver command data is immediately after the request. So subtract request
+ * size to get back to the original request.
+ */
+static inline struct request *blk_mq_rq_from_pdu(void *pdu)
+{
+ return pdu - sizeof(struct request);
+}
+static inline void *blk_mq_rq_to_pdu(struct request *rq)
+{
+ return (void *) rq + sizeof(*rq);
+}
+
+static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx,
+ unsigned int tag)
+{
+ return hctx->rqs[tag];
+}
+
+/*
+ * Iterate over every hardware context of @q. @i is the running index and
+ * @hctx the cursor. The cursor is loaded inside the loop condition, after
+ * the bounds check, so queue_hw_ctx[] is never read at index nr_hw_queues
+ * (nor at index 0 when there are no hardware queues).
+ */
+#define queue_for_each_hw_ctx(q, hctx, i) \
+ for ((i) = 0; (i) < (q)->nr_hw_queues && \
+ ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
+
+/*
+ * Iterate over the software contexts of @q, looking each one up with
+ * per_cpu_ptr() at index @i. The lookup happens in the loop condition, after
+ * the bounds check, so per_cpu_ptr() is never called with @i == nr_queues.
+ * @q is parenthesized at every use so any pointer expression may be passed.
+ */
+#define queue_for_each_ctx(q, ctx, i) \
+ for ((i) = 0; (i) < (q)->nr_queues && \
+ ({ ctx = per_cpu_ptr((q)->queue_ctx, (i)); 1; }); (i)++)
+
+/*
+ * Iterate over the software contexts listed in @hctx->ctxs. @i is the
+ * running index and @ctx the cursor. The cursor is loaded inside the loop
+ * condition, after the bounds check, so ctxs[] is never read at index nr_ctx
+ * (nor at index 0 when nr_ctx is 0).
+ */
+#define hctx_for_each_ctx(hctx, ctx, i) \
+ for ((i) = 0; (i) < (hctx)->nr_ctx && \
+ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
+
+#define blk_ctx_sum(q, sum) \
+({ \
+ struct blk_mq_ctx *__x; \
+ unsigned int __ret = 0, __i; \
+ \
+ queue_for_each_ctx((q), __x, __i) \
+ __ret += sum; \
+ __ret; \
+})
+
+#endif
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fa1abeb45b76..238ef0ed62f8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,19 +178,20 @@ enum rq_flag_bits {
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_KERNEL, /* direct IO to kernel pages */
__REQ_PM, /* runtime pm request */
+ __REQ_END, /* last of chain of requests */
__REQ_NR_BITS, /* stops here */
};
-#define REQ_WRITE (1 << __REQ_WRITE)
-#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV)
-#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT)
-#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER)
-#define REQ_SYNC (1 << __REQ_SYNC)
-#define REQ_META (1 << __REQ_META)
-#define REQ_PRIO (1 << __REQ_PRIO)
-#define REQ_DISCARD (1 << __REQ_DISCARD)
-#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
-#define REQ_NOIDLE (1 << __REQ_NOIDLE)
+#define REQ_WRITE (1ULL << __REQ_WRITE)
+#define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV)
+#define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT)
+#define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER)
+#define REQ_SYNC (1ULL << __REQ_SYNC)
+#define REQ_META (1ULL << __REQ_META)
+#define REQ_PRIO (1ULL << __REQ_PRIO)
+#define REQ_DISCARD (1ULL << __REQ_DISCARD)
+#define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME)
+#define REQ_NOIDLE (1ULL << __REQ_NOIDLE)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
@@ -206,28 +207,29 @@ enum rq_flag_bits {
#define REQ_NOMERGE_FLAGS \
(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
-#define REQ_RAHEAD (1 << __REQ_RAHEAD)
-#define REQ_THROTTLED (1 << __REQ_THROTTLED)
-
-#define REQ_SORTED (1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
-#define REQ_FUA (1 << __REQ_FUA)
-#define REQ_NOMERGE (1 << __REQ_NOMERGE)
-#define REQ_STARTED (1 << __REQ_STARTED)
-#define REQ_DONTPREP (1 << __REQ_DONTPREP)
-#define REQ_QUEUED (1 << __REQ_QUEUED)
-#define REQ_ELVPRIV (1 << __REQ_ELVPRIV)
-#define REQ_FAILED (1 << __REQ_FAILED)
-#define REQ_QUIET (1 << __REQ_QUIET)
-#define REQ_PREEMPT (1 << __REQ_PREEMPT)
-#define REQ_ALLOCED (1 << __REQ_ALLOCED)
-#define REQ_COPY_USER (1 << __REQ_COPY_USER)
-#define REQ_FLUSH (1 << __REQ_FLUSH)
-#define REQ_FLUSH_SEQ (1 << __REQ_FLUSH_SEQ)
-#define REQ_IO_STAT (1 << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
-#define REQ_SECURE (1 << __REQ_SECURE)
-#define REQ_KERNEL (1 << __REQ_KERNEL)
-#define REQ_PM (1 << __REQ_PM)
+#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
+#define REQ_THROTTLED (1ULL << __REQ_THROTTLED)
+
+#define REQ_SORTED (1ULL << __REQ_SORTED)
+#define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER)
+#define REQ_FUA (1ULL << __REQ_FUA)
+#define REQ_NOMERGE (1ULL << __REQ_NOMERGE)
+#define REQ_STARTED (1ULL << __REQ_STARTED)
+#define REQ_DONTPREP (1ULL << __REQ_DONTPREP)
+#define REQ_QUEUED (1ULL << __REQ_QUEUED)
+#define REQ_ELVPRIV (1ULL << __REQ_ELVPRIV)
+#define REQ_FAILED (1ULL << __REQ_FAILED)
+#define REQ_QUIET (1ULL << __REQ_QUIET)
+#define REQ_PREEMPT (1ULL << __REQ_PREEMPT)
+#define REQ_ALLOCED (1ULL << __REQ_ALLOCED)
+#define REQ_COPY_USER (1ULL << __REQ_COPY_USER)
+#define REQ_FLUSH (1ULL << __REQ_FLUSH)
+#define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ)
+#define REQ_IO_STAT (1ULL << __REQ_IO_STAT)
+#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE)
+#define REQ_SECURE (1ULL << __REQ_SECURE)
+#define REQ_KERNEL (1ULL << __REQ_KERNEL)
+#define REQ_PM (1ULL << __REQ_PM)
+#define REQ_END (1ULL << __REQ_END)
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0e6f765aa1f5..f26ec20f6354 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -8,6 +8,7 @@
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
+#include <linux/llist.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/pagemap.h>
@@ -94,12 +95,19 @@ enum rq_cmd_type_bits {
* as well!
*/
struct request {
- struct list_head queuelist;
- struct call_single_data csd;
+ union {
+ struct list_head queuelist;
+ struct llist_node ll_list;
+ };
+ union {
+ struct call_single_data csd;
+ struct work_struct mq_flush_data;
+ };
struct request_queue *q;
+ struct blk_mq_ctx *mq_ctx;
- unsigned int cmd_flags;
+ u64 cmd_flags;
enum rq_cmd_type_bits cmd_type;
unsigned long atomic_flags;
@@ -160,8 +168,6 @@ struct request {
unsigned short ioprio;
- int ref_count;
-
void *special; /* opaque pointer available for LLD use */
char *buffer; /* kaddr of the current segment if available */
@@ -215,6 +221,8 @@ struct request_pm_state
#include <linux/elevator.h>
+struct blk_queue_ctx;
+
typedef void (request_fn_proc) (struct request_queue *q);
typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
@@ -313,6 +321,18 @@ struct request_queue {
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
+ struct blk_mq_ops *mq_ops;
+
+ unsigned int *mq_map;
+
+ /* sw queues */
+ struct blk_mq_ctx *queue_ctx;
+ unsigned int nr_queues;
+
+ /* hw dispatch queues */
+ struct blk_mq_hw_ctx **queue_hw_ctx;
+ unsigned int nr_hw_queues;
+
/*
* Dispatch queue sorting
*/
@@ -361,6 +381,11 @@ struct request_queue {
*/
struct kobject kobj;
+ /*
+ * mq queue kobject
+ */
+ struct kobject mq_kobj;
+
#ifdef CONFIG_PM_RUNTIME
struct device *dev;
int rpm_status;
@@ -425,7 +450,13 @@ struct request_queue {
unsigned long flush_pending_since;
struct list_head flush_queue[2];
struct list_head flush_data_in_flight;
- struct request flush_rq;
+ union {
+ struct request flush_rq;
+ struct {
+ spinlock_t mq_flush_lock;
+ struct work_struct mq_flush_work;
+ };
+ };
struct mutex sysfs_lock;
@@ -437,14 +468,14 @@ struct request_queue {
struct bsg_class_device bsg_dev;
#endif
-#ifdef CONFIG_BLK_CGROUP
- struct list_head all_q_node;
-#endif
#ifdef CONFIG_BLK_DEV_THROTTLING
/* Throttle data */
struct throtl_data *td;
#endif
struct rcu_head rcu_head;
+ wait_queue_head_t mq_freeze_wq;
+ struct percpu_counter mq_usage_counter;
+ struct list_head all_q_node;
};
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -467,6 +498,7 @@ struct request_queue {
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */
+#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -539,6 +571,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
+#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
@@ -570,7 +603,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
-#define rq_data_dir(rq) ((rq)->cmd_flags & 1)
+#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0)
static inline unsigned int blk_queue_cluster(struct request_queue *q)
{
@@ -1013,6 +1046,7 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
struct blk_plug {
unsigned long magic; /* detect uninitialized use-cases */
struct list_head list; /* requests */
+ struct list_head mq_list; /* blk-mq requests */
struct list_head cb_list; /* md requires an unplug callback */
};
#define BLK_MAX_REQUEST_COUNT 16
@@ -1050,7 +1084,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
struct blk_plug *plug = tsk->plug;
- return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
+ return plug &&
+ (!list_empty(&plug->list) ||
+ !list_empty(&plug->mq_list) ||
+ !list_empty(&plug->cb_list));
}
/*
@@ -1325,6 +1362,7 @@ static inline void put_dev_sector(Sector p)
struct work_struct;
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
+int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
/*
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
index 0b23edbee309..1900bd0fa639 100644
--- a/include/linux/percpu_ida.h
+++ b/include/linux/percpu_ida.h
@@ -16,6 +16,8 @@ struct percpu_ida {
* percpu_ida_init()
*/
unsigned nr_tags;
+ unsigned percpu_max_size;
+ unsigned percpu_batch_size;
struct percpu_ida_cpu __percpu *tag_cpu;
@@ -51,10 +53,29 @@ struct percpu_ida {
} ____cacheline_aligned_in_smp;
};
+/*
+ * Number of tags we move between the percpu freelist and the global freelist at
+ * a time
+ */
+#define IDA_DEFAULT_PCPU_BATCH_MOVE 32U
+/* Max size of percpu freelist */
+#define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
+
int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
void percpu_ida_destroy(struct percpu_ida *pool);
-int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags);
+int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
+ unsigned long max_size, unsigned long batch_size);
+static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+{
+ return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
+ IDA_DEFAULT_PCPU_BATCH_MOVE);
+}
+
+typedef int (*percpu_ida_cb)(unsigned, void *);
+int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
+ void *data);
+unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu);
#endif /* __PERCPU_IDA_H__ */