diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:29:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:29:19 -0700 |
commit | 750a02ab8d3c49ca7d23102be90d3d1db19e2827 (patch) | |
tree | 3c829af238b6598178c9ed859edb00bc8a280c05 /include/linux | |
parent | 1966391fa576e1fb2701be8bcca197d8f72737b7 (diff) | |
parent | abb30460bda232f304f642510adc8c6576ea51ea (diff) |
Merge tag 'for-5.8/block-2020-06-01' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"Core block changes that have been queued up for this release:
- Remove dead blk-throttle and blk-wbt code (Guoqing)
- Include pid in blktrace note traces (Jan)
- Don't spew I/O errors on wouldblock termination (me)
- Zone append addition (Johannes, Keith, Damien)
- IO accounting improvements (Konstantin, Christoph)
- blk-mq hardware map update improvements (Ming)
- Scheduler dispatch improvement (Salman)
- Inline block encryption support (Satya)
- Request map fixes and improvements (Weiping)
- blk-iocost tweaks (Tejun)
- Fix for timeout failing with error injection (Keith)
- Queue re-run fixes (Douglas)
- CPU hotplug improvements (Christoph)
- Queue entry/exit improvements (Christoph)
- Move DMA drain handling to the few drivers that use it (Christoph)
- Partition handling cleanups (Christoph)"
* tag 'for-5.8/block-2020-06-01' of git://git.kernel.dk/linux-block: (127 commits)
block: mark bio_wouldblock_error() bio with BIO_QUIET
blk-wbt: rename __wbt_update_limits to wbt_update_limits
blk-wbt: remove wbt_update_limits
blk-throttle: remove tg_drain_bios
blk-throttle: remove blk_throtl_drain
null_blk: force complete for timeout request
blk-mq: drain I/O when all CPUs in a hctx are offline
blk-mq: add blk_mq_all_tag_iter
blk-mq: open code __blk_mq_alloc_request in blk_mq_alloc_request_hctx
blk-mq: use BLK_MQ_NO_TAG in more places
blk-mq: rename BLK_MQ_TAG_FAIL to BLK_MQ_NO_TAG
blk-mq: move more request initialization to blk_mq_rq_ctx_init
blk-mq: simplify the blk_mq_get_request calling convention
blk-mq: remove the bio argument to ->prepare_request
nvme: force complete cancelled requests
blk-mq: blk-mq: provide forced completion method
block: fix a warning when blkdev.h is included for !CONFIG_BLOCK builds
block: blk-crypto-fallback: remove redundant initialization of variable err
block: reduce part_stat_lock() scope
block: use __this_cpu_add() instead of access by smp_processor_id()
...
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/backing-dev-defs.h | 2 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 8 | ||||
-rw-r--r-- | include/linux/bio.h | 13 | ||||
-rw-r--r-- | include/linux/blk-cgroup.h | 53 | ||||
-rw-r--r-- | include/linux/blk-crypto.h | 123 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 14 | ||||
-rw-r--r-- | include/linux/blk_types.h | 24 | ||||
-rw-r--r-- | include/linux/blkdev.h | 122 | ||||
-rw-r--r-- | include/linux/bvec.h | 13 | ||||
-rw-r--r-- | include/linux/cdrom.h | 7 | ||||
-rw-r--r-- | include/linux/cpuhotplug.h | 1 | ||||
-rw-r--r-- | include/linux/device.h | 4 | ||||
-rw-r--r-- | include/linux/elevator.h | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 2 | ||||
-rw-r--r-- | include/linux/genhd.h | 40 | ||||
-rw-r--r-- | include/linux/keyslot-manager.h | 106 | ||||
-rw-r--r-- | include/linux/libata.h | 2 | ||||
-rw-r--r-- | include/linux/part_stat.h | 61 |
18 files changed, 473 insertions, 124 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 7367150f962a..90a7e844a098 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -193,8 +193,6 @@ struct backing_dev_info { congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ - const char *name; - struct kref refcnt; /* Reference counter for the structure */ unsigned int capabilities; /* Device capabilities */ unsigned int min_ratio; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c9ad5c3b7b4b..6b3504bf7a42 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -33,14 +33,10 @@ int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...); __printf(2, 0) int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args); -int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); +void bdi_set_owner(struct backing_dev_info *bdi, struct device *owner); void bdi_unregister(struct backing_dev_info *bdi); -struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); -static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask) -{ - return bdi_alloc_node(gfp_mask, NUMA_NO_NODE); -} +struct backing_dev_info *bdi_alloc(int node_id); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); diff --git a/include/linux/bio.h b/include/linux/bio.h index a0ee494a6329..683ff5fd8871 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -70,7 +70,7 @@ static inline bool bio_has_data(struct bio *bio) return false; } -static inline bool bio_no_advance_iter(struct bio *bio) +static inline bool bio_no_advance_iter(const struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || @@ -138,8 +138,8 @@ static inline bool bio_next_segment(const struct bio *bio, #define bio_for_each_segment_all(bvl, bio, iter) \ for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); ) -static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, - unsigned bytes) +static inline void bio_advance_iter(const struct bio *bio, + struct bvec_iter *iter, unsigned int bytes) { iter->bi_sector += bytes >> 9; @@ -417,6 +417,7 @@ static inline void bio_io_error(struct bio *bio) static inline void bio_wouldblock_error(struct bio *bio) { + bio_set_flag(bio, BIO_QUIET); bio->bi_status = BLK_STS_AGAIN; bio_endio(bio); } @@ -444,12 +445,6 @@ void bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -void generic_start_io_acct(struct request_queue *q, int op, - unsigned long sectors, struct hd_struct *part); -void generic_end_io_acct(struct request_queue *q, int op, - struct hd_struct *part, - unsigned long start_time); - extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 35f8ffe92b70..a57ebe2f00ab 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -607,12 +607,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, u64_stats_update_begin(&bis->sync); /* - * If the bio is flagged with BIO_QUEUE_ENTERED it means this - * is a split bio and we would have already accounted for the - * size of the bio. + * If the bio is flagged with BIO_CGROUP_ACCT it means this is a + * split bio and we would have already accounted for the size of + * the bio. */ - if (!bio_flagged(bio, BIO_QUEUE_ENTERED)) + if (!bio_flagged(bio, BIO_CGROUP_ACCT)) { + bio_set_flag(bio, BIO_CGROUP_ACCT); bis->cur.bytes[rwd] += bio->bi_iter.bi_size; + } bis->cur.ios[rwd]++; u64_stats_update_end(&bis->sync); @@ -629,6 +631,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, static inline void blkcg_use_delay(struct blkcg_gq *blkg) { + if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0)) + return; if (atomic_add_return(1, &blkg->use_delay) == 1) atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); } @@ -637,6 +641,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); + if (WARN_ON_ONCE(old < 0)) + return 0; if (old == 0) return 0; @@ -661,20 +667,39 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) return 1; } +/** + * blkcg_set_delay - Enable allocator delay mechanism with the specified delay amount + * @blkg: target blkg + * @delay: delay duration in nsecs + * + * When enabled with this function, the delay is not decayed and must be + * explicitly cleared with blkcg_clear_delay(). Must not be mixed with + * blkcg_[un]use_delay() and blkcg_add_delay() usages. + */ +static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) +{ + int old = atomic_read(&blkg->use_delay); + + /* We only want 1 person setting the congestion count for this blkg. */ + if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) + atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); + + atomic64_set(&blkg->delay_nsec, delay); +} + +/** + * blkcg_clear_delay - Disable allocator delay mechanism + * @blkg: target blkg + * + * Disable use_delay mechanism. See blkcg_set_delay(). + */ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) { int old = atomic_read(&blkg->use_delay); - if (!old) - return; + /* We only want 1 person clearing the congestion count for this blkg. */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, 0); - if (cur == old) { - atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); - break; - } - old = cur; - } + if (old && atomic_cmpxchg(&blkg->use_delay, old, 0) == old) + atomic_dec(&blkg->blkcg->css.cgroup->congestion_count); } void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta); diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h new file mode 100644 index 000000000000..e82342907f2b --- /dev/null +++ b/include/linux/blk-crypto.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_BLK_CRYPTO_H +#define __LINUX_BLK_CRYPTO_H + +#include <linux/types.h> + +enum blk_crypto_mode_num { + BLK_ENCRYPTION_MODE_INVALID, + BLK_ENCRYPTION_MODE_AES_256_XTS, + BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, + BLK_ENCRYPTION_MODE_ADIANTUM, + BLK_ENCRYPTION_MODE_MAX, +}; + +#define BLK_CRYPTO_MAX_KEY_SIZE 64 +/** + * struct blk_crypto_config - an inline encryption key's crypto configuration + * @crypto_mode: encryption algorithm this key is for + * @data_unit_size: the data unit size for all encryption/decryptions with this + * key. This is the size in bytes of each individual plaintext and + * ciphertext. This is always a power of 2. It might be e.g. the + * filesystem block size or the disk sector size. + * @dun_bytes: the maximum number of bytes of DUN used when using this key + */ +struct blk_crypto_config { + enum blk_crypto_mode_num crypto_mode; + unsigned int data_unit_size; + unsigned int dun_bytes; +}; + +/** + * struct blk_crypto_key - an inline encryption key + * @crypto_cfg: the crypto configuration (like crypto_mode, key size) for this + * key + * @data_unit_size_bits: log2 of data_unit_size + * @size: size of this key in bytes (determined by @crypto_cfg.crypto_mode) + * @raw: the raw bytes of this key. Only the first @size bytes are used. + * + * A blk_crypto_key is immutable once created, and many bios can reference it at + * the same time. It must not be freed until all bios using it have completed + * and it has been evicted from all devices on which it may have been used. + */ +struct blk_crypto_key { + struct blk_crypto_config crypto_cfg; + unsigned int data_unit_size_bits; + unsigned int size; + u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; +}; + +#define BLK_CRYPTO_MAX_IV_SIZE 32 +#define BLK_CRYPTO_DUN_ARRAY_SIZE (BLK_CRYPTO_MAX_IV_SIZE / sizeof(u64)) + +/** + * struct bio_crypt_ctx - an inline encryption context + * @bc_key: the key, algorithm, and data unit size to use + * @bc_dun: the data unit number (starting IV) to use + * + * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for + * write requests) or decrypted (for read requests) inline by the storage device + * or controller, or by the crypto API fallback. + */ +struct bio_crypt_ctx { + const struct blk_crypto_key *bc_key; + u64 bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; +}; + +#include <linux/blk_types.h> +#include <linux/blkdev.h> + +struct request; +struct request_queue; + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + +static inline bool bio_has_crypt_ctx(struct bio *bio) +{ + return bio->bi_crypt_context; +} + +void bio_crypt_set_ctx(struct bio *bio, const struct blk_crypto_key *key, + const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + gfp_t gfp_mask); + +bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, + unsigned int bytes, + const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]); + +int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, + enum blk_crypto_mode_num crypto_mode, + unsigned int dun_bytes, + unsigned int data_unit_size); + +int blk_crypto_start_using_key(const struct blk_crypto_key *key, + struct request_queue *q); + +int blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key); + +bool blk_crypto_config_supported(struct request_queue *q, + const struct blk_crypto_config *cfg); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline bool bio_has_crypt_ctx(struct bio *bio) +{ + return false; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +void __bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); +static inline void bio_crypt_clone(struct bio *dst, struct bio *src, + gfp_t gfp_mask) +{ + if (bio_has_crypt_ctx(src)) + __bio_crypt_clone(dst, src, gfp_mask); +} + +#endif /* __LINUX_BLK_CRYPTO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b45148ba3291..d6fcae17da5a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -140,6 +140,8 @@ struct blk_mq_hw_ctx { */ atomic_t nr_active; + /** @cpuhp_online: List to store request if CPU is going to die */ + struct hlist_node cpuhp_online; /** @cpuhp_dead: List to store request if some CPU die. */ struct hlist_node cpuhp_dead; /** @kobj: Kernel object for sysfs. */ @@ -391,6 +393,11 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, + /* + * Set when this device requires underlying blk-mq device for + * completing IO: + */ + BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, @@ -400,6 +407,9 @@ enum { BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, + /* hw queue is inactive after all its CPUs become offline */ + BLK_MQ_S_INACTIVE = 3, + BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_CPU_WORK_BATCH = 8, @@ -494,6 +504,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); bool blk_mq_complete_request(struct request *rq); +void blk_mq_force_complete_rq(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio, unsigned int nr_segs); bool blk_mq_queue_stopped(struct request_queue *q); @@ -508,6 +519,7 @@ void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); +void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); @@ -577,4 +589,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } +blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio); + #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 31eb92876be7..ccb895f911b1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -18,6 +18,7 @@ struct block_device; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); +struct bio_crypt_ctx; /* * Block error status values. See block/blk-core:blk_errors for the details. @@ -63,6 +64,18 @@ typedef u8 __bitwise blk_status_t; */ #define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13) +/* + * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone + * related resources are unavailable, but the driver can guarantee the queue + * will be rerun in the future once the resources become available again. + * + * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references + * a zone specific resource and IO to a different zone on the same device could + * still be served. Examples of that are zones that are write-locked, but a read + * to the same zone could be served. + */ +#define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with @@ -173,6 +186,11 @@ struct bio { u64 bi_iocost_cost; #endif #endif + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *bi_crypt_context; +#endif + union { #if defined(CONFIG_BLK_DEV_INTEGRITY) struct bio_integrity_payload *bi_integrity; /* data integrity */ @@ -220,7 +238,7 @@ enum { * throttling rules. Don't do it again. */ BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ - BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */ + BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_TRACKED, /* set if bio goes through the rq_qos path */ BIO_FLAG_LAST }; @@ -296,6 +314,8 @@ enum req_opf { REQ_OP_ZONE_CLOSE = 11, /* Transition a zone to full */ REQ_OP_ZONE_FINISH = 12, + /* write data at the current zone write pointer */ + REQ_OP_ZONE_APPEND = 13, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, @@ -323,7 +343,6 @@ enum req_flag_bits { __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ __REQ_NOWAIT, /* Don't wait if request will block */ - __REQ_NOWAIT_INLINE, /* Return would-block error inline */ /* * When a shared kthread needs to issue a bio for a cgroup, doing * so synchronously can lead to priority inversions as the kthread @@ -358,7 +377,6 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) -#define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE) #define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32868fbedc9e..8fd900998b4e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -43,6 +43,7 @@ struct pr_ops; struct rq_qos; struct blk_queue_stats; struct blk_stat_callback; +struct blk_keyslot_manager; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -82,8 +83,6 @@ typedef __u32 __bitwise req_flags_t; /* set for "ide_preempt" requests and also for requests for which the SCSI "quiesce" state must be ignored. */ #define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) -/* contains copies of user pages */ -#define RQF_COPY_USER ((__force req_flags_t)(1 << 9)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ @@ -223,11 +222,14 @@ struct request { unsigned short nr_integrity_segments; #endif +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *crypt_ctx; + struct blk_ksm_keyslot *crypt_keyslot; +#endif + unsigned short write_hint; unsigned short ioprio; - unsigned int extra_len; /* length of alignment and padding */ - enum mq_rq_state state; refcount_t ref; @@ -290,7 +292,6 @@ struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); struct bio_vec; -typedef int (dma_drain_needed_fn)(struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ @@ -336,6 +337,7 @@ struct queue_limits { unsigned int max_hw_discard_sectors; unsigned int max_write_same_sectors; unsigned int max_write_zeroes_sectors; + unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -361,7 +363,8 @@ unsigned int blkdev_nr_zones(struct gendisk *disk); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); -extern int blk_revalidate_disk_zones(struct gendisk *disk); +int blk_revalidate_disk_zones(struct gendisk *disk, + void (*update_driver_data)(struct gendisk *disk)); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); @@ -399,7 +402,6 @@ struct request_queue { struct rq_qos *rq_qos; make_request_fn *make_request_fn; - dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; @@ -469,11 +471,14 @@ struct request_queue { */ unsigned long nr_requests; /* Max # of requests */ - unsigned int dma_drain_size; - void *dma_drain_buffer; unsigned int dma_pad_mask; unsigned int dma_alignment; +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + /* Inline crypto capabilities */ + struct blk_keyslot_manager *ksm; +#endif + unsigned int rq_timeout; int poll_nsec; @@ -729,6 +734,16 @@ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { return 0; } +static inline bool blk_queue_zone_is_seq(struct request_queue *q, + sector_t sector) +{ + return false; +} +static inline unsigned int blk_queue_zone_no(struct request_queue *q, + sector_t sector) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_ZONED */ static inline bool rq_is_sync(struct request *rq) @@ -747,6 +762,9 @@ static inline bool rq_mergeable(struct request *rq) if (req_op(rq) == REQ_OP_WRITE_ZEROES) return false; + if (req_op(rq) == REQ_OP_ZONE_APPEND) + return false; + if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; if (rq->rq_flags & RQF_NOMERGE_FLAGS) @@ -1081,6 +1099,8 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q, extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); +extern void blk_queue_max_zone_append_sectors(struct request_queue *q, + unsigned int max_zone_append_sectors); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); @@ -1099,9 +1119,6 @@ extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); -extern int blk_queue_dma_drain(struct request_queue *q, - dma_drain_needed_fn *dma_drain_needed, - void *buf, unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); @@ -1138,7 +1155,15 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) return max_t(unsigned short, rq->nr_phys_segments, 1); } -extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); +int __blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist, struct scatterlist **last_sg); +static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, + struct scatterlist *sglist) +{ + struct scatterlist *last_sg = NULL; + + return __blk_rq_map_sg(q, rq, sglist, &last_sg); +} extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); @@ -1206,7 +1231,9 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) !list_empty(&plug->cb_list)); } -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); +extern void blk_io_schedule(void); + +int blkdev_issue_flush(struct block_device *, gfp_t); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); @@ -1293,6 +1320,11 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } +static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q) +{ + return q->limits.max_zone_append_sectors; +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { int retval = 512; @@ -1551,6 +1583,12 @@ struct blk_integrity *bdev_get_integrity(struct block_device *bdev) return blk_get_integrity(bdev->bd_disk); } +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return q->integrity.profile; +} + static inline bool blk_integrity_rq(struct request *rq) { return rq->cmd_flags & REQ_INTEGRITY; @@ -1631,6 +1669,11 @@ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { return NULL; } +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) +{ + return false; +} static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) { return 0; @@ -1682,6 +1725,25 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + +bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q); + +void blk_ksm_unregister(struct request_queue *q); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm, + struct request_queue *q) +{ + return true; +} + +static inline void blk_ksm_unregister(struct request_queue *q) { } + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + + struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); @@ -1719,6 +1781,7 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, #ifdef CONFIG_BLK_DEV_ZONED bool blk_req_needs_zone_write_lock(struct request *rq); +bool blk_req_zone_write_trylock(struct request *rq); void __blk_req_zone_write_lock(struct request *rq); void __blk_req_zone_write_unlock(struct request *rq); @@ -1809,8 +1872,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) return false; } -static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, - sector_t *error_sector) +static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask) { return 0; } @@ -1830,4 +1892,32 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } +#ifdef CONFIG_BLOCK +unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, + unsigned int op); +void disk_end_io_acct(struct gendisk *disk, unsigned int op, + unsigned long start_time); + +/** + * bio_start_io_acct - start I/O accounting for bio based drivers + * @bio: bio to start account for + * + * Returns the start time that should be passed back to bio_end_io_acct(). + */ +static inline unsigned long bio_start_io_acct(struct bio *bio) +{ + return disk_start_io_acct(bio->bi_disk, bio_sectors(bio), bio_op(bio)); +} + +/** + * bio_end_io_acct - end I/O accounting for bio based drivers + * @bio: bio to end account for + * @start: start time returned by bio_start_io_acct() + */ +static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) +{ + return disk_end_io_acct(bio->bi_disk, bio_op(bio), start_time); +} +#endif /* CONFIG_BLOCK */ + #endif diff --git a/include/linux/bvec.h b/include/linux/bvec.h index a81c13ac1972..ac0c7299d5b8 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -12,8 +12,17 @@ #include <linux/errno.h> #include <linux/mm.h> -/* - * was unsigned short, but we might as well be ready for > 64kB I/O pages +/** + * struct bio_vec - a contiguous range of physical memory addresses + * @bv_page: First page associated with the address range. + * @bv_len: Number of bytes in the address range. + * @bv_offset: Start of the address range relative to the start of @bv_page. + * + * The following holds for a bvec if n * PAGE_SIZE < bv_offset + bv_len: + * + * nth_page(@bv_page, n) == @bv_page + n + * + * This holds because page_is_mergeable() checks the above property. */ struct bio_vec { struct page *bv_page; diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 528271c60018..8543fa59da72 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h @@ -94,6 +94,11 @@ struct cdrom_device_ops { struct packet_command *); }; +int cdrom_multisession(struct cdrom_device_info *cdi, + struct cdrom_multisession *info); +int cdrom_read_tocentry(struct cdrom_device_info *cdi, + struct cdrom_tocentry *entry); + /* the general block_device operations structure: */ extern int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t mode); @@ -104,7 +109,7 @@ extern unsigned int cdrom_check_events(struct cdrom_device_info *cdi, unsigned int clearing); extern int cdrom_media_changed(struct cdrom_device_info *); -extern int register_cdrom(struct cdrom_device_info *cdi); +extern int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi); extern void unregister_cdrom(struct cdrom_device_info *cdi); typedef struct { diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 77d70b633531..24b3a77810b6 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -152,6 +152,7 @@ enum cpuhp_state { CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_IRQ_AFFINITY_ONLINE, + CPUHP_AP_BLK_MQ_ONLINE, CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS, CPUHP_AP_X86_INTEL_EPB_ONLINE, CPUHP_AP_PERF_ONLINE, diff --git a/include/linux/device.h b/include/linux/device.h index ac8e37cd716a..15460a5ac024 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -884,10 +884,6 @@ extern bool device_is_bound(struct device *dev); /* * Easy functions for dynamically creating devices on the fly */ -extern __printf(5, 0) -struct device *device_create_vargs(struct class *cls, struct device *parent, - dev_t devt, void *drvdata, - const char *fmt, va_list vargs); extern __printf(5, 6) struct device *device_create(struct class *cls, struct device *parent, dev_t devt, void *drvdata, diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 901bda352dcb..bacc40a0bdf3 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -39,7 +39,7 @@ struct elevator_mq_ops { void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *); - void (*prepare_request)(struct request *, struct bio *bio); + void (*prepare_request)(struct request *); void (*finish_request)(struct request *); void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); diff --git a/include/linux/fs.h b/include/linux/fs.h index ef6acd2062eb..4fdd148dd763 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2591,7 +2591,6 @@ extern struct kmem_cache *names_cachep; #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); extern void unregister_blkdev(unsigned int, const char *); -extern void bdev_unhash_inode(dev_t dev); extern struct block_device *bdget(dev_t); extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); @@ -2733,7 +2732,6 @@ extern bool is_bad_inode(struct inode *); extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *, bool); -extern int invalidate_partition(struct gendisk *, int); #endif unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9b3fffdf4011..f0d6d77309a5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -39,15 +39,6 @@ extern struct class block_class; #include <linux/fs.h> #include <linux/workqueue.h> -struct disk_stats { - u64 nsecs[NR_STAT_GROUPS]; - unsigned long sectors[NR_STAT_GROUPS]; - unsigned long ios[NR_STAT_GROUPS]; - unsigned long merges[NR_STAT_GROUPS]; - unsigned long io_ticks; - local_t in_flight[2]; -}; - #define PARTITION_META_INFO_VOLNAMELTH 64 /* * Enough for the string representation of any kind of UUID plus NULL. @@ -68,7 +59,13 @@ struct hd_struct { * can be non-atomic on 32bit machines with 64bit sector_t. */ sector_t nr_sects; +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) seqcount_t nr_sects_seq; +#endif + unsigned long stamp; + struct disk_stats __percpu *dkstats; + struct percpu_ref ref; + sector_t alignment_offset; unsigned int discard_alignment; struct device __dev; @@ -78,13 +75,6 @@ struct hd_struct { #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif - unsigned long stamp; -#ifdef CONFIG_SMP - struct disk_stats __percpu *dkstats; -#else - struct disk_stats dkstats; -#endif - struct percpu_ref ref; struct rcu_work rcu_work; }; @@ -217,11 +207,20 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ +#if IS_ENABLED(CONFIG_CDROM) + struct cdrom_device_info *cdi; +#endif int node_id; struct badblocks *bb; struct lockdep_map lockdep_map; }; +#if IS_REACHABLE(CONFIG_CDROM) +#define disk_to_cdi(disk) ((disk)->cdi) +#else +#define disk_to_cdi(disk) NULL +#endif + static inline struct gendisk *part_to_disk(struct hd_struct *part) { if (likely(part)) { @@ -265,6 +264,13 @@ static inline void disk_put_part(struct hd_struct *part) put_device(part_to_dev(part)); } +static inline void hd_sects_seq_init(struct hd_struct *p) +{ +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) + seqcount_init(&p->nr_sects_seq); +#endif +} + /* * Smarter partition iterator without context limits. */ @@ -339,7 +345,7 @@ extern dev_t blk_lookup_devt(const char *name, int partno); int bdev_disk_changed(struct block_device *bdev, bool invalidate); int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); -int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev); +int blk_drop_partitions(struct block_device *bdev); extern void printk_all_partitions(void); extern struct gendisk *__alloc_disk_node(int minors, int node_id); diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h new file mode 100644 index 000000000000..18f3f5346843 --- /dev/null +++ b/include/linux/keyslot-manager.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_KEYSLOT_MANAGER_H +#define __LINUX_KEYSLOT_MANAGER_H + +#include <linux/bio.h> +#include <linux/blk-crypto.h> + +struct blk_keyslot_manager; + +/** + * struct blk_ksm_ll_ops - functions to manage keyslots in hardware + * @keyslot_program: Program the specified key into the specified slot in the + * inline encryption hardware. + * @keyslot_evict: Evict key from the specified keyslot in the hardware. + * The key is provided so that e.g. dm layers can evict + * keys from the devices that they map over. + * Returns 0 on success, -errno otherwise. + * + * This structure should be provided by storage device drivers when they set up + * a keyslot manager - this structure holds the function ptrs that the keyslot + * manager will use to manipulate keyslots in the hardware. + */ +struct blk_ksm_ll_ops { + int (*keyslot_program)(struct blk_keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot); + int (*keyslot_evict)(struct blk_keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot); +}; + +struct blk_keyslot_manager { + /* + * The struct blk_ksm_ll_ops that this keyslot manager will use + * to perform operations like programming and evicting keys on the + * device + */ + struct blk_ksm_ll_ops ksm_ll_ops; + + /* + * The maximum number of bytes supported for specifying the data unit + * number. + */ + unsigned int max_dun_bytes_supported; + + /* + * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents + * whether a crypto mode and data unit size are supported. The i'th + * bit of crypto_mode_supported[crypto_mode] is set iff a data unit + * size of (1 << i) is supported. We only support data unit sizes + * that are powers of 2. + */ + unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; + + /* Device for runtime power management (NULL if none) */ + struct device *dev; + + /* Here onwards are *private* fields for internal keyslot manager use */ + + unsigned int num_slots; + + /* Protects programming and evicting keys from the device */ + struct rw_semaphore lock; + + /* List of idle slots, with least recently used slot at front */ + wait_queue_head_t idle_slots_wait_queue; + struct list_head idle_slots; + spinlock_t idle_slots_lock; + + /* + * Hash table which maps struct *blk_crypto_key to keyslots, so that we + * can find a key's keyslot in O(1) time rather than O(num_slots). + * Protected by 'lock'. + */ + struct hlist_head *slot_hashtable; + unsigned int log_slot_ht_size; + + /* Per-keyslot data */ + struct blk_ksm_keyslot *slots; +}; + +int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots); + +blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm, + const struct blk_crypto_key *key, + struct blk_ksm_keyslot **slot_ptr); + +unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot); + +void blk_ksm_put_slot(struct blk_ksm_keyslot *slot); + +bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, + const struct blk_crypto_config *cfg); + +int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, + const struct blk_crypto_key *key); + +void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm); + +void blk_ksm_destroy(struct blk_keyslot_manager *ksm); + +#endif /* __LINUX_KEYSLOT_MANAGER_H */ diff --git a/include/linux/libata.h b/include/linux/libata.h index cffa4714bfa8..af832852e620 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1092,6 +1092,7 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); +bool ata_scsi_dma_need_drain(struct request *rq); extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); @@ -1387,6 +1388,7 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .ioctl = ata_scsi_ioctl, \ ATA_SCSI_COMPAT_IOCTL \ .queuecommand = ata_scsi_queuecmd, \ + .dma_need_drain = ata_scsi_dma_need_drain, \ .can_queue = ATA_DEF_QUEUE, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .this_id = ATA_SHT_THIS_ID, \ diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index ece607607a86..24125778ef3e 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -4,21 +4,25 @@ #include <linux/genhd.h> +struct disk_stats { + u64 nsecs[NR_STAT_GROUPS]; + unsigned long sectors[NR_STAT_GROUPS]; + unsigned long ios[NR_STAT_GROUPS]; + unsigned long merges[NR_STAT_GROUPS]; + unsigned long io_ticks; + local_t in_flight[2]; +}; + /* * Macros to operate on percpu disk statistics: * - * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters - * and should be called between disk_stat_lock() and - * disk_stat_unlock(). + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters and should + * be called between disk_stat_lock() and disk_stat_unlock(). * * part_stat_read() can be called at any time. - * - * part_stat_{add|set_all}() and {init|free}_part_stats are for - * internal use only. */ -#ifdef CONFIG_SMP -#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) -#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) +#define part_stat_lock() preempt_disable() +#define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ (per_cpu_ptr((part)->dkstats, (cpu))->field) @@ -44,50 +48,13 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) sizeof(struct disk_stats)); } -static inline int init_part_stats(struct hd_struct *part) -{ - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ - free_percpu(part->dkstats); -} - -#else /* !CONFIG_SMP */ -#define part_stat_lock() ({ rcu_read_lock(); 0; }) -#define part_stat_unlock() rcu_read_unlock() - -#define part_stat_get(part, field) ((part)->dkstats.field) -#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) -#define part_stat_read(part, field) part_stat_get(part, field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) -{ - memset(&part->dkstats, value, sizeof(struct disk_stats)); -} - -static inline int init_part_stats(struct hd_struct *part) -{ - return 1; -} - -static inline void free_part_stats(struct hd_struct *part) -{ -} - -#endif /* CONFIG_SMP */ - #define part_stat_read_accum(part, field) \ (part_stat_read(part, field[STAT_READ]) + \ part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - (part_stat_get(part, field) += (addnd)) + __this_cpu_add((part)->dkstats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ |