diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 12:55:54 -1000 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 12:55:54 -1000 |
commit | 0364249d2073c32c5214f02866999ce940bc35a2 (patch) | |
tree | 8f6fa317669bdc90744481eb5a48f4401f0ca35e /drivers/md | |
parent | 39714efc23beb38ce850b29f4f132da6d997fc22 (diff) | |
parent | 9793c269da6cd339757de6ba5b2c8681b54c99af (diff) |
Merge tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Update DM core to directly call the map function for both the linear
and stripe targets, which are provided by DM core
- Various updates to use new safer string functions
- Update DM core to respect REQ_NOWAIT flag in normal bios so that
memory allocations are always attempted with GFP_NOWAIT
- Add Mikulas Patocka to MAINTAINERS as a DM maintainer!
- Improve DM delay target's handling of short delays (< 50ms) by using
a kthread to check expiration of IOs rather than timers and a wq
- Update the DM error target so that it works with zoned storage. This
helps xfstests to provide proper IO error handling coverage when
testing a filesystem with native zoned storage support
- Update both DM crypt and integrity targets to improve performance by
using crypto_shash_digest() rather than init+update+final sequence
- Fix DM crypt target by backfilling missing memory allocation
accounting for compound pages
* tag 'for-6.7/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm crypt: account large pages in cc->n_allocated_pages
dm integrity: use crypto_shash_digest() in sb_mac()
dm crypt: use crypto_shash_digest() in crypt_iv_tcw_whitening()
dm error: Add support for zoned block devices
dm delay: for short delays, use kthread instead of timers and wq
MAINTAINERS: add Mikulas Patocka as a DM maintainer
dm: respect REQ_NOWAIT flag in normal bios issued to DM
dm: enhance alloc_multiple_bios() to be more versatile
dm: make __send_duplicate_bios return unsigned int
dm log userspace: replace deprecated strncpy with strscpy
dm ioctl: replace deprecated strncpy with strscpy_pad
dm crypt: replace open-coded kmemdup_nul
dm cache metadata: replace deprecated strncpy with strscpy
dm: shortcut the calls to linear_map and stripe_map
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-cache-metadata.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 26 | ||||
-rw-r--r-- | drivers/md/dm-delay.c | 103 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 30 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-linear.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-log-userspace-base.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 23 | ||||
-rw-r--r-- | drivers/md/dm-target.c | 106 | ||||
-rw-r--r-- | drivers/md/dm.c | 121 | ||||
-rw-r--r-- | drivers/md/dm.h | 2 |
12 files changed, 320 insertions, 107 deletions
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index acffed750e3e..5a18b80d3666 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -597,7 +597,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd, cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); - strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); + strscpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]); cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]); cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]); @@ -707,7 +707,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd, disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); - strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); + strscpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); @@ -1726,7 +1726,7 @@ static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) return -EINVAL; - strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); + strscpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); hint_size = dm_cache_policy_get_hint_size(policy); diff --git 
a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index be32a290c90a..6de107aff331 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -652,13 +652,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, /* calculate crc32 for every 32bit part and xor it */ desc->tfm = tcw->crc32_tfm; for (i = 0; i < 4; i++) { - r = crypto_shash_init(desc); - if (r) - goto out; - r = crypto_shash_update(desc, &buf[i * 4], 4); - if (r) - goto out; - r = crypto_shash_final(desc, &buf[i * 4]); + r = crypto_shash_digest(desc, &buf[i * 4], 4, &buf[i * 4]); if (r) goto out; } @@ -1699,11 +1693,17 @@ retry: order = min(order, remaining_order); while (order > 0) { + if (unlikely(percpu_counter_read_positive(&cc->n_allocated_pages) + + (1 << order) > dm_crypt_pages_per_client)) + goto decrease_order; pages = alloc_pages(gfp_mask | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP, order); - if (likely(pages != NULL)) + if (likely(pages != NULL)) { + percpu_counter_add(&cc->n_allocated_pages, 1 << order); goto have_pages; + } +decrease_order: order--; } @@ -1741,10 +1741,13 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) if (clone->bi_vcnt > 0) { /* bio_for_each_folio_all crashes with an empty bio */ bio_for_each_folio_all(fi, clone) { - if (folio_test_large(fi.folio)) + if (folio_test_large(fi.folio)) { + percpu_counter_sub(&cc->n_allocated_pages, + 1 << folio_order(fi.folio)); folio_put(fi.folio); - else + } else { mempool_free(&fi.folio->page, &cc->page_pool); + } } } } @@ -2859,10 +2862,9 @@ static int crypt_ctr_auth_cipher(struct crypt_config *cc, char *cipher_api) if (!start || !end || ++start > end) return -EINVAL; - mac_alg = kzalloc(end - start + 1, GFP_KERNEL); + mac_alg = kmemdup_nul(start, end - start, GFP_KERNEL); if (!mac_alg) return -ENOMEM; - strncpy(mac_alg, start, end - start); mac = crypto_alloc_ahash(mac_alg, 0, CRYPTO_ALG_ALLOCATES_MEMORY); kfree(mac_alg); diff --git a/drivers/md/dm-delay.c 
b/drivers/md/dm-delay.c index 7433525e5985..efd510984e25 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -13,6 +13,7 @@ #include <linux/blkdev.h> #include <linux/bio.h> #include <linux/slab.h> +#include <linux/kthread.h> #include <linux/device-mapper.h> @@ -31,6 +32,7 @@ struct delay_c { struct workqueue_struct *kdelayd_wq; struct work_struct flush_expired_bios; struct list_head delayed_bios; + struct task_struct *worker; atomic_t may_delay; struct delay_class read; @@ -66,6 +68,44 @@ static void queue_timeout(struct delay_c *dc, unsigned long expires) mutex_unlock(&dc->timer_lock); } +static inline bool delay_is_fast(struct delay_c *dc) +{ + return !!dc->worker; +} + +static void flush_delayed_bios_fast(struct delay_c *dc, bool flush_all) +{ + struct dm_delay_info *delayed, *next; + + mutex_lock(&delayed_bios_lock); + list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { + if (flush_all || time_after_eq(jiffies, delayed->expires)) { + struct bio *bio = dm_bio_from_per_bio_data(delayed, + sizeof(struct dm_delay_info)); + list_del(&delayed->list); + dm_submit_bio_remap(bio, NULL); + delayed->class->ops--; + } + } + mutex_unlock(&delayed_bios_lock); +} + +static int flush_worker_fn(void *data) +{ + struct delay_c *dc = data; + + while (1) { + flush_delayed_bios_fast(dc, false); + if (unlikely(list_empty(&dc->delayed_bios))) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } else + cond_resched(); + } + + return 0; +} + static void flush_bios(struct bio *bio) { struct bio *n; @@ -78,7 +118,7 @@ static void flush_bios(struct bio *bio) } } -static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) +static struct bio *flush_delayed_bios(struct delay_c *dc, bool flush_all) { struct dm_delay_info *delayed, *next; unsigned long next_expires = 0; @@ -115,7 +155,10 @@ static void flush_expired_bios(struct work_struct *work) struct delay_c *dc; dc = container_of(work, struct delay_c, flush_expired_bios); - 
flush_bios(flush_delayed_bios(dc, 0)); + if (delay_is_fast(dc)) + flush_delayed_bios_fast(dc, false); + else + flush_bios(flush_delayed_bios(dc, false)); } static void delay_dtr(struct dm_target *ti) @@ -131,8 +174,11 @@ static void delay_dtr(struct dm_target *ti) dm_put_device(ti, dc->write.dev); if (dc->flush.dev) dm_put_device(ti, dc->flush.dev); + if (dc->worker) + kthread_stop(dc->worker); - mutex_destroy(&dc->timer_lock); + if (!delay_is_fast(dc)) + mutex_destroy(&dc->timer_lock); kfree(dc); } @@ -175,6 +221,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct delay_c *dc; int ret; + unsigned int max_delay; if (argc != 3 && argc != 6 && argc != 9) { ti->error = "Requires exactly 3, 6 or 9 arguments"; @@ -188,16 +235,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = dc; - timer_setup(&dc->delay_timer, handle_delayed_timer, 0); - INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); INIT_LIST_HEAD(&dc->delayed_bios); - mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); dc->argc = argc; ret = delay_class_ctr(ti, &dc->read, argv); if (ret) goto bad; + max_delay = dc->read.delay; if (argc == 3) { ret = delay_class_ctr(ti, &dc->write, argv); @@ -206,6 +251,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ret = delay_class_ctr(ti, &dc->flush, argv); if (ret) goto bad; + max_delay = max(max_delay, dc->write.delay); + max_delay = max(max_delay, dc->flush.delay); goto out; } @@ -216,19 +263,37 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ret = delay_class_ctr(ti, &dc->flush, argv + 3); if (ret) goto bad; + max_delay = max(max_delay, dc->flush.delay); goto out; } ret = delay_class_ctr(ti, &dc->flush, argv + 6); if (ret) goto bad; + max_delay = max(max_delay, dc->flush.delay); out: - dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); - if (!dc->kdelayd_wq) { - ret = -EINVAL; - DMERR("Couldn't start 
kdelayd"); - goto bad; + if (max_delay < 50) { + /* + * In case of small requested delays, use kthread instead of + * timers and workqueue to achieve better latency. + */ + dc->worker = kthread_create(&flush_worker_fn, dc, + "dm-delay-flush-worker"); + if (IS_ERR(dc->worker)) { + ret = PTR_ERR(dc->worker); + goto bad; + } + } else { + timer_setup(&dc->delay_timer, handle_delayed_timer, 0); + INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); + mutex_init(&dc->timer_lock); + dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); + if (!dc->kdelayd_wq) { + ret = -EINVAL; + DMERR("Couldn't start kdelayd"); + goto bad; + } } ti->num_flush_bios = 1; @@ -260,7 +325,10 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) list_add_tail(&delayed->list, &dc->delayed_bios); mutex_unlock(&delayed_bios_lock); - queue_timeout(dc, expires); + if (delay_is_fast(dc)) + wake_up_process(dc->worker); + else + queue_timeout(dc, expires); return DM_MAPIO_SUBMITTED; } @@ -270,8 +338,13 @@ static void delay_presuspend(struct dm_target *ti) struct delay_c *dc = ti->private; atomic_set(&dc->may_delay, 0); - del_timer_sync(&dc->delay_timer); - flush_bios(flush_delayed_bios(dc, 1)); + + if (delay_is_fast(dc)) + flush_delayed_bios_fast(dc, true); + else { + del_timer_sync(&dc->delay_timer); + flush_bios(flush_delayed_bios(dc, true)); + } } static void delay_resume(struct dm_target *ti) @@ -356,7 +429,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .features = DM_TARGET_PASSES_INTEGRITY, .module = THIS_MODULE, .ctr = delay_ctr, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 97a8d5fc9ebb..e85c688fd91e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -493,42 +493,32 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) { SHASH_DESC_ON_STACK(desc, ic->journal_mac); int r; - unsigned int size = 
crypto_shash_digestsize(ic->journal_mac); + unsigned int mac_size = crypto_shash_digestsize(ic->journal_mac); + __u8 *sb = (__u8 *)ic->sb; + __u8 *mac = sb + (1 << SECTOR_SHIFT) - mac_size; - if (sizeof(struct superblock) + size > 1 << SECTOR_SHIFT) { + if (sizeof(struct superblock) + mac_size > 1 << SECTOR_SHIFT) { dm_integrity_io_error(ic, "digest is too long", -EINVAL); return -EINVAL; } desc->tfm = ic->journal_mac; - r = crypto_shash_init(desc); - if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_init", r); - return r; - } - - r = crypto_shash_update(desc, (__u8 *)ic->sb, (1 << SECTOR_SHIFT) - size); - if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_update", r); - return r; - } - if (likely(wr)) { - r = crypto_shash_final(desc, (__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size); + r = crypto_shash_digest(desc, sb, mac - sb, mac); if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_final", r); + dm_integrity_io_error(ic, "crypto_shash_digest", r); return r; } } else { - __u8 result[HASH_MAX_DIGESTSIZE]; + __u8 actual_mac[HASH_MAX_DIGESTSIZE]; - r = crypto_shash_final(desc, result); + r = crypto_shash_digest(desc, sb, mac - sb, actual_mac); if (unlikely(r < 0)) { - dm_integrity_io_error(ic, "crypto_shash_final", r); + dm_integrity_io_error(ic, "crypto_shash_digest", r); return r; } - if (memcmp((__u8 *)ic->sb + (1 << SECTOR_SHIFT) - size, result, size)) { + if (memcmp(mac, actual_mac, mac_size)) { dm_integrity_io_error(ic, "superblock mac", -EILSEQ); dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0); return -EILSEQ; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 21ebb6c39394..e65058e0ed06 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1295,8 +1295,8 @@ static void retrieve_status(struct dm_table *table, spec->status = 0; spec->sector_start = ti->begin; spec->length = ti->len; - strncpy(spec->target_type, ti->type->name, - sizeof(spec->target_type) - 1); + 
strscpy_pad(spec->target_type, ti->type->name, + sizeof(spec->target_type)); outptr += sizeof(struct dm_target_spec); remaining = len - (outptr - outbuf); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index f4448d520ee9..2d3e186ca87e 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -85,7 +85,7 @@ static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) return lc->start + dm_target_offset(ti, bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio) +int linear_map(struct dm_target *ti, struct bio *bio) { struct linear_c *lc = ti->private; diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 5aace6ee6d47..7e4f27e86150 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -224,7 +224,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, lc->usr_argc = argc; - strncpy(lc->uuid, argv[0], DM_UUID_LEN); + strscpy(lc->uuid, argv[0], sizeof(lc->uuid)); argc--; argv++; spin_lock_init(&lc->flush_lock); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 5e70f5ae394d..16b93ae51d96 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -268,7 +268,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio, return DM_MAPIO_SUBMITTED; } -static int stripe_map(struct dm_target *ti, struct bio *bio) +int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 37b48f63ae6a..198d38b53322 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -844,7 +844,8 @@ static bool dm_table_supports_dax(struct dm_table *t, if (!ti->type->direct_access) return false; - if (!ti->type->iterate_devices || + if (dm_target_is_wildcard(ti->type) || + !ti->type->iterate_devices || ti->type->iterate_devices(ti, iterate_fn, NULL)) return false; } @@ -1587,6 +1588,14 @@ static 
int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev, return blk_queue_zoned_model(q) != *zoned_model; } +static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return blk_queue_zoned_model(q) != BLK_ZONED_NONE; +} + /* * Check the device zoned model based on the target feature flag. If the target * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are @@ -1600,6 +1609,18 @@ static bool dm_table_supports_zoned_model(struct dm_table *t, for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); + /* + * For the wildcard target (dm-error), if we do not have a + * backing device, we must always return false. If we have a + * backing device, the result must depend on checking zoned + * model, like for any other target. So for this, check directly + * if the target backing device is zoned as we get "false" when + * dm-error was set without a backing device. + */ + if (dm_target_is_wildcard(ti->type) && + !ti->type->iterate_devices(ti, device_is_zoned_model, NULL)) + return false; + if (dm_target_supports_zoned_hm(ti->type)) { if (!ti->type->iterate_devices || ti->type->iterate_devices(ti, device_not_zoned_model, diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 27e2992ff249..0c4efb0bef8a 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -116,9 +116,63 @@ EXPORT_SYMBOL(dm_unregister_target); * io-err: always fails an io, useful for bringing * up LVs that have holes in them. 
*/ +struct io_err_c { + struct dm_dev *dev; + sector_t start; +}; + +static int io_err_get_args(struct dm_target *tt, unsigned int argc, char **args) +{ + unsigned long long start; + struct io_err_c *ioec; + char dummy; + int ret; + + ioec = kmalloc(sizeof(*ioec), GFP_KERNEL); + if (!ioec) { + tt->error = "Cannot allocate io_err context"; + return -ENOMEM; + } + + ret = -EINVAL; + if (sscanf(args[1], "%llu%c", &start, &dummy) != 1 || + start != (sector_t)start) { + tt->error = "Invalid device sector"; + goto bad; + } + ioec->start = start; + + ret = dm_get_device(tt, args[0], dm_table_get_mode(tt->table), &ioec->dev); + if (ret) { + tt->error = "Device lookup failed"; + goto bad; + } + + tt->private = ioec; + + return 0; + +bad: + kfree(ioec); + + return ret; +} + static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) { /* + * If we have arguments, assume it is the path to the backing + * block device and its mapping start sector (same as dm-linear). + * In this case, get the device so that we can get its limits. 
+ */ + if (argc == 2) { + int ret = io_err_get_args(tt, argc, args); + + if (ret) + return ret; + } + + /* * Return error for discards instead of -EOPNOTSUPP */ tt->num_discard_bios = 1; @@ -129,7 +183,12 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) static void io_err_dtr(struct dm_target *tt) { - /* empty */ + struct io_err_c *ioec = tt->private; + + if (ioec) { + dm_put_device(tt, ioec->dev); + kfree(ioec); + } } static int io_err_map(struct dm_target *tt, struct bio *bio) @@ -149,6 +208,45 @@ static void io_err_release_clone_rq(struct request *clone, { } +#ifdef CONFIG_BLK_DEV_ZONED +static sector_t io_err_map_sector(struct dm_target *ti, sector_t bi_sector) +{ + struct io_err_c *ioec = ti->private; + + return ioec->start + dm_target_offset(ti, bi_sector); +} + +static int io_err_report_zones(struct dm_target *ti, + struct dm_report_zones_args *args, unsigned int nr_zones) +{ + struct io_err_c *ioec = ti->private; + + /* + * This should never be called when we do not have a backing device + * as that mean the target is not a zoned one. 
+ */ + if (WARN_ON_ONCE(!ioec)) + return -EIO; + + return dm_report_zones(ioec->dev->bdev, ioec->start, + io_err_map_sector(ti, args->next_sector), + args, nr_zones); +} +#else +#define io_err_report_zones NULL +#endif + +static int io_err_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + struct io_err_c *ioec = ti->private; + + if (!ioec) + return 0; + + return fn(ti, ioec->dev, ioec->start, ti->len, data); +} + static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits) { limits->max_discard_sectors = UINT_MAX; @@ -165,15 +263,17 @@ static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, static struct target_type error_target = { .name = "error", - .version = {1, 6, 0}, - .features = DM_TARGET_WILDCARD, + .version = {1, 7, 0}, + .features = DM_TARGET_WILDCARD | DM_TARGET_ZONED_HM, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, .clone_and_map_rq = io_err_clone_and_map_rq, .release_clone_rq = io_err_release_clone_rq, + .iterate_devices = io_err_iterate_devices, .io_hints = io_err_io_hints, .direct_access = io_err_dax_direct_access, + .report_zones = io_err_report_zones, }; int __init dm_target_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f7212e8fc27f..23c32cd1f1d8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -570,13 +570,15 @@ static void dm_end_io_acct(struct dm_io *io) dm_io_acct(io, true); } -static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) +static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio, gfp_t gfp_mask) { struct dm_io *io; struct dm_target_io *tio; struct bio *clone; - clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs); + clone = bio_alloc_clone(NULL, bio, gfp_mask, &md->mempools->io_bs); + if (unlikely(!clone)) + return NULL; tio = clone_to_tio(clone); tio->flags = 0; dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO); @@ -1426,9 +1428,16 @@ static void __map_bio(struct bio *clone) if 
(unlikely(dm_emulate_zone_append(md))) r = dm_zone_map_bio(tio); else + goto do_map; + } else { +do_map: + if (likely(ti->type->map == linear_map)) + r = linear_map(ti, clone); + else if (ti->type->map == stripe_map) + r = stripe_map(ti, clone); + else r = ti->type->map(ti, clone); - } else - r = ti->type->map(ti, clone); + } switch (r) { case DM_MAPIO_SUBMITTED: @@ -1473,15 +1482,15 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len) static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, struct dm_target *ti, unsigned int num_bios, - unsigned *len) + unsigned *len, gfp_t gfp_flag) { struct bio *bio; - int try; + int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1; - for (try = 0; try < 2; try++) { + for (; try < 2; try++) { int bio_nr; - if (try) + if (try && num_bios > 1) mutex_lock(&ci->io->md->table_devices_lock); for (bio_nr = 0; bio_nr < num_bios; bio_nr++) { bio = alloc_tio(ci, ti, bio_nr, len, @@ -1491,7 +1500,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, bio_list_add(blist, bio); } - if (try) + if (try && num_bios > 1) mutex_unlock(&ci->io->md->table_devices_lock); if (bio_nr == num_bios) return; @@ -1501,34 +1510,31 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci, } } -static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, - unsigned int num_bios, unsigned int *len) +static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, + unsigned int num_bios, unsigned int *len, + gfp_t gfp_flag) { struct bio_list blist = BIO_EMPTY_LIST; struct bio *clone; unsigned int ret = 0; - switch (num_bios) { - case 0: - break; - case 1: - if (len) - setup_split_accounting(ci, *len); - clone = alloc_tio(ci, ti, 0, len, GFP_NOIO); - __map_bio(clone); - ret = 1; - break; - default: - if (len) - setup_split_accounting(ci, *len); - /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ - 
alloc_multiple_bios(&blist, ci, ti, num_bios, len); - while ((clone = bio_list_pop(&blist))) { + if (WARN_ON_ONCE(num_bios == 0)) /* num_bios = 0 is a bug in caller */ + return 0; + + /* dm_accept_partial_bio() is not supported with shared tio->len_ptr */ + if (len) + setup_split_accounting(ci, *len); + + /* + * Using alloc_multiple_bios(), even if num_bios is 1, to consistently + * support allocating using GFP_NOWAIT with GFP_NOIO fallback. + */ + alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag); + while ((clone = bio_list_pop(&blist))) { + if (num_bios > 1) dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO); - __map_bio(clone); - ret += 1; - } - break; + __map_bio(clone); + ret += 1; } return ret; @@ -1555,8 +1561,12 @@ static void __send_empty_flush(struct clone_info *ci) unsigned int bios; struct dm_target *ti = dm_table_get_target(t, i); + if (unlikely(ti->num_flush_bios == 0)) + continue; + atomic_add(ti->num_flush_bios, &ci->io->io_count); - bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); + bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, + NULL, GFP_NOWAIT); atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count); } @@ -1569,10 +1579,9 @@ static void __send_empty_flush(struct clone_info *ci) bio_uninit(ci->bio); } -static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti, - unsigned int num_bios, - unsigned int max_granularity, - unsigned int max_sectors) +static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti, + unsigned int num_bios, unsigned int max_granularity, + unsigned int max_sectors) { unsigned int len, bios; @@ -1580,7 +1589,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target __max_io_len(ti, ci->sector, max_granularity, max_sectors)); atomic_add(num_bios, &ci->io->io_count); - bios = __send_duplicate_bios(ci, ti, num_bios, &len); + bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO); /* * alloc_io() takes one 
extra reference for submission, so the * reference won't reach 0 without the following (+1) subtraction @@ -1649,8 +1658,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, if (unlikely(!num_bios)) return BLK_STS_NOTSUPP; - __send_changing_extent_only(ci, ti, num_bios, - max_granularity, max_sectors); + __send_abnormal_io(ci, ti, num_bios, max_granularity, max_sectors); + return BLK_STS_OK; } @@ -1709,10 +1718,6 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) if (unlikely(!ti)) return BLK_STS_IOERR; - if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) && - unlikely(!dm_target_supports_nowait(ti->type))) - return BLK_STS_NOTSUPP; - if (unlikely(ci->is_abnormal_io)) return __process_abnormal_io(ci, ti); @@ -1724,7 +1729,17 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); setup_split_accounting(ci, len); - clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); + + if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) { + if (unlikely(!dm_target_supports_nowait(ti->type))) + return BLK_STS_NOTSUPP; + + clone = alloc_tio(ci, ti, 0, &len, GFP_NOWAIT); + if (unlikely(!clone)) + return BLK_STS_AGAIN; + } else { + clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO); + } __map_bio(clone); ci->sector += len; @@ -1733,11 +1748,11 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) return BLK_STS_OK; } -static void init_clone_info(struct clone_info *ci, struct mapped_device *md, +static void init_clone_info(struct clone_info *ci, struct dm_io *io, struct dm_table *map, struct bio *bio, bool is_abnormal) { ci->map = map; - ci->io = alloc_io(md, bio); + ci->io = io; ci->bio = bio; ci->is_abnormal_io = is_abnormal; ci->submit_as_polled = false; @@ -1772,8 +1787,18 @@ static void dm_split_and_process_bio(struct mapped_device *md, return; } - init_clone_info(&ci, md, map, bio, is_abnormal); - io = ci.io; + /* Only support nowait for normal IO */ + if 
(unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) { + io = alloc_io(md, bio, GFP_NOWAIT); + if (unlikely(!io)) { + /* Unable to do anything without dm_io. */ + bio_wouldblock_error(bio); + return; + } + } else { + io = alloc_io(md, bio, GFP_NOIO); + } + init_clone_info(&ci, io, map, bio, is_abnormal); if (bio->bi_opf & REQ_PREFLUSH) { __send_empty_flush(&ci); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index f682295af91f..7f1acbf6bd9e 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -188,9 +188,11 @@ void dm_kobject_release(struct kobject *kobj); /* * Targets for linear and striped mappings */ +int linear_map(struct dm_target *ti, struct bio *bio); int dm_linear_init(void); void dm_linear_exit(void); +int stripe_map(struct dm_target *ti, struct bio *bio); int dm_stripe_init(void); void dm_stripe_exit(void); |