diff options
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/bcache/super.c | 123 | ||||
| -rw-r--r-- | drivers/md/dm-bufio.c | 24 | ||||
| -rw-r--r-- | drivers/md/dm-core.h | 3 | ||||
| -rw-r--r-- | drivers/md/dm-crypt.c | 66 | ||||
| -rw-r--r-- | drivers/md/dm-flakey.c | 210 | ||||
| -rw-r--r-- | drivers/md/dm-integrity.c | 85 | ||||
| -rw-r--r-- | drivers/md/dm-ioctl.c | 98 | ||||
| -rw-r--r-- | drivers/md/dm-thin-metadata.c | 58 | ||||
| -rw-r--r-- | drivers/md/dm-thin.c | 41 | ||||
| -rw-r--r-- | drivers/md/dm-zone.c | 15 | ||||
| -rw-r--r-- | drivers/md/dm.c | 127 | ||||
| -rw-r--r-- | drivers/md/dm.h | 3 | ||||
| -rw-r--r-- | drivers/md/md.c | 32 | ||||
| -rw-r--r-- | drivers/md/md.h | 4 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 6 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.h | 1 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map.h | 3 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.c | 3 | ||||
| -rw-r--r-- | drivers/md/raid0.c | 62 | ||||
| -rw-r--r-- | drivers/md/raid0.h | 1 | ||||
| -rw-r--r-- | drivers/md/raid1-10.c | 2 | ||||
| -rw-r--r-- | drivers/md/raid10.c | 6 |
22 files changed, 649 insertions, 324 deletions
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e2a803683105..0ae2b3676293 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl) put_page(virt_to_page(dc->sb_disk)); if (!IS_ERR_OR_NULL(dc->bdev)) - blkdev_put(dc->bdev, bcache_kobj); + blkdev_put(dc->bdev, dc); wake_up(&unregister_wait); @@ -1453,7 +1453,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; - dc->bdev->bd_holder = dc; dc->sb_disk = sb_disk; if (cached_dev_init(dc, sb->block_size << 9)) @@ -2218,7 +2217,7 @@ void bch_cache_release(struct kobject *kobj) put_page(virt_to_page(ca->sb_disk)); if (!IS_ERR_OR_NULL(ca->bdev)) - blkdev_put(ca->bdev, bcache_kobj); + blkdev_put(ca->bdev, ca); kfree(ca); module_put(THIS_MODULE); @@ -2345,7 +2344,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; - ca->bdev->bd_holder = ca; ca->sb_disk = sb_disk; if (bdev_max_discard_sectors((bdev))) @@ -2359,7 +2357,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, * call blkdev_put() to bdev in bch_cache_release(). So we * explicitly call blkdev_put() here. */ - blkdev_put(bdev, bcache_kobj); + blkdev_put(bdev, ca); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; else if (ret == -EPERM) @@ -2448,6 +2446,7 @@ struct async_reg_args { struct cache_sb *sb; struct cache_sb_disk *sb_disk; struct block_device *bdev; + void *holder; }; static void register_bdev_worker(struct work_struct *work) @@ -2455,22 +2454,13 @@ static void register_bdev_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cached_dev *dc; - - dc = kzalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, bcache_kobj); - goto out; - } mutex_lock(&bch_register_lock); - if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) + if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder) + < 0) fail = true; mutex_unlock(&bch_register_lock); -out: if (fail) pr_info("error %s: fail to register backing device\n", args->path); @@ -2485,21 +2475,11 @@ static void register_cache_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cache *ca; - - ca = kzalloc(sizeof(*ca), GFP_KERNEL); - if (!ca) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, bcache_kobj); - goto out; - } /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) + if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder)) fail = true; -out: if (fail) pr_info("error %s: fail to register cache device\n", args->path); @@ -2520,6 +2500,13 @@ static void register_device_async(struct async_reg_args *args) queue_delayed_work(system_wq, &args->reg_work, 10); } +static void *alloc_holder_object(struct cache_sb *sb) +{ + if (SB_IS_BDEV(sb)) + return kzalloc(sizeof(struct cached_dev), GFP_KERNEL); + return kzalloc(sizeof(struct cache), GFP_KERNEL); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -2527,9 +2514,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, char *path = NULL; struct cache_sb *sb; struct cache_sb_disk *sb_disk; - struct block_device *bdev; + struct block_device *bdev, *bdev2; + void *holder = NULL; ssize_t ret; bool async_registration = false; + bool quiet = false; #ifdef CONFIG_BCACHE_ASYNC_REGISTRATION async_registration = true; @@ -2558,10 +2547,34 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ret = -EINVAL; err = "failed to open device"; - bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ | BLK_OPEN_WRITE, - bcache_kobj, NULL); + bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL); + if (IS_ERR(bdev)) + goto out_free_sb; + + err = "failed to set blocksize"; + if (set_blocksize(bdev, 4096)) + goto out_blkdev_put; + + err = read_super(sb, bdev, &sb_disk); + if (err) + goto out_blkdev_put; + + holder = alloc_holder_object(sb); + if (!holder) { + ret = -ENOMEM; + err = "cannot allocate memory"; + goto out_put_sb_page; + } + + /* Now reopen in exclusive mode with proper holder */ + bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, + holder, NULL); + blkdev_put(bdev, NULL); + bdev = bdev2; if (IS_ERR(bdev)) { - if (bdev == ERR_PTR(-EBUSY)) { + ret = PTR_ERR(bdev); + bdev = NULL; + if (ret == -EBUSY) { dev_t dev; mutex_lock(&bch_register_lock); @@ -2571,20 +2584,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); - if (attr == &ksysfs_register_quiet) - goto done; + if (attr == &ksysfs_register_quiet) { + quiet = true; + ret = size; + } } - goto out_free_sb; + goto out_free_holder; } - err = "failed to set blocksize"; - if (set_blocksize(bdev, 4096)) - goto out_blkdev_put; - - err = read_super(sb, bdev, &sb_disk); - if (err) - goto out_blkdev_put; - err = "failed to register device"; if (async_registration) { @@ -2595,59 +2602,46 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!args) { ret = -ENOMEM; err = "cannot allocate memory"; - goto out_put_sb_page; + goto out_free_holder; } args->path = path; args->sb = sb; args->sb_disk = sb_disk; args->bdev = bdev; + args->holder = holder; register_device_async(args); /* No wait and returns to user space */ goto async_done; } if (SB_IS_BDEV(sb)) { - struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); - - if (!dc) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - mutex_lock(&bch_register_lock); - ret = register_bdev(sb, sb_disk, bdev, dc); + ret = register_bdev(sb, sb_disk, bdev, holder); mutex_unlock(&bch_register_lock); /* blkdev_put() will be called in cached_dev_free() */ if (ret < 0) goto out_free_sb; } else { - struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - - if (!ca) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - /* blkdev_put() will be called in bch_cache_release() */ - ret = register_cache(sb, sb_disk, bdev, ca); + ret = register_cache(sb, sb_disk, bdev, holder); if (ret) goto out_free_sb; } -done: kfree(sb); kfree(path); module_put(THIS_MODULE); async_done: return size; +out_free_holder: + kfree(holder); out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put: - blkdev_put(bdev, register_bcache); + if (bdev) + blkdev_put(bdev, holder); out_free_sb: kfree(sb); out_free_path: @@ -2656,7 +2650,8 @@ out_free_path: out_module_put: module_put(THIS_MODULE); out: - pr_info("error %s: %s\n", path?path:"", err); + if (!quiet) + pr_info("error %s: %s\n", path?path:"", err); return ret; } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index eea977662e81..bc309e41d074 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1157,23 +1157,6 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask, *data_mode = DATA_MODE_VMALLOC; - /* - * __vmalloc allocates the data pages and auxiliary structures with - * gfp_flags that were specified, but pagetables are always allocated - * with GFP_KERNEL, no matter what was specified as gfp_mask. - * - * Consequently, we must set per-process flag PF_MEMALLOC_NOIO so that - * all allocations done by this process (including pagetables) are done - * as if GFP_NOIO was specified. - */ - if (gfp_mask & __GFP_NORETRY) { - unsigned int noio_flag = memalloc_noio_save(); - void *ptr = __vmalloc(c->block_size, gfp_mask); - - memalloc_noio_restore(noio_flag); - return ptr; - } - return __vmalloc(c->block_size, gfp_mask); } @@ -2592,6 +2575,13 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c) } EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); +void dm_bufio_client_reset(struct dm_bufio_client *c) +{ + drop_buffers(c); + flush_work(&c->shrink_work); +} +EXPORT_SYMBOL_GPL(dm_bufio_client_reset); + void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start) { c->start = start; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index ce913ad91a52..0d93661f88d3 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -306,7 +306,8 @@ struct dm_io { */ enum { DM_IO_ACCOUNTED, - DM_IO_WAS_SPLIT + DM_IO_WAS_SPLIT, + DM_IO_BLK_STAT }; static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 15424bfea7ee..1dc6227d353e 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -31,10 +31,10 @@ #include <asm/unaligned.h> #include <crypto/hash.h> #include <crypto/md5.h> -#include <crypto/algapi.h> #include <crypto/skcipher.h> #include <crypto/aead.h> #include <crypto/authenc.h> +#include <crypto/utils.h> #include <linux/rtnetlink.h> /* for struct rtattr and RTA macros only */ #include <linux/key-type.h> #include <keys/user-type.h> @@ -745,16 +745,23 @@ static int crypt_iv_eboiv_ctr(struct crypt_config *cc, struct dm_target *ti, static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { - u8 buf[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(__le64)); + struct crypto_skcipher *tfm = any_tfm(cc); struct skcipher_request *req; struct scatterlist src, dst; DECLARE_CRYPTO_WAIT(wait); + unsigned int reqsize; int err; + u8 *buf; - req = skcipher_request_alloc(any_tfm(cc), GFP_NOIO); + reqsize = ALIGN(crypto_skcipher_reqsize(tfm), __alignof__(__le64)); + + req = kmalloc(reqsize + cc->iv_size, GFP_NOIO); if (!req) return -ENOMEM; + skcipher_request_set_tfm(req, tfm); + + buf = (u8 *)req + reqsize; memset(buf, 0, cc->iv_size); *(__le64 *)buf = cpu_to_le64(dmreq->iv_sector * cc->sector_size); @@ -763,7 +770,7 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, skcipher_request_set_crypt(req, &src, &dst, cc->iv_size, buf); skcipher_request_set_callback(req, 0, crypto_req_done, &wait); err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); - skcipher_request_free(req); + kfree_sensitive(req); return err; } @@ -1661,6 +1668,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); * In order to not degrade performance with excessive locking, we try * non-blocking allocations without a mutex first but on failure we fallback * to blocking allocations with a mutex. + * + * In order to reduce allocation overhead, we try to allocate compound pages in + * the first pass. If they are not available, we fall back to the mempool. */ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size) { @@ -1668,8 +1678,8 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned int size) struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM; - unsigned int i, len, remaining_size; - struct page *page; + unsigned int remaining_size; + unsigned int order = MAX_ORDER - 1; retry: if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM)) @@ -1682,19 +1692,34 @@ retry: remaining_size = size; - for (i = 0; i < nr_iovecs; i++) { - page = mempool_alloc(&cc->page_pool, gfp_mask); - if (!page) { + while (remaining_size) { + struct page *pages; + unsigned size_to_add; + unsigned remaining_order = __fls((remaining_size + PAGE_SIZE - 1) >> PAGE_SHIFT); + order = min(order, remaining_order); + + while (order > 0) { + pages = alloc_pages(gfp_mask + | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP, + order); + if (likely(pages != NULL)) + goto have_pages; + order--; + } + + pages = mempool_alloc(&cc->page_pool, gfp_mask); + if (!pages) { crypt_free_buffer_pages(cc, clone); bio_put(clone); gfp_mask |= __GFP_DIRECT_RECLAIM; + order = 0; goto retry; } - len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size; - - __bio_add_page(clone, page, len, 0); - remaining_size -= len; +have_pages: + size_to_add = min((unsigned)PAGE_SIZE << order, remaining_size); + __bio_add_page(clone, pages, size_to_add, 0); + remaining_size -= size_to_add; } /* Allocate space for integrity tags */ @@ -1712,12 +1737,15 @@ retry: static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bv, clone, iter_all) { - BUG_ON(!bv->bv_page); - mempool_free(bv->bv_page, &cc->page_pool); + if (clone->bi_vcnt > 0) { /* bio_for_each_folio_all crashes with an empty bio */ + bio_for_each_folio_all(fi, clone) { + if (folio_test_large(fi.folio)) + folio_put(fi.folio); + else + mempool_free(&fi.folio->page, &cc->page_pool); + } } } @@ -2887,7 +2915,7 @@ static int crypt_ctr_cipher_new(struct dm_target *ti, char *cipher_in, char *key ret = crypt_ctr_auth_cipher(cc, cipher_api); if (ret < 0) { ti->error = "Invalid AEAD cipher spec"; - return -ENOMEM; + return ret; } } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index bd80bcafbe50..120153e44ae0 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -16,6 +16,8 @@ #define DM_MSG_PREFIX "flakey" +#define PROBABILITY_BASE 1000000000 + #define all_corrupt_bio_flags_match(bio, fc) \ (((bio)->bi_opf & (fc)->corrupt_bio_flags) == (fc)->corrupt_bio_flags) @@ -34,6 +36,8 @@ struct flakey_c { unsigned int corrupt_bio_rw; unsigned int corrupt_bio_value; blk_opf_t corrupt_bio_flags; + unsigned int random_read_corrupt; + unsigned int random_write_corrupt; }; enum feature_flag_bits { @@ -54,10 +58,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, const char *arg_name; static const struct dm_arg _args[] = { - {0, 7, "Invalid number of feature args"}, + {0, 11, "Invalid number of feature args"}, {1, UINT_MAX, "Invalid corrupt bio byte"}, {0, 255, "Invalid corrupt value to write into bio byte (0-255)"}, {0, UINT_MAX, "Invalid corrupt bio flags mask"}, + {0, PROBABILITY_BASE, "Invalid random corrupt argument"}, }; /* No feature arguments supplied. */ @@ -170,6 +175,32 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, continue; } + if (!strcasecmp(arg_name, "random_read_corrupt")) { + if (!argc) { + ti->error = "Feature random_read_corrupt requires a parameter"; + return -EINVAL; + } + r = dm_read_arg(_args + 4, as, &fc->random_read_corrupt, &ti->error); + if (r) + return r; + argc--; + + continue; + } + + if (!strcasecmp(arg_name, "random_write_corrupt")) { + if (!argc) { + ti->error = "Feature random_write_corrupt requires a parameter"; + return -EINVAL; + } + r = dm_read_arg(_args + 4, as, &fc->random_write_corrupt, &ti->error); + if (r) + return r; + argc--; + + continue; + } + ti->error = "Unrecognised flakey feature requested"; return -EINVAL; } @@ -184,7 +215,8 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, } if (!fc->corrupt_bio_byte && !test_bit(ERROR_READS, &fc->flags) && - !test_bit(DROP_WRITES, &fc->flags) && !test_bit(ERROR_WRITES, &fc->flags)) { + !test_bit(DROP_WRITES, &fc->flags) && !test_bit(ERROR_WRITES, &fc->flags) && + !fc->random_read_corrupt && !fc->random_write_corrupt) { set_bit(ERROR_WRITES, &fc->flags); set_bit(ERROR_READS, &fc->flags); } @@ -306,40 +338,143 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); } -static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) +static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte, + unsigned char corrupt_bio_value) { - unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1; - struct bvec_iter iter; struct bio_vec bvec; - if (!bio_has_data(bio)) - return; - /* * Overwrite the Nth byte of the bio's data, on whichever page * it falls. */ bio_for_each_segment(bvec, bio, iter) { if (bio_iter_len(bio, iter) > corrupt_bio_byte) { - char *segment; - struct page *page = bio_iter_page(bio, iter); - if (unlikely(page == ZERO_PAGE(0))) - break; - segment = bvec_kmap_local(&bvec); - segment[corrupt_bio_byte] = fc->corrupt_bio_value; + unsigned char *segment = bvec_kmap_local(&bvec); + segment[corrupt_bio_byte] = corrupt_bio_value; kunmap_local(segment); DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", - bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, + bio, corrupt_bio_value, corrupt_bio_byte, (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, - (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size); + (unsigned long long)bio->bi_iter.bi_sector, + bio->bi_iter.bi_size); break; } corrupt_bio_byte -= bio_iter_len(bio, iter); } } +static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) +{ + unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1; + + if (!bio_has_data(bio)) + return; + + corrupt_bio_common(bio, corrupt_bio_byte, fc->corrupt_bio_value); +} + +static void corrupt_bio_random(struct bio *bio) +{ + unsigned int corrupt_byte; + unsigned char corrupt_value; + + if (!bio_has_data(bio)) + return; + + corrupt_byte = get_random_u32() % bio->bi_iter.bi_size; + corrupt_value = get_random_u8(); + + corrupt_bio_common(bio, corrupt_byte, corrupt_value); +} + +static void clone_free(struct bio *clone) +{ + struct folio_iter fi; + + if (clone->bi_vcnt > 0) { /* bio_for_each_folio_all crashes with an empty bio */ + bio_for_each_folio_all(fi, clone) + folio_put(fi.folio); + } + + bio_uninit(clone); + kfree(clone); +} + +static void clone_endio(struct bio *clone) +{ + struct bio *bio = clone->bi_private; + bio->bi_status = clone->bi_status; + clone_free(clone); + bio_endio(bio); +} + +static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct bio *bio) +{ + struct bio *clone; + unsigned size, remaining_size, nr_iovecs, order; + struct bvec_iter iter = bio->bi_iter; + + if (unlikely(bio->bi_iter.bi_size > UIO_MAXIOV << PAGE_SHIFT)) + dm_accept_partial_bio(bio, UIO_MAXIOV << PAGE_SHIFT >> SECTOR_SHIFT); + + size = bio->bi_iter.bi_size; + nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + clone = bio_kmalloc(nr_iovecs, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN); + if (!clone) + return NULL; + + bio_init(clone, fc->dev->bdev, bio->bi_inline_vecs, nr_iovecs, bio->bi_opf); + + clone->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); + clone->bi_private = bio; + clone->bi_end_io = clone_endio; + + remaining_size = size; + + order = MAX_ORDER - 1; + while (remaining_size) { + struct page *pages; + unsigned size_to_add, to_copy; + unsigned char *virt; + unsigned remaining_order = __fls((remaining_size + PAGE_SIZE - 1) >> PAGE_SHIFT); + order = min(order, remaining_order); + +retry_alloc_pages: + pages = alloc_pages(GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN | __GFP_COMP, order); + if (unlikely(!pages)) { + if (order) { + order--; + goto retry_alloc_pages; + } + clone_free(clone); + return NULL; + } + size_to_add = min((unsigned)PAGE_SIZE << order, remaining_size); + + virt = page_to_virt(pages); + to_copy = size_to_add; + do { + struct bio_vec bvec = bvec_iter_bvec(bio->bi_io_vec, iter); + unsigned this_step = min(bvec.bv_len, to_copy); + void *map = bvec_kmap_local(&bvec); + memcpy(virt, map, this_step); + kunmap_local(map); + + bvec_iter_advance(bio->bi_io_vec, &iter, this_step); + to_copy -= this_step; + virt += this_step; + } while (to_copy); + + __bio_add_page(clone, pages, size_to_add, 0); + remaining_size -= size_to_add; + } + + return clone; +} + static int flakey_map(struct dm_target *ti, struct bio *bio) { struct flakey_c *fc = ti->private; @@ -354,6 +489,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { + bool corrupt_fixed, corrupt_random; /* * Flag this bio as submitted while down. */ @@ -383,12 +519,28 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) /* * Corrupt matching writes. */ - if (fc->corrupt_bio_byte) { - if (fc->corrupt_bio_rw == WRITE) { - if (all_corrupt_bio_flags_match(bio, fc)) - corrupt_bio_data(bio, fc); + corrupt_fixed = false; + corrupt_random = false; + if (fc->corrupt_bio_byte && fc->corrupt_bio_rw == WRITE) { + if (all_corrupt_bio_flags_match(bio, fc)) + corrupt_fixed = true; + } + if (fc->random_write_corrupt) { + u64 rnd = get_random_u64(); + u32 rem = do_div(rnd, PROBABILITY_BASE); + if (rem < fc->random_write_corrupt) + corrupt_random = true; + } + if (corrupt_fixed || corrupt_random) { + struct bio *clone = clone_bio(ti, fc, bio); + if (clone) { + if (corrupt_fixed) + corrupt_bio_data(clone, fc); + if (corrupt_random) + corrupt_bio_random(clone); + submit_bio(clone); + return DM_MAPIO_SUBMITTED; } - goto map_bio; } } @@ -417,6 +569,12 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, corrupt_bio_data(bio, fc); } } + if (fc->random_read_corrupt) { + u64 rnd = get_random_u64(); + u32 rem = do_div(rnd, PROBABILITY_BASE); + if (rem < fc->random_read_corrupt) + corrupt_bio_random(bio); + } if (test_bit(ERROR_READS, &fc->flags)) { /* * Error read during the down_interval if drop_writes @@ -449,7 +607,10 @@ static void flakey_status(struct dm_target *ti, status_type_t type, error_reads = test_bit(ERROR_READS, &fc->flags); drop_writes = test_bit(DROP_WRITES, &fc->flags); error_writes = test_bit(ERROR_WRITES, &fc->flags); - DMEMIT(" %u", error_reads + drop_writes + error_writes + (fc->corrupt_bio_byte > 0) * 5); + DMEMIT(" %u", error_reads + drop_writes + error_writes + + (fc->corrupt_bio_byte > 0) * 5 + + (fc->random_read_corrupt > 0) * 2 + + (fc->random_write_corrupt > 0) * 2); if (error_reads) DMEMIT(" error_reads"); @@ -464,6 +625,11 @@ static void flakey_status(struct dm_target *ti, status_type_t type, (fc->corrupt_bio_rw == WRITE) ? 'w' : 'r', fc->corrupt_bio_value, fc->corrupt_bio_flags); + if (fc->random_read_corrupt > 0) + DMEMIT(" random_read_corrupt %u", fc->random_read_corrupt); + if (fc->random_write_corrupt > 0) + DMEMIT(" random_write_corrupt %u", fc->random_write_corrupt); + break; case STATUSTYPE_IMA: diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 63ec502fcb12..3d5c56e0a062 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -34,11 +34,11 @@ #define DEFAULT_BUFFER_SECTORS 128 #define DEFAULT_JOURNAL_WATERMARK 50 #define DEFAULT_SYNC_MSEC 10000 -#define DEFAULT_MAX_JOURNAL_SECTORS 131072 +#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192) #define MIN_LOG2_INTERLEAVE_SECTORS 3 #define MAX_LOG2_INTERLEAVE_SECTORS 31 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 -#define RECALC_SECTORS 32768 +#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048) #define RECALC_WRITE_SUPER 16 #define BITMAP_BLOCK_SIZE 4096 /* don't change it */ #define BITMAP_FLUSH_INTERVAL (10 * HZ) @@ -251,8 +251,6 @@ struct dm_integrity_c { struct workqueue_struct *recalc_wq; struct work_struct recalc_work; - u8 *recalc_buffer; - u8 *recalc_tags; struct bio_list flush_bio_list; @@ -342,24 +340,9 @@ static struct kmem_cache *journal_io_cache; #define JOURNAL_IO_MEMPOOL 32 #ifdef DEBUG_PRINT -#define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__) -static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...) -{ - va_list args; - - va_start(args, msg); - vprintk(msg, args); - va_end(args); - if (len) - pr_cont(":"); - while (len) { - pr_cont(" %02x", *bytes); - bytes++; - len--; - } - pr_cont("\n"); -} -#define DEBUG_bytes(bytes, len, msg, ...) __DEBUG_bytes(bytes, len, KERN_DEBUG msg, ##__VA_ARGS__) +#define DEBUG_print(x, ...) printk(KERN_DEBUG x, ##__VA_ARGS__) +#define DEBUG_bytes(bytes, len, msg, ...) printk(KERN_DEBUG msg "%s%*ph\n", ##__VA_ARGS__, \ + len ? ": " : "", len, bytes) #else #define DEBUG_print(x, ...) do { } while (0) #define DEBUG_bytes(bytes, len, msg, ...) do { } while (0) @@ -2661,6 +2644,9 @@ static void recalc_write_super(struct dm_integrity_c *ic) static void integrity_recalc(struct work_struct *w) { struct dm_integrity_c *ic = container_of(w, struct dm_integrity_c, recalc_work); + size_t recalc_tags_size; + u8 *recalc_buffer = NULL; + u8 *recalc_tags = NULL; struct dm_integrity_range range; struct dm_io_request io_req; struct dm_io_region io_loc; @@ -2672,6 +2658,26 @@ static void integrity_recalc(struct work_struct *w) unsigned int i; int r; unsigned int super_counter = 0; + unsigned recalc_sectors = RECALC_SECTORS; + +retry: + recalc_buffer = __vmalloc(recalc_sectors << SECTOR_SHIFT, GFP_NOIO); + if (!recalc_buffer) { +oom: + recalc_sectors >>= 1; + if (recalc_sectors >= 1U << ic->sb->log2_sectors_per_block) + goto retry; + DMCRIT("out of memory for recalculate buffer - recalculation disabled"); + goto free_ret; + } + recalc_tags_size = (recalc_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size; + if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) + recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; + recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO); + if (!recalc_tags) { + vfree(recalc_buffer); + goto oom; + } DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector)); @@ -2693,7 +2699,7 @@ next_chunk: } get_area_and_offset(ic, range.logical_sector, &area, &offset); - range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector); + range.n_sectors = min((sector_t)recalc_sectors, ic->provided_data_sectors - range.logical_sector); if (!ic->meta_dev) range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned int)offset); @@ -2735,7 +2741,7 @@ next_chunk: io_req.bi_opf = REQ_OP_READ; io_req.mem.type = DM_IO_VMA; - io_req.mem.ptr.addr = ic->recalc_buffer; + io_req.mem.ptr.addr = recalc_buffer; io_req.notify.fn = NULL; io_req.client = ic->io; io_loc.bdev = ic->dev->bdev; @@ -2748,15 +2754,15 @@ next_chunk: goto err; } - t = ic->recalc_tags; + t = recalc_tags; for (i = 0; i < n_sectors; i += ic->sectors_per_block) { - integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); + integrity_sector_checksum(ic, logical_sector + i, recalc_buffer + (i << SECTOR_SHIFT), t); t += ic->tag_size; } metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset); - r = dm_integrity_rw_tag(ic, ic->recalc_tags, &metadata_block, &metadata_offset, t - ic->recalc_tags, TAG_WRITE); + r = dm_integrity_rw_tag(ic, recalc_tags, &metadata_block, &metadata_offset, t - recalc_tags, TAG_WRITE); if (unlikely(r)) { dm_integrity_io_error(ic, "writing tags", r); goto err; @@ -2784,12 +2790,16 @@ advance_and_next: err: remove_range(ic, &range); - return; + goto free_ret; unlock_ret: spin_unlock_irq(&ic->endio_wait.lock); recalc_write_super(ic); + +free_ret: + vfree(recalc_buffer); + kvfree(recalc_tags); } static void bitmap_block_work(struct work_struct *w) @@ -4454,8 +4464,6 @@ try_smaller_buffer: } if (ic->internal_hash) { - size_t recalc_tags_size; - ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); if (!ic->recalc_wq) { ti->error = "Cannot allocate workqueue"; @@ -4463,21 +4471,6 @@ try_smaller_buffer: goto bad; } INIT_WORK(&ic->recalc_work, integrity_recalc); - ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT); - if (!ic->recalc_buffer) { - ti->error = "Cannot allocate buffer for recalculating"; - r = -ENOMEM; - goto bad; - } - recalc_tags_size = (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size; - if (crypto_shash_digestsize(ic->internal_hash) > ic->tag_size) - recalc_tags_size += crypto_shash_digestsize(ic->internal_hash) - ic->tag_size; - ic->recalc_tags = kvmalloc(recalc_tags_size, GFP_KERNEL); - if (!ic->recalc_tags) { - ti->error = "Cannot allocate tags for recalculating"; - r = -ENOMEM; - goto bad; - } } else { if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { ti->error = "Recalculate can only be specified with internal_hash"; @@ -4621,8 +4614,6 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->writer_wq); if (ic->recalc_wq) destroy_workqueue(ic->recalc_wq); - vfree(ic->recalc_buffer); - kvfree(ic->recalc_tags); kvfree(ic->bbs); if (ic->bufio) dm_bufio_client_destroy(ic->bufio); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 6d301019e5e3..f5ed729a8e0c 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -767,7 +767,14 @@ static int get_target_version(struct file *filp, struct dm_ioctl *param, size_t static int check_name(const char *name) { if (strchr(name, '/')) { - DMERR("invalid device name"); + DMERR("device name cannot contain '/'"); + return -EINVAL; + } + + if (strcmp(name, DM_CONTROL_NODE) == 0 || + strcmp(name, ".") == 0 || + strcmp(name, "..") == 0) { + DMERR("device name cannot be \"%s\", \".\", or \"..\"", DM_CONTROL_NODE); return -EINVAL; } @@ -1388,16 +1395,38 @@ static inline blk_mode_t get_mode(struct dm_ioctl *param) return mode; } -static int next_target(struct dm_target_spec *last, uint32_t next, void *end, +static int next_target(struct dm_target_spec *last, uint32_t next, const char *end, struct dm_target_spec **spec, char **target_params) { - *spec = (struct dm_target_spec *) ((unsigned char *) last + next); - *target_params = (char *) (*spec + 1); + static_assert(__alignof__(struct dm_target_spec) <= 8, + "struct dm_target_spec must not require more than 8-byte alignment"); + + /* + * Number of bytes remaining, starting with last. This is always + * sizeof(struct dm_target_spec) or more, as otherwise *last was + * out of bounds already. + */ + size_t remaining = end - (char *)last; + + /* + * There must be room for both the next target spec and the + * NUL-terminator of the target itself. + */ + if (remaining - sizeof(struct dm_target_spec) <= next) { + DMERR("Target spec extends beyond end of parameters"); + return -EINVAL; + } - if (*spec < (last + 1)) + if (next % __alignof__(struct dm_target_spec)) { + DMERR("Next dm_target_spec (offset %u) is not %zu-byte aligned", + next, __alignof__(struct dm_target_spec)); return -EINVAL; + } + + *spec = (struct dm_target_spec *) ((unsigned char *) last + next); + *target_params = (char *) (*spec + 1); - return invalid_str(*target_params, end); + return 0; } static int populate_table(struct dm_table *table, @@ -1407,8 +1436,9 @@ static int populate_table(struct dm_table *table, unsigned int i = 0; struct dm_target_spec *spec = (struct dm_target_spec *) param; uint32_t next = param->data_start; - void *end = (void *) param + param_size; + const char *const end = (const char *) param + param_size; char *target_params; + size_t min_size = sizeof(struct dm_ioctl); if (!param->target_count) { DMERR("%s: no targets specified", __func__); @@ -1416,6 +1446,13 @@ static int populate_table(struct dm_table *table, } for (i = 0; i < param->target_count; i++) { + const char *nul_terminator; + + if (next < min_size) { + DMERR("%s: next target spec (offset %u) overlaps %s", + __func__, next, i ? "previous target" : "'struct dm_ioctl'"); + return -EINVAL; + } r = next_target(spec, next, end, &spec, &target_params); if (r) { @@ -1423,6 +1460,15 @@ static int populate_table(struct dm_table *table, return r; } + nul_terminator = memchr(target_params, 0, (size_t)(end - target_params)); + if (nul_terminator == NULL) { + DMERR("%s: target parameters not NUL-terminated", __func__); + return -EINVAL; + } + + /* Add 1 for NUL terminator */ + min_size = (size_t)(nul_terminator - (const char *)spec) + 1; + r = dm_table_add_target(table, spec->target_type, (sector_t) spec->sector_start, (sector_t) spec->length, @@ -1830,30 +1876,36 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) * As well as checking the version compatibility this always * copies the kernel interface version out. */ -static int check_version(unsigned int cmd, struct dm_ioctl __user *user) +static int check_version(unsigned int cmd, struct dm_ioctl __user *user, + struct dm_ioctl *kernel_params) { - uint32_t version[3]; int r = 0; - if (copy_from_user(version, user->version, sizeof(version))) + /* Make certain version is first member of dm_ioctl struct */ + BUILD_BUG_ON(offsetof(struct dm_ioctl, version) != 0); + + if (copy_from_user(kernel_params->version, user->version, sizeof(kernel_params->version))) return -EFAULT; - if ((version[0] != DM_VERSION_MAJOR) || - (version[1] > DM_VERSION_MINOR)) { + if ((kernel_params->version[0] != DM_VERSION_MAJOR) || + (kernel_params->version[1] > DM_VERSION_MINOR)) { DMERR("ioctl interface mismatch: kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", DM_VERSION_MAJOR, DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, - version[0], version[1], version[2], cmd); + kernel_params->version[0], + kernel_params->version[1], + kernel_params->version[2], + cmd); r = -EINVAL; } /* * Fill in the kernel version. */ - version[0] = DM_VERSION_MAJOR; - version[1] = DM_VERSION_MINOR; - version[2] = DM_VERSION_PATCHLEVEL; - if (copy_to_user(user->version, version, sizeof(version))) + kernel_params->version[0] = DM_VERSION_MAJOR; + kernel_params->version[1] = DM_VERSION_MINOR; + kernel_params->version[2] = DM_VERSION_PATCHLEVEL; + if (copy_to_user(user->version, kernel_params->version, sizeof(kernel_params->version))) return -EFAULT; return r; @@ -1877,9 +1929,11 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern struct dm_ioctl *dmi; int secure_data; const size_t minimum_data_size = offsetof(struct dm_ioctl, data); - unsigned int noio_flag; - if (copy_from_user(param_kernel, user, minimum_data_size)) + /* check_version() already copied version from userspace, avoid TOCTOU */ + if (copy_from_user((char *)param_kernel + sizeof(param_kernel->version), + (char __user *)user + sizeof(param_kernel->version), + minimum_data_size - sizeof(param_kernel->version))) return -EFAULT; if (param_kernel->data_size < minimum_data_size) { @@ -1904,9 +1958,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern * Use kmalloc() rather than vmalloc() when we can. */ dmi = NULL; - noio_flag = memalloc_noio_save(); - dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL | __GFP_HIGH); - memalloc_noio_restore(noio_flag); + dmi = kvmalloc(param_kernel->data_size, GFP_NOIO | __GFP_HIGH); if (!dmi) { if (secure_data && clear_user(user, param_kernel->data_size)) @@ -1991,7 +2043,7 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us * Check the interface version passed in. This also * writes out the kernel's interface version. */ - r = check_version(cmd, user); + r = check_version(cmd, user, ¶m_kernel); if (r) return r; diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 9dd0409848ab..6022189c1388 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -603,6 +603,8 @@ static int __format_metadata(struct dm_pool_metadata *pmd) r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, &pmd->tm, &pmd->metadata_sm); if (r < 0) { + pmd->tm = NULL; + pmd->metadata_sm = NULL; DMERR("tm_create_with_sm failed"); return r; } @@ -611,6 +613,7 @@ static int __format_metadata(struct dm_pool_metadata *pmd) if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_create failed"); r = PTR_ERR(pmd->data_sm); + pmd->data_sm = NULL; goto bad_cleanup_tm; } @@ -641,11 +644,15 @@ static int __format_metadata(struct dm_pool_metadata *pmd) bad_cleanup_nb_tm: dm_tm_destroy(pmd->nb_tm); + pmd->nb_tm = NULL; bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); + pmd->data_sm = NULL; bad_cleanup_tm: dm_tm_destroy(pmd->tm); + pmd->tm = NULL; dm_sm_destroy(pmd->metadata_sm); + pmd->metadata_sm = NULL; return r; } @@ -711,6 +718,8 @@ static int __open_metadata(struct dm_pool_metadata *pmd) sizeof(disk_super->metadata_space_map_root), &pmd->tm, &pmd->metadata_sm); if (r < 0) { + pmd->tm = NULL; + pmd->metadata_sm = NULL; DMERR("tm_open_with_sm failed"); goto bad_unlock_sblock; } @@ -720,6 +729,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd) if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_open failed"); r = PTR_ERR(pmd->data_sm); + pmd->data_sm = NULL; goto bad_cleanup_tm; } @@ -746,9 +756,12 @@ static int __open_metadata(struct dm_pool_metadata *pmd) bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); + pmd->data_sm = NULL; bad_cleanup_tm: dm_tm_destroy(pmd->tm); + pmd->tm = NULL; dm_sm_destroy(pmd->metadata_sm); + pmd->metadata_sm = NULL; bad_unlock_sblock: dm_bm_unlock(sblock); @@ -795,9 +808,13 @@ static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd, bool destroy_bm) { dm_sm_destroy(pmd->data_sm); + pmd->data_sm = NULL; dm_sm_destroy(pmd->metadata_sm); + pmd->metadata_sm = NULL; dm_tm_destroy(pmd->nb_tm); + pmd->nb_tm = NULL; dm_tm_destroy(pmd->tm); + pmd->tm = NULL; if (destroy_bm) dm_block_manager_destroy(pmd->bm); } @@ -1005,8 +1022,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) __func__, r); } pmd_write_unlock(pmd); - if (!pmd->fail_io) - __destroy_persistent_data_objects(pmd, true); + __destroy_persistent_data_objects(pmd, true); kfree(pmd); return 0; @@ -1881,53 +1897,29 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) { int r = -EINVAL; - struct dm_block_manager *old_bm = NULL, *new_bm = NULL; /* fail_io is double-checked with pmd->root_lock held below */ if (unlikely(pmd->fail_io)) return r; - /* - * Replacement block manager (new_bm) is created and old_bm destroyed outside of - * pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of - * shrinker associated with the block manager's bufio client vs pmd root_lock). - * - must take shrinker_rwsem without holding pmd->root_lock - */ - new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, - THIN_MAX_CONCURRENT_LOCKS); - pmd_write_lock(pmd); if (pmd->fail_io) { pmd_write_unlock(pmd); - goto out; + return r; } - __set_abort_with_changes_flags(pmd); + + /* destroy data_sm/metadata_sm/nb_tm/tm */ __destroy_persistent_data_objects(pmd, false); - old_bm = pmd->bm; - if (IS_ERR(new_bm)) { - DMERR("could not create block manager during abort"); - pmd->bm = NULL; - r = PTR_ERR(new_bm); - goto out_unlock; - } - pmd->bm = new_bm; + /* reset bm */ + dm_block_manager_reset(pmd->bm); + + /* rebuild data_sm/metadata_sm/nb_tm/tm */ r = __open_or_format_metadata(pmd, false); - if (r) { - pmd->bm = NULL; - goto out_unlock; - } - new_bm = NULL; -out_unlock: if (r) pmd->fail_io = true; pmd_write_unlock(pmd); - dm_block_manager_destroy(old_bm); -out: - if (new_bm && !IS_ERR(new_bm)) - dm_block_manager_destroy(new_bm); - return r; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index f1d0dcb9db22..07c7f9795b10 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2527,16 +2527,11 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) /*----------------------------------------------------------------*/ -static bool passdown_enabled(struct pool_c *pt) -{ - return pt->adjusted_pf.discard_passdown; -} - static void set_discard_callbacks(struct pool *pool) { struct pool_c *pt = pool->ti->private; - if (passdown_enabled(pt)) { + if (pt->adjusted_pf.discard_passdown) { pool->process_discard_cell = process_discard_cell_passdown; pool->process_prepared_discard = process_prepared_discard_passdown_pt1; pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2; @@ -2845,7 +2840,7 @@ static bool is_factor(sector_t block_size, uint32_t n) * If discard_passdown was enabled verify that the data device * supports discards. Disable discard_passdown if not. */ -static void disable_passdown_if_not_supported(struct pool_c *pt) +static void disable_discard_passdown_if_not_supported(struct pool_c *pt) { struct pool *pool = pt->pool; struct block_device *data_bdev = pt->data_dev->bdev; @@ -3446,7 +3441,6 @@ out_unlock: static int pool_map(struct dm_target *ti, struct bio *bio) { - int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; @@ -3455,10 +3449,9 @@ static int pool_map(struct dm_target *ti, struct bio *bio) */ spin_lock_irq(&pool->lock); bio_set_dev(bio, pt->data_dev->bdev); - r = DM_MAPIO_REMAPPED; spin_unlock_irq(&pool->lock); - return r; + return DM_MAPIO_REMAPPED; } static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) @@ -4099,21 +4092,22 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) * They get transferred to the live pool in bind_control_target() * called from pool_preresume(). */ - if (!pt->adjusted_pf.discard_enabled) { + + if (pt->adjusted_pf.discard_enabled) { + disable_discard_passdown_if_not_supported(pt); + if (!pt->adjusted_pf.discard_passdown) + limits->max_discard_sectors = 0; + /* + * The pool uses the same discard limits as the underlying data + * device. DM core has already set this up. + */ + } else { /* * Must explicitly disallow stacking discard limits otherwise the * block layer will stack them if pool's data device has support. */ limits->discard_granularity = 0; - return; } - - disable_passdown_if_not_supported(pt); - - /* - * The pool uses the same discard limits as the underlying data - * device. DM core has already set this up. - */ } static struct target_type pool_target = { @@ -4497,11 +4491,10 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) struct thin_c *tc = ti->private; struct pool *pool = tc->pool; - if (!pool->pf.discard_enabled) - return; - - limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE; + if (pool->pf.discard_enabled) { + limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; + limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE; + } } static struct target_type thin_target = { diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 4b82b7798ce4..eb9832b22b14 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/sched/mm.h> #include <linux/slab.h> +#include <linux/bitmap.h> #include "dm-core.h" @@ -140,9 +141,9 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) void dm_cleanup_zoned_dev(struct mapped_device *md) { if (md->disk) { - kfree(md->disk->conv_zones_bitmap); + bitmap_free(md->disk->conv_zones_bitmap); md->disk->conv_zones_bitmap = NULL; - kfree(md->disk->seq_zones_wlock); + bitmap_free(md->disk->seq_zones_wlock); md->disk->seq_zones_wlock = NULL; } @@ -182,9 +183,8 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx, switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: if (!disk->conv_zones_bitmap) { - disk->conv_zones_bitmap = - kcalloc(BITS_TO_LONGS(disk->nr_zones), - sizeof(unsigned long), GFP_NOIO); + disk->conv_zones_bitmap = bitmap_zalloc(disk->nr_zones, + GFP_NOIO); if (!disk->conv_zones_bitmap) return -ENOMEM; } @@ -193,9 +193,8 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx, case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: if (!disk->seq_zones_wlock) { - disk->seq_zones_wlock = - kcalloc(BITS_TO_LONGS(disk->nr_zones), - sizeof(unsigned long), GFP_NOIO); + disk->seq_zones_wlock = bitmap_zalloc(disk->nr_zones, + GFP_NOIO); if (!disk->seq_zones_wlock) return -ENOMEM; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fe2d4750d9c7..f0f118ab20fa 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -487,48 +487,50 @@ u64 dm_start_time_ns_from_clone(struct bio *bio) } EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); -static bool bio_is_flush_with_data(struct bio *bio) +static inline bool bio_is_flush_with_data(struct bio *bio) { return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size); } -static void dm_io_acct(struct dm_io *io, bool end) +static inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio) { - struct dm_stats_aux *stats_aux = &io->stats_aux; - unsigned long start_time = io->start_time; - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - unsigned int sectors; - /* * If REQ_PREFLUSH set, don't account payload, it will be * submitted (and accounted) after this flush completes. */ if (bio_is_flush_with_data(bio)) - sectors = 0; - else if (likely(!(dm_io_flagged(io, DM_IO_WAS_SPLIT)))) - sectors = bio_sectors(bio); - else - sectors = io->sectors; + return 0; + if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT))) + return io->sectors; + return bio_sectors(bio); +} - if (!end) - bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time); - else - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors, - start_time); +static void dm_io_acct(struct dm_io *io, bool end) +{ + struct bio *bio = io->orig_bio; + + if (dm_io_flagged(io, DM_IO_BLK_STAT)) { + if (!end) + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), + io->start_time); + else + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), + dm_io_sectors(io, bio), + io->start_time); + } if (static_branch_unlikely(&stats_enabled) && - unlikely(dm_stats_used(&md->stats))) { + unlikely(dm_stats_used(&io->md->stats))) { sector_t sector; - if (likely(!dm_io_flagged(io, DM_IO_WAS_SPLIT))) - sector = bio->bi_iter.bi_sector; - else + if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT))) sector = bio_end_sector(bio) - io->sector_offset; + else + sector = bio->bi_iter.bi_sector; - dm_stats_account_io(&md->stats, bio_data_dir(bio), - sector, sectors, - end, start_time, stats_aux); + dm_stats_account_io(&io->md->stats, bio_data_dir(bio), + sector, dm_io_sectors(io, bio), + end, io->start_time, &io->stats_aux); } } @@ -592,8 +594,11 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) spin_lock_init(&io->lock); io->start_time = jiffies; io->flags = 0; + if (blk_queue_io_stat(md->queue)) + dm_io_set_flag(io, DM_IO_BLK_STAT); - if (static_branch_unlikely(&stats_enabled)) + if (static_branch_unlikely(&stats_enabled) && + unlikely(dm_stats_used(&md->stats))) dm_stats_record_start(&md->stats, &io->stats_aux); return io; @@ -2348,6 +2353,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: + blk_queue_flag_set(QUEUE_FLAG_IO_STAT, md->queue); break; case DM_TYPE_NONE: WARN_ON_ONCE(true); @@ -3143,6 +3149,8 @@ struct dm_pr { bool fail_early; int ret; enum pr_type type; + struct pr_keys *read_keys; + struct pr_held_reservation *rsv; }; static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, @@ -3375,12 +3383,79 @@ out: return r; } +static int __dm_pr_read_keys(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct dm_pr *pr = data; + const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; + + if (!ops || !ops->pr_read_keys) { + pr->ret = -EOPNOTSUPP; + return -1; + } + + pr->ret = ops->pr_read_keys(dev->bdev, pr->read_keys); + if (!pr->ret) + return -1; + + return 0; +} + +static int dm_pr_read_keys(struct block_device *bdev, struct pr_keys *keys) +{ + struct dm_pr pr = { + .read_keys = keys, + }; + int ret; + + ret = dm_call_pr(bdev, __dm_pr_read_keys, &pr); + if (ret) + return ret; + + return pr.ret; +} + +static int __dm_pr_read_reservation(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct dm_pr *pr = data; + const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; + + if (!ops || !ops->pr_read_reservation) { + pr->ret = -EOPNOTSUPP; + return -1; + } + + pr->ret = ops->pr_read_reservation(dev->bdev, pr->rsv); + if (!pr->ret) + return -1; + + return 0; +} + +static int dm_pr_read_reservation(struct block_device *bdev, + struct pr_held_reservation *rsv) +{ + struct dm_pr pr = { + .rsv = rsv, + }; + int ret; + + ret = dm_call_pr(bdev, __dm_pr_read_reservation, &pr); + if (ret) + return ret; + + return pr.ret; +} + static const struct pr_ops dm_pr_ops = { .pr_register = dm_pr_register, .pr_reserve = dm_pr_reserve, .pr_release = dm_pr_release, .pr_preempt = dm_pr_preempt, .pr_clear = dm_pr_clear, + .pr_read_keys = dm_pr_read_keys, + .pr_read_reservation = dm_pr_read_reservation, }; static const struct block_device_operations dm_blk_dops = { diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 63d9010d8e61..f682295af91f 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -210,9 +210,6 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d); int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, unsigned int cookie, bool need_resize_uevent); -void dm_internal_suspend(struct mapped_device *md); -void dm_internal_resume(struct mapped_device *md); - int dm_io_init(void); void dm_io_exit(void); diff --git a/drivers/md/md.c b/drivers/md/md.c index cf3733c90c47..2e38ef421d69 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev) { mutex_init(&mddev->open_mutex); mutex_init(&mddev->reconfig_mutex); - mutex_init(&mddev->delete_mutex); mutex_init(&mddev->bitmap_info.mutex); INIT_LIST_HEAD(&mddev->disks); INIT_LIST_HEAD(&mddev->all_mddevs); @@ -749,26 +748,15 @@ static void mddev_free(struct mddev *mddev) static const struct attribute_group md_redundancy_group; -static void md_free_rdev(struct mddev *mddev) +void mddev_unlock(struct mddev *mddev) { struct md_rdev *rdev; struct md_rdev *tmp; + LIST_HEAD(delete); - mutex_lock(&mddev->delete_mutex); - if (list_empty(&mddev->deleting)) - goto out; - - list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) { - list_del_init(&rdev->same_set); - kobject_del(&rdev->kobj); - export_rdev(rdev, mddev); - } -out: - mutex_unlock(&mddev->delete_mutex); -} + if (!list_empty(&mddev->deleting)) + list_splice_init(&mddev->deleting, &delete); -void mddev_unlock(struct mddev *mddev) -{ if (mddev->to_remove) { /* These cannot be removed under reconfig_mutex as * an access to the files will try to take reconfig_mutex @@ -808,7 +796,11 @@ void mddev_unlock(struct mddev *mddev) } else mutex_unlock(&mddev->reconfig_mutex); - md_free_rdev(mddev); + list_for_each_entry_safe(rdev, tmp, &delete, same_set) { + list_del_init(&rdev->same_set); + kobject_del(&rdev->kobj); + export_rdev(rdev, mddev); + } md_wakeup_thread(mddev->thread); wake_up(&mddev->sb_wait); @@ -2458,7 +2450,7 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev) if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); #endif - blkdev_put(rdev->bdev, mddev->major_version == -2 ? &claim_rdev : rdev); + blkdev_put(rdev->bdev, mddev->external ? &claim_rdev : rdev); rdev->bdev = NULL; kobject_put(&rdev->kobj); } @@ -2488,9 +2480,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) * reconfig_mutex is held, hence it can't be called under * reconfig_mutex and it's delayed to mddev_unlock(). */ - mutex_lock(&mddev->delete_mutex); list_add(&rdev->same_set, &mddev->deleting); - mutex_unlock(&mddev->delete_mutex); } static void export_array(struct mddev *mddev) @@ -6140,7 +6130,7 @@ static void md_clean(struct mddev *mddev) mddev->resync_min = 0; mddev->resync_max = MaxSector; mddev->reshape_position = MaxSector; - mddev->external = 0; + /* we still need mddev->external in export_rdev, do not clear it yet */ mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; diff --git a/drivers/md/md.h b/drivers/md/md.h index bfd2306bc750..1aef86bf3fc3 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -531,11 +531,9 @@ struct mddev { /* * Temporarily store rdev that will be finally removed when - * reconfig_mutex is unlocked. + * reconfig_mutex is unlocked, protected by reconfig_mutex. */ struct list_head deleting; - /* Protect the deleting list */ - struct mutex delete_mutex; bool has_superblocks:1; bool fail_last_dev:1; diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 7bdfc23f758a..0e010e1204aa 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -421,6 +421,12 @@ void dm_block_manager_destroy(struct dm_block_manager *bm) } EXPORT_SYMBOL_GPL(dm_block_manager_destroy); +void dm_block_manager_reset(struct dm_block_manager *bm) +{ + dm_bufio_client_reset(bm->bufio); +} +EXPORT_SYMBOL_GPL(dm_block_manager_reset); + unsigned int dm_bm_block_size(struct dm_block_manager *bm) { return dm_bufio_get_block_size(bm->bufio); diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 5746b0f82a03..f706d3de8d5a 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -36,6 +36,7 @@ struct dm_block_manager *dm_block_manager_create( struct block_device *bdev, unsigned int block_size, unsigned int max_held_per_thread); void dm_block_manager_destroy(struct dm_block_manager *bm); +void dm_block_manager_reset(struct dm_block_manager *bm); unsigned int dm_bm_block_size(struct dm_block_manager *bm); dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm); diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h index dab490353781..6bf69922b5ad 100644 --- a/drivers/md/persistent-data/dm-space-map.h +++ b/drivers/md/persistent-data/dm-space-map.h @@ -77,7 +77,8 @@ struct dm_space_map { static inline void dm_sm_destroy(struct dm_space_map *sm) { - sm->destroy(sm); + if (sm) + sm->destroy(sm); } static inline int dm_sm_extend(struct dm_space_map *sm, dm_block_t extra_blocks) diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 6dc016248baf..c88fa6266203 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -199,6 +199,9 @@ EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone); void dm_tm_destroy(struct dm_transaction_manager *tm) { + if (!tm) + return; + if (!tm->is_clone) wipe_shadow_table(tm); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f8ee9a95e25d..d1ac73fcd852 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -270,6 +270,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) goto abort; } + if (conf->layout == RAID0_ORIG_LAYOUT) { + for (i = 1; i < conf->nr_strip_zones; i++) { + sector_t first_sector = conf->strip_zone[i-1].zone_end; + + sector_div(first_sector, mddev->chunk_sectors); + zone = conf->strip_zone + i; + /* disk_shift is first disk index used in the zone */ + zone->disk_shift = sector_div(first_sector, + zone->nb_dev); + } + } + pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; @@ -431,6 +443,20 @@ exit_acct_set: return ret; } +/* + * Convert disk_index to the disk order in which it is read/written. + * For example, if we have 4 disks, they are numbered 0,1,2,3. If we + * write the disks starting at disk 3, then the read/write order would + * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift() + * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map + * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in + * that 'output' space to understand the read/write disk ordering. + */ +static int map_disk_shift(int disk_index, int num_disks, int disk_shift) +{ + return ((disk_index + num_disks - disk_shift) % num_disks); +} + static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) { struct r0conf *conf = mddev->private; @@ -444,7 +470,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) sector_t end_disk_offset; unsigned int end_disk_index; unsigned int disk; + sector_t orig_start, orig_end; + orig_start = start; zone = find_zone(conf, &start); if (bio_end_sector(bio) > zone->zone_end) { @@ -458,6 +486,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) } else end = bio_end_sector(bio); + orig_end = end; if (zone != conf->strip_zone) end = end - zone[-1].zone_end; @@ -469,13 +498,26 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) last_stripe_index = end; sector_div(last_stripe_index, stripe_size); - start_disk_index = (int)(start - first_stripe_index * stripe_size) / - mddev->chunk_sectors; + /* In the first zone the original and alternate layouts are the same */ + if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) { + sector_div(orig_start, mddev->chunk_sectors); + start_disk_index = sector_div(orig_start, zone->nb_dev); + start_disk_index = map_disk_shift(start_disk_index, + zone->nb_dev, + zone->disk_shift); + sector_div(orig_end, mddev->chunk_sectors); + end_disk_index = sector_div(orig_end, zone->nb_dev); + end_disk_index = map_disk_shift(end_disk_index, + zone->nb_dev, zone->disk_shift); + } else { + start_disk_index = (int)(start - first_stripe_index * stripe_size) / + mddev->chunk_sectors; + end_disk_index = (int)(end - last_stripe_index * stripe_size) / + mddev->chunk_sectors; + } start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % mddev->chunk_sectors) + first_stripe_index * mddev->chunk_sectors; - end_disk_index = (int)(end - last_stripe_index * stripe_size) / - mddev->chunk_sectors; end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % mddev->chunk_sectors) + last_stripe_index * mddev->chunk_sectors; @@ -483,18 +525,22 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) for (disk = 0; disk < zone->nb_dev; disk++) { sector_t dev_start, dev_end; struct md_rdev *rdev; + int compare_disk; + + compare_disk = map_disk_shift(disk, zone->nb_dev, + zone->disk_shift); - if (disk < start_disk_index) + if (compare_disk < start_disk_index) dev_start = (first_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > start_disk_index) + else if (compare_disk > start_disk_index) dev_start = first_stripe_index * mddev->chunk_sectors; else dev_start = start_disk_offset; - if (disk < end_disk_index) + if (compare_disk < end_disk_index) dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > end_disk_index) + else if (compare_disk > end_disk_index) dev_end = last_stripe_index * mddev->chunk_sectors; else dev_end = end_disk_offset; diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 3816e5477db1..8cc761ca7423 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -6,6 +6,7 @@ struct strip_zone { sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ int nb_dev; /* # of devices attached to the zone */ + int disk_shift; /* start disk for the original layout */ }; /* Linux 3.14 (20d0189b101) made an unintended change to diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 169ebe296f2d..3f22edec70e7 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -116,7 +116,7 @@ static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp, static inline void raid1_submit_write(struct bio *bio) { - struct md_rdev *rdev = (struct md_rdev *)bio->bi_bdev; + struct md_rdev *rdev = (void *)bio->bi_bdev; bio->bi_next = NULL; bio_set_dev(bio, rdev->bdev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d0de8c9fb3cf..5051149e27bb 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -325,7 +325,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio) if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) bio->bi_status = BLK_STS_IOERR; - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + if (r10_bio->start_time) bio_end_io_acct(bio, r10_bio->start_time); bio_endio(bio); /* @@ -1118,7 +1118,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->pending_bio_list, &plug->pending); spin_unlock_irq(&conf->device_lock); - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); md_wakeup_thread(mddev->thread); kfree(plug); return; @@ -1127,7 +1127,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) /* we aren't scheduling, so we can do the write-out directly. */ bio = bio_list_get(&plug->pending); raid1_prepare_flush_writes(mddev->bitmap); - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; |
