From 611bee526b4a89d49f1b9914a770bfdc101d5fb5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:41 +0200 Subject: block: replace bd_set_size with bd_set_nr_sectors Replace bd_set_size with a version that takes the number of sectors instead, as that fits most of the current and future callers much better. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- fs/block_dev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 8ae833e00443..f52597172c8b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1371,13 +1371,13 @@ int check_disk_change(struct block_device *bdev) EXPORT_SYMBOL(check_disk_change); -void bd_set_size(struct block_device *bdev, loff_t size) +void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) { inode_lock(bdev->bd_inode); - i_size_write(bdev->bd_inode, size); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); inode_unlock(bdev->bd_inode); } -EXPORT_SYMBOL(bd_set_size); +EXPORT_SYMBOL(bd_set_nr_sectors); static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); @@ -1514,7 +1514,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, } if (!ret) { - bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + bd_set_nr_sectors(bdev, get_capacity(disk)); set_init_blocksize(bdev); } @@ -1542,7 +1542,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, ret = -ENXIO; goto out_clear; } - bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects); set_init_blocksize(bdev); } -- cgit v1.2.3-70-g09d2 From c2b4bb8cb3741c0bacf3683e4c1ecd04c977ada3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 23 Aug 2020 11:10:42 +0200 Subject: block: fix locking for struct block_device size updates Two different callers use two different mutexes for updating the block device size, which obviously doesn't help to actually protect against concurrent updates from the different callers. In addition one of the locks, bd_mutex is rather prone to deadlocks with other parts of the block stack that use it for high level synchronization. Switch to using a new spinlock protecting just the size updates, as that is all we need, and make sure everyone does the update through the proper helper. This fixes a bug reported with the nvme revalidating disks during a hot removal operation, which can currently deadlock on bd_mutex. Reported-by: Xianting Tian Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/partitions/core.c | 4 ++-- drivers/block/aoe/aoecmd.c | 4 +--- drivers/md/dm.c | 15 ++------------- drivers/s390/block/dasd_ioctl.c | 9 ++------- fs/block_dev.c | 25 ++++++++++++++----------- include/linux/blk_types.h | 1 + 6 files changed, 22 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/block/partitions/core.c b/block/partitions/core.c index 5b4869c08fb3..b1c0b50ca92d 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -592,8 +592,8 @@ int bdev_resize_partition(struct block_device *bdev, int partno, if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, (sector_t)length); - i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT); + part_nr_sects_write(part, length); + bd_set_nr_sectors(bdevp, length); ret = 0; out_unlock: diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 6dba41395155..313f0b946fe2 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -900,9 +900,7 @@ aoecmd_sleepwork(struct work_struct *work) ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { - inode_lock(bd->bd_inode); - i_size_write(bd->bd_inode, (loff_t)ssize<<9); - inode_unlock(bd->bd_inode); + bd_set_nr_sectors(bd, ssize); bdput(bd); } spin_lock_irq(&d->lock); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fb0255d25e4b..3dedd9cc4fb6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2097,18 +2097,6 @@ static void event_callback(void *context) dm_issue_global_event(); } -/* - * Protected by md->suspend_lock obtained by dm_swap_table(). - */ -static void __set_size(struct mapped_device *md, sector_t size) -{ - lockdep_assert_held(&md->suspend_lock); - - set_capacity(md->disk, size); - - i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); -} - /* * Returns old map, which caller must destroy. */ @@ -2131,7 +2119,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (size != dm_get_size(md)) memset(&md->geometry, 0, sizeof(md->geometry)); - __set_size(md, size); + set_capacity(md->disk, size); + bd_set_nr_sectors(md->bdev, size); dm_table_event_callback(t, event_callback, md); diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 777734d1b4e5..faaf5596e31c 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -55,10 +55,7 @@ dasd_ioctl_enable(struct block_device *bdev) dasd_enable_device(base); /* Formatting the dasd device can change the capacity. */ - mutex_lock(&bdev->bd_mutex); - i_size_write(bdev->bd_inode, - (loff_t)get_capacity(base->block->gdp) << 9); - mutex_unlock(&bdev->bd_mutex); + bd_set_nr_sectors(bdev, get_capacity(base->block->gdp)); dasd_put_device(base); return 0; } @@ -91,9 +88,7 @@ dasd_ioctl_disable(struct block_device *bdev) * Set i_size to zero, since read, write, etc. check against this * value. */ - mutex_lock(&bdev->bd_mutex); - i_size_write(bdev->bd_inode, 0); - mutex_unlock(&bdev->bd_mutex); + bd_set_nr_sectors(bdev, 0); dasd_put_device(base); return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index f52597172c8b..08158bb2e76c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -876,6 +876,7 @@ struct block_device *bdget(dev_t dev) bdev = &BDEV_I(inode)->bdev; if (inode->i_state & I_NEW) { + spin_lock_init(&bdev->bd_size_lock); bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; @@ -1290,6 +1291,7 @@ static void check_disk_size_change(struct gendisk *disk, { loff_t disk_size, bdev_size; + spin_lock(&bdev->bd_size_lock); disk_size = (loff_t)get_capacity(disk) << 9; bdev_size = i_size_read(bdev->bd_inode); if (disk_size != bdev_size) { @@ -1299,11 +1301,15 @@ static void check_disk_size_change(struct gendisk *disk, disk->disk_name, bdev_size, disk_size); } i_size_write(bdev->bd_inode, disk_size); - if (bdev_size > disk_size && __invalidate_device(bdev, false)) + } + bdev->bd_invalidated = 0; + spin_unlock(&bdev->bd_size_lock); + + if (bdev_size > disk_size) { + if (__invalidate_device(bdev, false)) pr_warn("VFS: busy inodes on resized disk %s\n", disk->disk_name); } - bdev->bd_invalidated = 0; } /** @@ -1328,13 +1334,10 @@ int revalidate_disk(struct gendisk *disk) if (!(disk->flags & GENHD_FL_HIDDEN)) { struct block_device *bdev = bdget_disk(disk, 0); - if (!bdev) - return ret; - - mutex_lock(&bdev->bd_mutex); - check_disk_size_change(disk, bdev, ret == 0); - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); + if (bdev) { + check_disk_size_change(disk, bdev, ret == 0); + bdput(bdev); + } } return ret; } @@ -1373,9 +1376,9 @@ EXPORT_SYMBOL(check_disk_change); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) { - inode_lock(bdev->bd_inode); + spin_lock(&bdev->bd_size_lock); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - inode_unlock(bdev->bd_inode); + spin_unlock(&bdev->bd_size_lock); } EXPORT_SYMBOL(bd_set_nr_sectors); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4ecf4fed171f..5accc2549d22 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -38,6 +38,7 @@ struct block_device { /* number of times partitions within this device have been opened. */ unsigned bd_part_count; int bd_invalidated; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; -- cgit v1.2.3-70-g09d2 From e5c7fb400227df5c7822a3c59b193d23e849d0ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 31 Aug 2020 20:02:36 +0200 Subject: block: move the devcgroup_inode_permission call to blkdev_get devcgroup_inode_permission is never called for the recusive case, so move it out into blkdev_get. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 08158bb2e76c..990e97bcbeaf 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1449,22 +1449,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, struct gendisk *disk; int ret; int partno; - int perm = 0; bool first_open = false, unblock_events = true, need_restart; - if (mode & FMODE_READ) - perm |= MAY_READ; - if (mode & FMODE_WRITE) - perm |= MAY_WRITE; - /* - * hooks: /n/, see "layering violations". - */ - if (!for_part) { - ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret != 0) - return ret; - } - restart: need_restart = false; ret = -ENXIO; @@ -1637,12 +1623,24 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, */ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { - int res; + int ret, perm = 0; - res =__blkdev_get(bdev, mode, holder, 0); - if (res) - bdput(bdev); - return res; + if (mode & FMODE_READ) + perm |= MAY_READ; + if (mode & FMODE_WRITE) + perm |= MAY_WRITE; + ret = devcgroup_inode_permission(bdev->bd_inode, perm); + if (ret) + goto bdput; + + ret =__blkdev_get(bdev, mode, holder, 0); + if (ret) + goto bdput; + return 0; + +bdput: + bdput(bdev); + return ret; } EXPORT_SYMBOL(blkdev_get); -- cgit v1.2.3-70-g09d2 From 6540fbf6b634071950f01ee4e4194e2ea8ca72d1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:41 +0200 Subject: block: don't clear bd_invalidated in check_disk_size_change bd_invalidated is set by check_disk_change or in add_disk to initiate a partition scan. Move it from check_disk_size_change which is called from both revalidate_disk() and bdev_disk_changed() to only the latter, as that is what is called from the block device open code (and nbd) to deal with the bd_invalidated event. revalidate_disk() on the other hand is mostly used to propagate a size update from the gendisk to the block device, which is entirely unrelated. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 990e97bcbeaf..7e32398608c6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1302,7 +1302,6 @@ static void check_disk_size_change(struct gendisk *disk, } i_size_write(bdev->bd_inode, disk_size); } - bdev->bd_invalidated = 0; spin_unlock(&bdev->bd_size_lock); if (bdev_size > disk_size) { @@ -1391,6 +1390,8 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); + bdev->bd_invalidated = 0; + rescan: ret = blk_drop_partitions(bdev); if (ret) -- cgit v1.2.3-70-g09d2 From f4ad06f2bb8476548b08f89919ee65abc4e40212 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:42 +0200 Subject: block: rename bd_invalidated Replace bd_invalidate with a new BDEV_NEED_PART_SCAN flag in a bd_flags variable to better describe the condition. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- drivers/block/nbd.c | 8 ++++---- fs/block_dev.c | 10 +++++----- include/linux/blk_types.h | 4 +++- 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/block/genhd.c b/block/genhd.c index 5fc6d82e6c68..a36be970b767 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -732,7 +732,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, if (!bdev) goto exit; - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a54f2d155a31..15eed210feef 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -315,7 +315,7 @@ static void nbd_size_update(struct nbd_device *nbd) bd_set_nr_sectors(bdev, nr_sectors); set_blocksize(bdev, config->blksize); } else - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); bdput(bdev); } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -1322,7 +1322,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b return ret; if (max_part) - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); @@ -1500,9 +1500,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } else if (nbd_disconnected(nbd->config)) { - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); } out: mutex_unlock(&nbd_index_mutex); diff --git a/fs/block_dev.c b/fs/block_dev.c index 7e32398608c6..1e6441dbe840 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -881,7 +881,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_part_count = 0; - bdev->bd_invalidated = 0; + bdev->bd_flags = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; @@ -1365,7 +1365,7 @@ int check_disk_change(struct block_device *bdev) if (__invalidate_device(bdev, true)) pr_warn("VFS: busy inodes on changed media %s\n", disk->disk_name); - bdev->bd_invalidated = 1; + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); return 1; @@ -1390,7 +1390,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); - bdev->bd_invalidated = 0; + clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); rescan: ret = blk_drop_partitions(bdev); @@ -1514,7 +1514,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, * The latter is necessary to prevent ghost * partitions on a removed medium. */ - if (bdev->bd_invalidated && + if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); @@ -1544,7 +1544,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (bdev->bd_invalidated && + if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 59d9150165c4..6ffa783e1633 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -19,6 +19,8 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; +#define BDEV_NEED_PART_SCAN 0 + struct block_device { dev_t bd_dev; int bd_openers; @@ -37,7 +39,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - int bd_invalidated; + unsigned long bd_flags; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; -- cgit v1.2.3-70-g09d2 From 659e56ba864d37b7ee0a49cd432205b2a5ca815e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:43 +0200 Subject: block: add a new revalidate_disk_size helper revalidate_disk is a relative awkward helper for driver use, as it first calls an optional driver method and then updates the block device size, while most callers either don't need the method call at all, or want to keep state between the caller and the called method. Add a revalidate_disk_size helper that just performs the update of the block device size from the gendisk one, and switch all drivers that do not implement ->revalidate_disk to use the new helper instead of revalidate_disk() Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/block/rbd.c | 2 +- drivers/block/rnbd/rnbd-clt.c | 10 ++-------- drivers/block/virtio_blk.c | 2 +- drivers/block/zram/zram_drv.c | 4 ++-- drivers/md/dm-raid.c | 2 +- drivers/md/md-cluster.c | 6 +++--- drivers/md/md-linear.c | 2 +- drivers/md/md.c | 10 +++++----- fs/block_dev.c | 42 +++++++++++++++++++++++++++++------------- include/linux/genhd.h | 1 + 10 files changed, 46 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 011539039693..5d3923c0997c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4921,7 +4921,7 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; dout("setting size to %llu sectors", (unsigned long long)size); set_capacity(rbd_dev->disk, size); - revalidate_disk(rbd_dev->disk); + revalidate_disk_size(rbd_dev->disk, true); } } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index cc6a4e2587ae..157538fc8be5 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -102,18 +102,12 @@ static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev, static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, size_t new_nsectors) { - int err = 0; - rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", dev->nsectors, new_nsectors); dev->nsectors = new_nsectors; set_capacity(dev->gd, dev->nsectors); - err = revalidate_disk(dev->gd); - if (err) - rnbd_clt_err(dev, - "Failed to change device size from %zu to %zu, err: %d\n", - dev->nsectors, new_nsectors, err); - return err; + revalidate_disk_size(dev->gd, true); + return 0; } static int process_msg_open_rsp(struct rnbd_clt_dev *dev, diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index ca63a41059d6..a314b9382442 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -598,7 +598,7 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; blk_queue_write_cache(vblk->disk->queue, writeback, false); - revalidate_disk(vblk->disk); + revalidate_disk_size(vblk->disk, true); } static const char *const virtblk_cache_types[] = { diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9100ac36670a..a356275605b1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1739,7 +1739,7 @@ static ssize_t disksize_store(struct device *dev, zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - revalidate_disk(zram->disk); + revalidate_disk_size(zram->disk, true); up_write(&zram->init_lock); return len; @@ -1786,7 +1786,7 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk(zram->disk); + revalidate_disk_size(zram->disk, true); bdput(bdev); mutex_lock(&bdev->bd_mutex); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 8d2b835d7a10..56b723d012ac 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -701,7 +701,7 @@ static void rs_set_capacity(struct raid_set *rs) struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); set_capacity(gendisk, rs->md.array_sectors); - revalidate_disk(gendisk); + revalidate_disk_size(gendisk, true); } /* diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index d50737ec4039..0580b51a156a 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -582,7 +582,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) break; case CHANGE_CAPACITY: set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); break; case RESYNCING: set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); @@ -1296,12 +1296,12 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors) pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", __func__, __LINE__); set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } else { /* revert to previous sectors */ ret = mddev->pers->resize(mddev, old_dev_sectors); if (!ret) - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); ret = __sendmsg(cinfo, &cmsg); if (ret) pr_err("%s:%d: failed to send METADATA_UPDATED msg\n", diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index c2ae9125c4c3..5ab22069b5be 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -202,7 +202,7 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); set_capacity(mddev->gendisk, mddev->array_sectors); mddev_resume(mddev); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); kfree_rcu(oldconf, rcu); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 607278207023..9562ef598ae1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5358,7 +5358,7 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) mddev->array_sectors = sectors; if (mddev->pers) { set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } } mddev_unlock(mddev); @@ -6109,7 +6109,7 @@ int do_md_run(struct mddev *mddev) md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); clear_bit(MD_NOT_READY, &mddev->flags); mddev->changed = 1; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); @@ -6427,7 +6427,7 @@ static int do_md_stop(struct mddev *mddev, int mode, set_capacity(disk, 0); mutex_unlock(&mddev->open_mutex); mddev->changed = 1; - revalidate_disk(disk); + revalidate_disk_size(disk, true); if (mddev->ro) mddev->ro = 0; @@ -7259,7 +7259,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) md_cluster_ops->update_size(mddev, old_dev_sectors); else if (mddev->queue) { set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } } return rv; @@ -9018,7 +9018,7 @@ void md_do_sync(struct md_thread *thread) mddev_unlock(mddev); if (!mddev_is_clustered(mddev)) { set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); + revalidate_disk_size(mddev->gendisk, true); } } diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e6441dbe840..9bfe37f394bd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1311,6 +1311,34 @@ static void check_disk_size_change(struct gendisk *disk, } } +/** + * revalidate_disk_size - checks for disk size change and adjusts bdev size. + * @disk: struct gendisk to check + * @verbose: if %true log a message about a size change if there is any + * + * This routine checks to see if the bdev size does not match the disk size + * and adjusts it if it differs. When shrinking the bdev size, its all caches + * are freed. + */ +void revalidate_disk_size(struct gendisk *disk, bool verbose) +{ + struct block_device *bdev; + + /* + * Hidden disks don't have associated bdev so there's no point in + * revalidating them. + */ + if (disk->flags & GENHD_FL_HIDDEN) + return; + + bdev = bdget_disk(disk, 0); + if (bdev) { + check_disk_size_change(disk, bdev, verbose); + bdput(bdev); + } +} +EXPORT_SYMBOL(revalidate_disk_size); + /** * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back * @disk: struct gendisk to be revalidated @@ -1325,19 +1353,7 @@ int revalidate_disk(struct gendisk *disk) if (disk->fops->revalidate_disk) ret = disk->fops->revalidate_disk(disk); - - /* - * Hidden disks don't have associated bdev so there's no point in - * revalidating it. - */ - if (!(disk->flags & GENHD_FL_HIDDEN)) { - struct block_device *bdev = bdget_disk(disk, 0); - - if (bdev) { - check_disk_size_change(disk, bdev, ret == 0); - bdput(bdev); - } - } + revalidate_disk_size(disk, ret == 0); return ret; } EXPORT_SYMBOL(revalidate_disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9ea2ca31c278..f76c8baf6b7d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -371,6 +371,7 @@ int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); int revalidate_disk(struct gendisk *disk); +void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); -- cgit v1.2.3-70-g09d2 From de09077c89183cbc627d9393706343662da7f5a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 17:57:48 +0200 Subject: block: remove revalidate_disk() Remove the now unused helper. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Acked-by: Song Liu Signed-off-by: Jens Axboe --- drivers/md/md.h | 2 +- fs/block_dev.c | 19 ------------------- include/linux/genhd.h | 1 - 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'fs') diff --git a/drivers/md/md.h b/drivers/md/md.h index d9c4e6b7e939..f9e2ccdd22c4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -397,7 +397,7 @@ struct mddev { * These locks are separate due to conflicting interactions * with bdev->bd_mutex. * Lock ordering is: - * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk + * reconfig_mutex -> bd_mutex * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open */ struct mutex open_mutex; diff --git a/fs/block_dev.c b/fs/block_dev.c index 9bfe37f394bd..9cb205405f9d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1339,25 +1339,6 @@ void revalidate_disk_size(struct gendisk *disk, bool verbose) } EXPORT_SYMBOL(revalidate_disk_size); -/** - * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back - * @disk: struct gendisk to be revalidated - * - * This routine is a wrapper for lower-level driver's revalidate_disk - * call-backs. It is used to do common pre and post operations needed - * for all revalidate_disk operations. - */ -int revalidate_disk(struct gendisk *disk) -{ - int ret = 0; - - if (disk->fops->revalidate_disk) - ret = disk->fops->revalidate_disk(disk); - revalidate_disk_size(disk, ret == 0); - return ret; -} -EXPORT_SYMBOL(revalidate_disk); - /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 02a73198b289..c618b27292fc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -370,7 +370,6 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); -int revalidate_disk(struct gendisk *disk); void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -- cgit v1.2.3-70-g09d2 From 6dbf7bb555981fb5faf7b691e8f6169fc2b2e63b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Sep 2020 10:58:51 +0200 Subject: fs: Don't invalidate page buffers in block_write_full_page() If block_write_full_page() is called for a page that is beyond current inode size, it will truncate page buffers for the page and return 0. This logic has been added in 2.5.62 in commit 81eb69062588 ("fix ext3 BUG due to race with truncate") in history.git tree to fix a problem with ext3 in data=ordered mode. This particular problem doesn't exist anymore because ext3 is long gone and ext4 handles ordered data differently. Also normally buffers are invalidated by truncate code and there's no need to specially handle this in ->writepage() code. This invalidation of page buffers in block_write_full_page() is causing issues to filesystems (e.g. ext4 or ocfs2) when block device is shrunk under filesystem's hands and metadata buffers get discarded while being tracked by the journalling layer. Although it is obviously "not supported" it can cause kernel crashes like: [ 7986.689400] BUG: unable to handle kernel NULL pointer dereference at +0000000000000008 [ 7986.697197] PGD 0 P4D 0 [ 7986.699724] Oops: 0002 [#1] SMP PTI [ 7986.703200] CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G +O --------- - - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1 [ 7986.716438] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015 [ 7986.723462] RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2] ... [ 7986.810150] Call Trace: [ 7986.812595] __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2] [ 7986.818408] jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2] [ 7986.836467] kjournald2+0xbd/0x270 [jbd2] which is not great. The crash happens because bh->b_private is suddently NULL although BH_JBD flag is still set (this is because block_invalidatepage() cleared BH_Mapped flag and subsequent bh lookup found buffer without BH_Mapped set, called init_page_buffers() which has rewritten bh->b_private). So just remove the invalidation in block_write_full_page(). Note that the buffer cache invalidation when block device changes size is already careful to avoid similar problems by using invalidate_mapping_pages() which skips busy buffers so it was only this odd block_write_full_page() behavior that could tear down bdev buffers under filesystem's hands. Reported-by: Ye Bin Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig CC: stable@vger.kernel.org Signed-off-by: Jens Axboe --- fs/buffer.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 50bbc99e3d96..5a28a6aa7f16 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2771,16 +2771,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ -#if 0 - /* Not really sure about this - do we need this ? */ - if (page->mapping->a_ops->invalidatepage) - page->mapping->a_ops->invalidatepage(page, offset); -#endif unlock_page(page); return 0; /* don't care */ } @@ -2975,12 +2965,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ - do_invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return 0; /* don't care */ } -- cgit v1.2.3-70-g09d2 From 384d87ef2c954fc58e6c5fd8253e4a1984f5fe02 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Sep 2020 10:58:52 +0200 Subject: block: Do not discard buffers under a mounted filesystem Discarding blocks and buffers under a mounted filesystem is hardly anything admin wants to do. Usually it will confuse the filesystem and sometimes the loss of buffer_head state (including b_private field) can even cause crashes like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G O --------- - - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1 Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015 RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2] ... Call Trace: __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2] jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2] kjournald2+0xbd/0x270 [jbd2] So if we don't have block device open with O_EXCL already, claim the block device while we truncate buffer cache. This makes sure any exclusive block device user (such as filesystem) cannot operate on the device while we are discarding buffer cache. Reported-by: Ye Bin Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig [axboe: fix !CONFIG_BLOCK error in truncate_bdev_range()] Signed-off-by: Jens Axboe --- block/ioctl.c | 16 ++++++++++------ fs/block_dev.c | 37 +++++++++++++++++++++++++++++++++---- include/linux/blkdev.h | 7 +++++++ 3 files changed, 50 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/block/ioctl.c b/block/ioctl.c index bdb3bbb253d9..ae74d0409afa 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -112,8 +112,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, uint64_t range[2]; uint64_t start, len; struct request_queue *q = bdev_get_queue(bdev); - struct address_space *mapping = bdev->bd_inode->i_mapping; - + int err; if (!(mode & FMODE_WRITE)) return -EBADF; @@ -134,7 +133,11 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, if (start + len > i_size_read(bdev->bd_inode)) return -EINVAL; - truncate_inode_pages_range(mapping, start, start + len - 1); + + err = truncate_bdev_range(bdev, mode, start, start + len - 1); + if (err) + return err; + return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, flags); } @@ -143,8 +146,8 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, unsigned long arg) { uint64_t range[2]; - struct address_space *mapping; uint64_t start, end, len; + int err; if (!(mode & FMODE_WRITE)) return -EBADF; @@ -166,8 +169,9 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, return -EINVAL; /* Invalidate the page cache, including dirty pages */ - mapping = bdev->bd_inode->i_mapping; - truncate_inode_pages_range(mapping, start, end); + err = truncate_bdev_range(bdev, mode, start, end); + if (err) + return err; return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, BLKDEV_ZERO_NOUNMAP); diff --git a/fs/block_dev.c b/fs/block_dev.c index 9cb205405f9d..c70c41ecba48 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -103,6 +103,35 @@ void invalidate_bdev(struct block_device *bdev) } EXPORT_SYMBOL(invalidate_bdev); +/* + * Drop all buffers & page cache for given bdev range. This function bails + * with error if bdev has other exclusive owner (such as filesystem). + */ +int truncate_bdev_range(struct block_device *bdev, fmode_t mode, + loff_t lstart, loff_t lend) +{ + struct block_device *claimed_bdev = NULL; + int err; + + /* + * If we don't hold exclusive handle for the device, upgrade to it + * while we discard the buffer cache to avoid discarding buffers + * under live filesystem. + */ + if (!(mode & FMODE_EXCL)) { + claimed_bdev = bdev->bd_contains; + err = bd_prepare_to_claim(bdev, claimed_bdev, + truncate_bdev_range); + if (err) + return err; + } + truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); + if (claimed_bdev) + bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range); + return 0; +} +EXPORT_SYMBOL(truncate_bdev_range); + static void set_init_blocksize(struct block_device *bdev) { bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev)); @@ -1968,7 +1997,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, loff_t len) { struct block_device *bdev = I_BDEV(bdev_file_inode(file)); - struct address_space *mapping; loff_t end = start + len - 1; loff_t isize; int error; @@ -1996,8 +2024,9 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, return -EINVAL; /* Invalidate the page cache, including dirty pages. */ - mapping = bdev->bd_inode->i_mapping; - truncate_inode_pages_range(mapping, start, end); + error = truncate_bdev_range(bdev, file->f_mode, start, end); + if (error) + return error; switch (mode) { case FALLOC_FL_ZERO_RANGE: @@ -2024,7 +2053,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, * the caller will be given -EBUSY. The third argument is * inclusive, so the rounding here is safe. */ - return invalidate_inode_pages2_range(mapping, + return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, start >> PAGE_SHIFT, end >> PAGE_SHIFT); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d82959e7b86..37ec5a73d027 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1987,11 +1987,18 @@ void bdput(struct block_device *); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); +int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, + loff_t lend); int sync_blockdev(struct block_device *bdev); #else static inline void invalidate_bdev(struct block_device *bdev) { } +static inline int truncate_bdev_range(struct block_device *bdev, fmode_t mode, + loff_t lstart, loff_t lend) +{ + return 0; +} static inline int sync_blockdev(struct block_device *bdev) { return 0; -- cgit v1.2.3-70-g09d2 From 95f6f3a46fc4ee1a2b216a6b46bdf2b450f1877f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:29 +0200 Subject: block: add a bdev_check_media_change helper Like check_disk_changed, except that it does not call ->revalidate_disk but leaves that to the caller. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/genhd.c | 29 ++++++++++++++++++++++++++++- fs/block_dev.c | 17 +++-------------- include/linux/genhd.h | 2 +- 3 files changed, 32 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/block/genhd.c b/block/genhd.c index 081f1039d936..9d060e79eb31 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -2052,7 +2052,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) * CONTEXT: * Might sleep. */ -unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) +static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) { struct disk_events *ev = disk->ev; unsigned int pending; @@ -2090,6 +2090,33 @@ unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask) return pending; } +/** + * bdev_check_media_change - check if a removable media has been changed + * @bdev: block device to check + * + * Check whether a removable media has been changed, and attempt to free all + * dentries and inodes and invalidates all block device page cache entries in + * that case. + * + * Returns %true if the block device changed, or %false if not. + */ +bool bdev_check_media_change(struct block_device *bdev) +{ + unsigned int events; + + events = disk_clear_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE | + DISK_EVENT_EJECT_REQUEST); + if (!(events & DISK_EVENT_MEDIA_CHANGE)) + return false; + + if (__invalidate_device(bdev, true)) + pr_warn("VFS: busy inodes on changed media %s\n", + bdev->bd_disk->disk_name); + set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + return true; +} +EXPORT_SYMBOL(bdev_check_media_change); + /* * Separate this part out so that a different pointer for clearing_ptr can be * passed in for disk_clear_events. diff --git a/fs/block_dev.c b/fs/block_dev.c index c70c41ecba48..c6ac0bd22eca 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1379,21 +1379,10 @@ EXPORT_SYMBOL(revalidate_disk_size); */ int check_disk_change(struct block_device *bdev) { - struct gendisk *disk = bdev->bd_disk; - const struct block_device_operations *bdops = disk->fops; - unsigned int events; - - events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE | - DISK_EVENT_EJECT_REQUEST); - if (!(events & DISK_EVENT_MEDIA_CHANGE)) + if (!bdev_check_media_change(bdev)) return 0; - - if (__invalidate_device(bdev, true)) - pr_warn("VFS: busy inodes on changed media %s\n", - disk->disk_name); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); - if (bdops->revalidate_disk) - bdops->revalidate_disk(bdev->bd_disk); + if (bdev->bd_disk->fops->revalidate_disk) + bdev->bd_disk->fops->revalidate_disk(bdev->bd_disk); return 1; } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c618b27292fc..322d48a20772 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,6 @@ extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); -extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; @@ -372,6 +371,7 @@ void unregister_blkdev(unsigned int major, const char *name); void revalidate_disk_size(struct gendisk *disk, bool verbose); int check_disk_change(struct block_device *bdev); +bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); -- cgit v1.2.3-70-g09d2 From b92b53079aedbfb56bbb9ea360e5119fb563a2a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 16:53:47 +0200 Subject: block: remove check_disk_change Remove the now unused check_disk_change helper. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- fs/block_dev.c | 20 -------------------- include/linux/genhd.h | 1 - 2 files changed, 21 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index c6ac0bd22eca..0b34955b9e36 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1368,26 +1368,6 @@ void revalidate_disk_size(struct gendisk *disk, bool verbose) } EXPORT_SYMBOL(revalidate_disk_size); -/* - * This routine checks whether a removable media has been changed, - * and invalidates all buffer-cache-entries in that case. This - * is a relatively slow routine, so we have to try to minimize using - * it. Thus it is called only upon a 'mount' or 'open'. This - * is the best way of combining speed and utility, I think. - * People changing diskettes in the middle of an operation deserve - * to lose :-) - */ -int check_disk_change(struct block_device *bdev) -{ - if (!bdev_check_media_change(bdev)) - return 0; - if (bdev->bd_disk->fops->revalidate_disk) - bdev->bd_disk->fops->revalidate_disk(bdev->bd_disk); - return 1; -} - -EXPORT_SYMBOL(check_disk_change); - void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) { spin_lock(&bdev->bd_size_lock); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 322d48a20772..1c97cf84f011 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -370,7 +370,6 @@ int register_blkdev(unsigned int major, const char *name); void unregister_blkdev(unsigned int major, const char *name); void revalidate_disk_size(struct gendisk *disk, bool verbose); -int check_disk_change(struct block_device *bdev); bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); -- cgit v1.2.3-70-g09d2 From 38430f0876fa8b9549ec434f569dce03e057c076 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:45 +0200 Subject: block: move the NEED_PART_SCAN flag to struct gendisk We can only scan for partitions on the whole disk, so move the flag from struct block_device to struct gendisk. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 4 ++-- drivers/block/nbd.c | 8 ++++---- drivers/ide/ide-gd.c | 2 +- fs/block_dev.c | 7 +++---- include/linux/blk_types.h | 4 +--- include/linux/genhd.h | 2 ++ 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/block/genhd.c b/block/genhd.c index 9d060e79eb31..7b56203c90a3 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -731,7 +731,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, if (!bdev) goto exit; - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &disk->state); err = blkdev_get(bdev, FMODE_READ, NULL); if (err < 0) goto exit; @@ -2112,7 +2112,7 @@ bool bdev_check_media_change(struct block_device *bdev) if (__invalidate_device(bdev, true)) pr_warn("VFS: busy inodes on changed media %s\n", bdev->bd_disk->disk_name); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); return true; } EXPORT_SYMBOL(bdev_check_media_change); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 15eed210feef..2dca0aab0a9a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -315,7 +315,7 @@ static void nbd_size_update(struct nbd_device *nbd) bd_set_nr_sectors(bdev, nr_sectors); set_blocksize(bdev, config->blksize); } else - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); bdput(bdev); } kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -1322,7 +1322,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b return ret; if (max_part) - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); @@ -1500,9 +1500,9 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } else if (nbd_disconnected(nbd->config)) { - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); } out: mutex_unlock(&nbd_index_mutex); diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 661e2aa9c967..e2b6c82586ce 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -230,7 +230,7 @@ static int ide_gd_open(struct block_device *bdev, fmode_t mode) bdev->bd_disk->disk_name); drive->disk_ops->get_capacity(drive); set_capacity(disk, ide_gd_capacity(drive)); - set_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + set_bit(GD_NEED_PART_SCAN, &disk->state); } else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) { ret = -EBUSY; goto out_put_idkp; diff --git a/fs/block_dev.c b/fs/block_dev.c index 0b34955b9e36..1a9325f43157 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -910,7 +910,6 @@ struct block_device *bdget(dev_t dev) bdev->bd_super = NULL; bdev->bd_inode = inode; bdev->bd_part_count = 0; - bdev->bd_flags = 0; inode->i_mode = S_IFBLK; inode->i_rdev = dev; inode->i_bdev = bdev; @@ -1385,7 +1384,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); - clear_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags); + clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); rescan: ret = blk_drop_partitions(bdev); @@ -1509,7 +1508,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, * The latter is necessary to prevent ghost * partitions on a removed medium. */ - if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && + if (test_bit(GD_NEED_PART_SCAN, &disk->state) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); @@ -1539,7 +1538,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (test_bit(BDEV_NEED_PART_SCAN, &bdev->bd_flags) && + if (test_bit(GD_NEED_PART_SCAN, &disk->state) && (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6ffa783e1633..eb20e28184ab 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -19,8 +19,6 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; -#define BDEV_NEED_PART_SCAN 0 - struct block_device { dev_t bd_dev; int bd_openers; @@ -39,7 +37,7 @@ struct block_device { struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; - unsigned long bd_flags; + spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1c97cf84f011..38f23d757013 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -191,6 +191,8 @@ struct gendisk { void *private_data; int flags; + unsigned long state; +#define GD_NEED_PART_SCAN 0 struct rw_semaphore lookup_sem; struct kobject *slave_dir; -- cgit v1.2.3-70-g09d2 From e455ed22906c02c7638e127dbf60634765db02cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:53 +0200 Subject: ocfs2: cleanup o2hb_region_dev_store Use blkdev_get_by_dev instead of igrab (aka open coded bdgrab) + blkdev_get. Signed-off-by: Christoph Hellwig Reviewed-by: Joseph Qi Signed-off-by: Jens Axboe --- fs/ocfs2/cluster/heartbeat.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 89d13e0705fe..0179a73a3fa2 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1766,7 +1766,6 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, int sectsize; char *p = (char *)page; struct fd f; - struct inode *inode; ssize_t ret = -EINVAL; int live_threshold; @@ -1793,20 +1792,16 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, reg->hr_block_bytes == 0) goto out2; - inode = igrab(f.file->f_mapping->host); - if (inode == NULL) + if (!S_ISBLK(f.file->f_mapping->host->i_mode)) goto out2; - if (!S_ISBLK(inode->i_mode)) - goto out3; - - reg->hr_bdev = I_BDEV(f.file->f_mapping->host); - ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, NULL); - if (ret) { + reg->hr_bdev = blkdev_get_by_dev(f.file->f_mapping->host->i_rdev, + FMODE_WRITE | FMODE_READ, NULL); + if (IS_ERR(reg->hr_bdev)) { + ret = PTR_ERR(reg->hr_bdev); reg->hr_bdev = NULL; - goto out3; + goto out2; } - inode = NULL; bdevname(reg->hr_bdev, reg->hr_dev_name); @@ -1909,16 +1904,13 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, config_item_name(®->hr_item), reg->hr_dev_name); out3: - iput(inode); + if (ret < 0) { + blkdev_put(reg->hr_bdev, FMODE_READ | FMODE_WRITE); + reg->hr_bdev = NULL; + } out2: fdput(f); out: - if (ret < 0) { - if (reg->hr_bdev) { - blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); - reg->hr_bdev = NULL; - } - } return ret; } -- cgit v1.2.3-70-g09d2 From bb3247a399801ebba20bef101c89e563f5fe7f02 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:55 +0200 Subject: PM: rewrite is_hibernate_resume_dev to not require an inode Just check the dev_t to help simplifying the code. Signed-off-by: Christoph Hellwig Acked-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 +- include/linux/suspend.h | 4 ++-- kernel/power/user.c | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 1a9325f43157..2898d69be6b3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1885,7 +1885,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM; - if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode)) + if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) return -ETXTBSY; if (!iov_iter_count(from)) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index cb9afad82a90..8af13ba60c7e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -473,9 +473,9 @@ static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) { #endif /* CONFIG_HIBERNATION */ #ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV -int is_hibernate_resume_dev(const struct inode *); +int is_hibernate_resume_dev(dev_t dev); #else -static inline int is_hibernate_resume_dev(const struct inode *i) { return 0; } +static inline int is_hibernate_resume_dev(dev_t dev) { return 0; } #endif /* Hibernation and suspend events */ diff --git a/kernel/power/user.c b/kernel/power/user.c index d5eedc2baa2a..b5815685b944 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -35,12 +35,12 @@ static struct snapshot_data { bool ready; bool platform_support; bool free_bitmaps; - struct inode *bd_inode; + dev_t dev; } snapshot_state; -int is_hibernate_resume_dev(const struct inode *bd_inode) +int is_hibernate_resume_dev(dev_t dev) { - return hibernation_available() && snapshot_state.bd_inode == bd_inode; + return hibernation_available() && snapshot_state.dev == dev; } static int snapshot_open(struct inode *inode, struct file *filp) @@ -101,7 +101,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->frozen = false; data->ready = false; data->platform_support = false; - data->bd_inode = NULL; + data->dev = 0; Unlock: unlock_system_sleep(); @@ -117,7 +117,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) swsusp_free(); data = filp->private_data; - data->bd_inode = NULL; + data->dev = 0; free_all_swap_pages(data->swap); if (data->frozen) { pm_restore_gfp_mask(); @@ -245,7 +245,7 @@ static int snapshot_set_swap_area(struct snapshot_data *data, if (data->swap < 0) return -ENODEV; - data->bd_inode = bdev->bd_inode; + data->dev = bdev->bd_dev; bdput(bdev); return 0; } -- cgit v1.2.3-70-g09d2 From 1fb1a2ad75e33e646d33e42b9ed17d879d472859 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Sep 2020 09:19:58 +0200 Subject: block: mark blkdev_get static There are no users outside the core block code left now. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 3 +-- include/linux/blkdev.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 2898d69be6b3..6b9d19ffa5af 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1616,7 +1616,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, * RETURNS: * 0 on success, -errno on failure. */ -int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) +static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { int ret, perm = 0; @@ -1637,7 +1637,6 @@ bdput: bdput(bdev); return ret; } -EXPORT_SYMBOL(blkdev_get); /** * blkdev_get_by_path - open a block device by name diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6e19a7aa1672..be5ef6f4ba19 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1981,7 +1981,6 @@ void blkdev_show(struct seq_file *seqf, off_t offset); #define BLKDEV_MAJOR_MAX 0 #endif -int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -- cgit v1.2.3-70-g09d2 From 402dd2cf46b177be5bcb138b7d7fd8f38aa130e4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:28 +0200 Subject: fs: remove the unused SB_I_MULTIROOT flag The last user of SB_I_MULTIROOT is disappeared with commit f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- fs/namei.c | 4 ++-- include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index e99e2a9da0f7..f1eb8ccd2be9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -568,8 +568,8 @@ static bool path_connected(struct vfsmount *mnt, struct dentry *dentry) { struct super_block *sb = mnt->mnt_sb; - /* Bind mounts and multi-root filesystems can have disconnected paths */ - if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root)) + /* Bind mounts can have disconnected paths */ + if (mnt->mnt_root == sb->s_root) return true; return is_subdir(dentry, mnt->mnt_root); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7519ae003a08..fbd74df5ce5f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,7 +1385,6 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ -#define SB_I_MULTIROOT 0x00000008 /* Multiple roots to the dentry tree */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ -- cgit v1.2.3-70-g09d2 From 55b2598e84e97efc5d952958cb5e34236c43276b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:32 +0200 Subject: bdi: initialize ->ra_pages and ->io_pages in bdi_init Set up a readahead size by default, as very few users have a good reason to change it. This means code, ecryptfs, and orangefs now set up the values while they were previously missing it, while ubifs, mtd and vboxsf manually set it to 0 to avoid readahead. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Acked-by: David Sterba [btrfs] Acked-by: Richard Weinberger [ubifs, mtd] Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- drivers/mtd/mtdcore.c | 2 ++ fs/9p/vfs_super.c | 6 ++++-- fs/afs/super.c | 1 - fs/btrfs/disk-io.c | 1 - fs/fuse/inode.c | 1 - fs/nfs/super.c | 9 +-------- fs/ubifs/super.c | 2 ++ fs/vboxsf/super.c | 2 ++ mm/backing-dev.c | 2 ++ 10 files changed, 13 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/block/blk-core.c b/block/blk-core.c index ca3f0f00c943..865d39e5be2b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -538,8 +538,6 @@ struct request_queue *blk_alloc_queue(int node_id) if (!q->stats) goto fail_stats; - q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES; - q->backing_dev_info->io_pages = VM_READAHEAD_PAGES; q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; q->node = node_id; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 7d930569a7df..b5e5d3140f57 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -2196,6 +2196,8 @@ static struct backing_dev_info * __init mtd_bdi_init(char *name) bdi = bdi_alloc(NUMA_NO_NODE); if (!bdi) return ERR_PTR(-ENOMEM); + bdi->ra_pages = 0; + bdi->io_pages = 0; /* * We put '-0' suffix to the name to get the same name format as we diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 74df32be4c6a..e34fa20acf61 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -80,8 +80,10 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (ret) return ret; - if (v9ses->cache) - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; + if (!v9ses->cache) { + sb->s_bdi->ra_pages = 0; + sb->s_bdi->io_pages = 0; + } sb->s_flags |= SB_ACTIVE | SB_DIRSYNC; if (!v9ses->cache) diff --git a/fs/afs/super.c b/fs/afs/super.c index b552357b1d13..3a40ee752c1e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -456,7 +456,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) ret = super_setup_bdi(sb); if (ret) return ret; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; /* allocate the root inode and dentry */ if (as->dyn_root) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6bba7eb1fa1..047934cea25e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3092,7 +3092,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bba747520e9b..17b00670fb53 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1049,7 +1049,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) if (err) return err; - sb->s_bdi->ra_pages = VM_READAHEAD_PAGES; /* fuse does it's own writeback accounting */ sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 7a70287f21a2..f943e37853fa 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1200,13 +1200,6 @@ static void nfs_get_cache_cookie(struct super_block *sb, } #endif -static void nfs_set_readahead(struct backing_dev_info *bdi, - unsigned long iomax_pages) -{ - bdi->ra_pages = VM_READAHEAD_PAGES; - bdi->io_pages = iomax_pages; -} - int nfs_get_tree_common(struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); @@ -1251,7 +1244,7 @@ int nfs_get_tree_common(struct fs_context *fc) MINOR(server->s_dev)); if (error) goto error_splat_super; - nfs_set_readahead(s->s_bdi, server->rpages); + s->s_bdi->io_pages = server->rpages; server->super = s; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a2420c900275..fbddb2a1c03f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2177,6 +2177,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) c->vi.vol_id); if (err) goto out_close; + sb->s_bdi->ra_pages = 0; + sb->s_bdi->io_pages = 0; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 8fe03b4a0d2b..8e3792177a85 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -167,6 +167,8 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); if (err) goto fail_free; + sb->s_bdi->ra_pages = 0; + sb->s_bdi->io_pages = 0; /* Turn source into a shfl_string and map the folder */ size = strlen(fc->source) + 1; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8e8b00627bb2..2dac3be61271 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -746,6 +746,8 @@ struct backing_dev_info *bdi_alloc(int node_id) kfree(bdi); return NULL; } + bdi->ra_pages = VM_READAHEAD_PAGES; + bdi->io_pages = VM_READAHEAD_PAGES; return bdi; } EXPORT_SYMBOL(bdi_alloc); -- cgit v1.2.3-70-g09d2 From ed7b6b4f6e915cb0bc52d0000bcc63168867b6ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:35 +0200 Subject: bdi: remove BDI_CAP_CGROUP_WRITEBACK Just checking SB_I_CGROUPWB for cgroup writeback support is enough. Either the file system allocates its own bdi (e.g. btrfs), in which case it is known to support cgroup writeback, or the bdi comes from the block layer, which always supports cgroup writeback. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - fs/btrfs/disk-io.c | 1 - include/linux/backing-dev.h | 8 +++----- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/block/blk-core.c b/block/blk-core.c index 865d39e5be2b..1cc4fa6bc7fe 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -538,7 +538,6 @@ struct request_queue *blk_alloc_queue(int node_id) if (!q->stats) goto fail_stats; - q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK; q->node = node_id; atomic_set(&q->nr_active_requests_shared_sbitmap, 0); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 047934cea25e..e24927bddd58 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3091,7 +3091,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_sb_buffer; } - sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0b06b2d26c9a..52583b6f2ea0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -123,7 +123,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. * - * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be * inefficient. */ @@ -233,9 +232,9 @@ int inode_congested(struct inode *inode, int cong_bits); * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode * @inode: inode of interest * - * cgroup writeback requires support from both the bdi and filesystem. - * Also, both memcg and iocg have to be on the default hierarchy. Test - * whether all conditions are met. + * Cgroup writeback requires support from the filesystem. Also, both memcg and + * iocg have to be on the default hierarchy. Test whether all conditions are + * met. * * Note that the test result may change dynamically on the same inode * depending on how memcg and iocg are configured. @@ -247,7 +246,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode) return cgroup_subsys_on_dfl(memory_cgrp_subsys) && cgroup_subsys_on_dfl(io_cgrp_subsys) && bdi_cap_account_dirty(bdi) && - (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } -- cgit v1.2.3-70-g09d2 From 1cb039f3dc1619eb795c54aad0a98fdb379b4237 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:38 +0200 Subject: bdi: replace BDI_CAP_STABLE_WRITES with a queue and a sb flag The BDI_CAP_STABLE_WRITES is one of the few bits of information in the backing_dev_info shared between the block drivers and the writeback code. To help untangling the dependency replace it with a queue flag and a superblock flag derived from it. This also helps with the case of e.g. a file system requiring stable writes due to its own checksumming, but not forcing it on other users of the block device like the swap code. One downside is that we an't support the stable_pages_required bdi attribute in sysfs anymore. It is replaced with a queue attribute which also is writable for easier testing. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-integrity.c | 4 ++-- block/blk-mq-debugfs.c | 1 + block/blk-sysfs.c | 3 +++ drivers/block/rbd.c | 2 +- drivers/block/zram/zram_drv.c | 2 +- drivers/md/dm-table.c | 6 +++--- drivers/md/raid5.c | 8 ++++---- drivers/mmc/core/queue.c | 3 +-- drivers/nvme/host/core.c | 3 +-- drivers/nvme/host/multipath.c | 10 +++------- drivers/scsi/iscsi_tcp.c | 4 ++-- fs/super.c | 2 ++ include/linux/backing-dev.h | 6 ------ include/linux/blkdev.h | 3 +++ include/linux/fs.h | 1 + mm/backing-dev.c | 7 +++---- mm/page-writeback.c | 2 +- mm/swapfile.c | 2 +- 18 files changed, 33 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index c03705cbb9c9..2b36a8f9b813 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -408,7 +408,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; - disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); #ifdef CONFIG_BLK_INLINE_ENCRYPTION if (disk->queue->ksm) { @@ -428,7 +428,7 @@ EXPORT_SYMBOL(blk_integrity_register); */ void blk_integrity_unregister(struct gendisk *disk) { - disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue); memset(&disk->queue->integrity, 0, sizeof(struct blk_integrity)); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 645b7f800cb8..3094542e12ae 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -116,6 +116,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SAME_FORCE), QUEUE_FLAG_NAME(DEAD), QUEUE_FLAG_NAME(INIT_DONE), + QUEUE_FLAG_NAME(STABLE_WRITES), QUEUE_FLAG_NAME(POLL), QUEUE_FLAG_NAME(WC), QUEUE_FLAG_NAME(FUA), diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 869ed21a9edc..76b54c7750b0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -287,6 +287,7 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \ QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); +QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0); #undef QUEUE_SYSFS_BIT_FNS static ssize_t queue_zoned_show(struct request_queue *q, char *page) @@ -613,6 +614,7 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { QUEUE_RW_ENTRY(queue_nonrot, "rotational"); QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); +QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, @@ -645,6 +647,7 @@ static struct attribute *queue_attrs[] = { &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, + &queue_stable_writes_entry.attr, &queue_random_entry.attr, &queue_poll_entry.attr, &queue_wc_entry.attr, diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5d3923c0997c..cf5b016358cd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5022,7 +5022,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) } if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) - q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); /* * disk_release() expects a queue ref from add_disk() and will diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e21ca844d7c2..bff3d4021c18 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1955,7 +1955,7 @@ static int zram_add(void) if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); - zram->disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); device_add_disk(NULL, zram->disk, zram_disk_attr_groups); strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ef2757012f59..405d7cf10eb9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1815,7 +1815,7 @@ static int device_requires_stable_pages(struct dm_target *ti, { struct request_queue *q = bdev_get_queue(dev->bdev); - return q && bdi_cap_stable_pages_required(q->backing_dev_info); + return q && blk_queue_stable_writes(q); } /* @@ -1900,9 +1900,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * because they do their own checksumming. */ if (dm_table_requires_stable_pages(t)) - q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else - q->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); /* * Determine whether or not this queue's I/O timings contribute diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ace1f76b147..d589d26c86ea 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6638,14 +6638,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len) if (!conf) err = -ENODEV; else if (new != conf->skip_copy) { + struct request_queue *q = mddev->queue; + mddev_suspend(mddev); conf->skip_copy = new; if (new) - mddev->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else - mddev->queue->backing_dev_info->capabilities &= - ~BDI_CAP_STABLE_WRITES; + blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); mddev_resume(mddev); } mddev_unlock(mddev); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 6c022ef0f84d..80fe3852ce0f 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -472,8 +472,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card) } if (mmc_host_is_spi(host) && host->use_spi_crc) - mq->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue); mq->queue->queuedata = mq; blk_queue_rq_timeout(mq->queue, 60 * HZ); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 741c9bfa8e14..c190c56bf702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3926,8 +3926,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) goto out_free_ns; if (ctrl->opts && ctrl->opts->data_digest) - ns->queue->backing_dev_info->capabilities - |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->queue); blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); if (ctrl->ops->flags & NVME_F_PCI_P2PDMA) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d4ba736c6c89..74896be40c17 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -673,13 +673,9 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_mpath_set_live(ns); } - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct gendisk *disk = ns->head->disk; - - if (disk) - disk->queue->backing_dev_info->capabilities |= - BDI_CAP_STABLE_WRITES; - } + if (blk_queue_stable_writes(ns->queue) && ns->head->disk) + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, + ns->head->disk->queue); } void nvme_mpath_remove_disk(struct nvme_ns_head *head) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index b5dd1caae5e9..a622f334c933 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -962,8 +962,8 @@ static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev) struct iscsi_conn *conn = session->leadconn; if (conn->datadgst_en) - sdev->request_queue->backing_dev_info->capabilities - |= BDI_CAP_STABLE_WRITES; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, + sdev->request_queue); blk_queue_dma_alignment(sdev->request_queue, 0); return 0; } diff --git a/fs/super.c b/fs/super.c index 904459b35119..a51c2083cd6b 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1256,6 +1256,8 @@ static int set_bdev_super(struct super_block *s, void *data) s->s_dev = s->s_bdev->bd_dev; s->s_bdi = bdi_get(s->s_bdev->bd_bdi); + if (blk_queue_stable_writes(s->s_bdev->bd_disk->queue)) + s->s_iflags |= SB_I_STABLE_WRITES; return 0; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 860ea33571bc..5da4ea3dd0cc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -126,7 +126,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 #define BDI_CAP_NO_ACCT_WB 0x00000004 -#define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 @@ -170,11 +169,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) -{ - return bdi->capabilities & BDI_CAP_STABLE_WRITES; -} - static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) { return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 282f5ca424f1..8e77f12de522 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -606,6 +606,7 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ +#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 17 /* Write back caching */ #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ @@ -635,6 +636,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_stable_writes(q) \ + test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) diff --git a/include/linux/fs.h b/include/linux/fs.h index fbd74df5ce5f..222465b7cf41 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,6 +1385,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ #define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ #define SB_I_NODEV 0x00000004 /* Ignore devices on this fs */ +#define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2dac3be61271..8e3802bf03a9 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -204,10 +204,9 @@ static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, char *page) { - struct backing_dev_info *bdi = dev_get_drvdata(dev); - - return snprintf(page, PAGE_SIZE-1, "%d\n", - bdi_cap_stable_pages_required(bdi) ? 1 : 0); + dev_warn_once(dev, + "the stable_pages_required attribute has been removed. Use the stable_writes queue attribute instead.\n"); + return snprintf(page, PAGE_SIZE-1, "%d\n", 0); } static DEVICE_ATTR_RO(stable_pages_required); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4e4ddd67b71e..e9c36521461a 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2849,7 +2849,7 @@ EXPORT_SYMBOL_GPL(wait_on_page_writeback); */ void wait_for_stable_page(struct page *page) { - if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host))) + if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES) wait_on_page_writeback(page); } EXPORT_SYMBOL_GPL(wait_for_stable_page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 96a7c47dd533..65ef407512b5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3237,7 +3237,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) goto bad_swap_unlock_inode; } - if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) + if (p->bdev && blk_queue_stable_writes(p->bdev->bd_disk->queue)) p->flags |= SWP_STABLE_WRITES; if (p->bdev && p->bdev->bd_disk->fops->rw_page) -- cgit v1.2.3-70-g09d2 From 823423ef55f4d9c470b1edc9c5b5c93d06abfaae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:39 +0200 Subject: bdi: invert BDI_CAP_NO_ACCT_WB Replace BDI_CAP_NO_ACCT_WB with a positive BDI_CAP_WRITEBACK_ACCT to make the checks more obvious. Also remove the pointless bdi_cap_account_writeback wrapper that just obsfucates the check. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- fs/fuse/inode.c | 3 ++- include/linux/backing-dev.h | 13 +++---------- mm/backing-dev.c | 1 + mm/page-writeback.c | 4 ++-- 4 files changed, 8 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 17b00670fb53..581329203d68 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1050,7 +1050,8 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) return err; /* fuse does it's own writeback accounting */ - sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT; + sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; + sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; /* * For a single fuse filesystem use max 1% of dirty + diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 5da4ea3dd0cc..b217344a2c63 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -120,17 +120,17 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * * BDI_CAP_NO_ACCT_DIRTY: Dirty pages shouldn't contribute to accounting * BDI_CAP_NO_WRITEBACK: Don't write pages back - * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages + * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 -#define BDI_CAP_NO_ACCT_WB 0x00000004 +#define BDI_CAP_WRITEBACK_ACCT 0x00000004 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ - (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) + (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY) extern struct backing_dev_info noop_backing_dev_info; @@ -179,13 +179,6 @@ static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi) return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY); } -static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi) -{ - /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */ - return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB | - BDI_CAP_NO_WRITEBACK)); -} - static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) { return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host)); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8e3802bf03a9..df18f0088dd3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -745,6 +745,7 @@ struct backing_dev_info *bdi_alloc(int node_id) kfree(bdi); return NULL; } + bdi->capabilities = BDI_CAP_WRITEBACK_ACCT; bdi->ra_pages = VM_READAHEAD_PAGES; bdi->io_pages = VM_READAHEAD_PAGES; return bdi; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e9c36521461a..0139f9622a92 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2738,7 +2738,7 @@ int test_clear_page_writeback(struct page *page) if (ret) { __xa_clear_mark(&mapping->i_pages, page_index(page), PAGECACHE_TAG_WRITEBACK); - if (bdi_cap_account_writeback(bdi)) { + if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) { struct bdi_writeback *wb = inode_to_wb(inode); dec_wb_stat(wb, WB_WRITEBACK); @@ -2791,7 +2791,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) PAGECACHE_TAG_WRITEBACK); xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); - if (bdi_cap_account_writeback(bdi)) + if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK); /* -- cgit v1.2.3-70-g09d2 From f56753ac2a90810726334df04d735e9f8f5a32d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 08:51:40 +0200 Subject: bdi: replace BDI_CAP_NO_{WRITEBACK,ACCT_DIRTY} with a single flag Replace the two negative flags that are always used together with a single positive flag that indicates the writeback capability instead of two related non-capabilities. Also remove the pointless wrappers to just check the flag. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- fs/9p/vfs_file.c | 2 +- fs/fs-writeback.c | 7 ++++--- include/linux/backing-dev.h | 48 ++++++++++----------------------------------- mm/backing-dev.c | 6 ++---- mm/filemap.c | 4 ++-- mm/memcontrol.c | 2 +- mm/memory-failure.c | 2 +- mm/migrate.c | 2 +- mm/mmap.c | 2 +- mm/page-writeback.c | 12 ++++++------ 10 files changed, 29 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 3576123d8299..6ecf863bfa2f 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -625,7 +625,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) inode = file_inode(vma->vm_file); - if (!mapping_cap_writeback_dirty(inode->i_mapping)) + if (!mapping_can_writeback(inode->i_mapping)) wbc.nr_to_write = 0; might_sleep(); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 149227160ff0..d4f84a2fe087 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2321,7 +2321,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) wb = locked_inode_to_wb_and_lock_list(inode); - WARN(bdi_cap_writeback_dirty(wb->bdi) && + WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) && !test_bit(WB_registered, &wb->state), "bdi-%s not registered\n", bdi_dev_name(wb->bdi)); @@ -2346,7 +2346,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) * to make sure background write-back happens * later. */ - if (bdi_cap_writeback_dirty(wb->bdi) && wakeup_bdi) + if (wakeup_bdi && + (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) wb_wakeup_delayed(wb); return; } @@ -2581,7 +2582,7 @@ int write_inode_now(struct inode *inode, int sync) .range_end = LLONG_MAX, }; - if (!mapping_cap_writeback_dirty(inode->i_mapping)) + if (!mapping_can_writeback(inode->i_mapping)) wbc.nr_to_write = 0; might_sleep(); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index b217344a2c63..44df4fcef65c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -110,27 +110,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); /* * Flags in backing_dev_info::capability * - * The first three flags control whether dirty pages will contribute to the - * VM's accounting and whether writepages() should be called for dirty pages - * (something that would not, for example, be appropriate for ramfs) - * - * WARNING: these flags are closely related and should not normally be - * used separately. The BDI_CAP_NO_ACCT_AND_WRITEBACK combines these - * three flags into a single convenience macro. - * - * BDI_CAP_NO_ACCT_DIRTY: Dirty pages shouldn't contribute to accounting - * BDI_CAP_NO_WRITEBACK: Don't write pages back - * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages - * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. + * BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages + * should contribute to accounting + * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages + * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold */ -#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 -#define BDI_CAP_NO_WRITEBACK 0x00000002 -#define BDI_CAP_WRITEBACK_ACCT 0x00000004 -#define BDI_CAP_STRICTLIMIT 0x00000010 -#define BDI_CAP_CGROUP_WRITEBACK 0x00000020 - -#define BDI_CAP_NO_ACCT_AND_WRITEBACK \ - (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY) +#define BDI_CAP_WRITEBACK (1 << 0) +#define BDI_CAP_WRITEBACK_ACCT (1 << 1) +#define BDI_CAP_STRICTLIMIT (1 << 2) extern struct backing_dev_info noop_backing_dev_info; @@ -169,24 +156,9 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(int sync, long timeout); -static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) -{ - return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK); -} - -static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi) -{ - return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY); -} - -static inline bool mapping_cap_writeback_dirty(struct address_space *mapping) -{ - return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host)); -} - -static inline bool mapping_cap_account_dirty(struct address_space *mapping) +static inline bool mapping_can_writeback(struct address_space *mapping) { - return bdi_cap_account_dirty(inode_to_bdi(mapping->host)); + return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; } static inline int bdi_sched_wait(void *word) @@ -223,7 +195,7 @@ static inline bool inode_cgwb_enabled(struct inode *inode) return cgroup_subsys_on_dfl(memory_cgrp_subsys) && cgroup_subsys_on_dfl(io_cgrp_subsys) && - bdi_cap_account_dirty(bdi) && + (bdi->capabilities & BDI_CAP_WRITEBACK) && (inode->i_sb->s_iflags & SB_I_CGROUPWB); } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index df18f0088dd3..408d5051d05b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -14,9 +14,7 @@ #include #include -struct backing_dev_info noop_backing_dev_info = { - .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, -}; +struct backing_dev_info noop_backing_dev_info; EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; @@ -745,7 +743,7 @@ struct backing_dev_info *bdi_alloc(int node_id) kfree(bdi); return NULL; } - bdi->capabilities = BDI_CAP_WRITEBACK_ACCT; + bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT; bdi->ra_pages = VM_READAHEAD_PAGES; bdi->io_pages = VM_READAHEAD_PAGES; return bdi; diff --git a/mm/filemap.c b/mm/filemap.c index 1aaea26556cc..6c2a0139e22f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -414,7 +414,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, .range_end = end, }; - if (!mapping_cap_writeback_dirty(mapping) || + if (!mapping_can_writeback(mapping) || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; @@ -1702,7 +1702,7 @@ repeat: no_page: if (!page && (fgp_flags & FGP_CREAT)) { int err; - if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping)) + if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) gfp_mask |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) gfp_mask &= ~__GFP_FS; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b807952b4d43..d2352f76d651 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5643,7 +5643,7 @@ static int mem_cgroup_move_account(struct page *page, if (PageDirty(page)) { struct address_space *mapping = page_mapping(page); - if (mapping_cap_account_dirty(mapping)) { + if (mapping_can_writeback(mapping)) { __mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages); __mod_lruvec_state(to_vec, NR_FILE_DIRTY, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f1aa6433f404..a1e73943445e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1006,7 +1006,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, */ mapping = page_mapping(hpage); if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping && - mapping_cap_writeback_dirty(mapping)) { + mapping_can_writeback(mapping)) { if (page_mkclean(hpage)) { SetPageDirty(hpage); } else { diff --git a/mm/migrate.c b/mm/migrate.c index 34a842a8eb6a..9d2f42a3a162 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -503,7 +503,7 @@ int migrate_page_move_mapping(struct address_space *mapping, __dec_lruvec_state(old_lruvec, NR_SHMEM); __inc_lruvec_state(new_lruvec, NR_SHMEM); } - if (dirty && mapping_cap_account_dirty(mapping)) { + if (dirty && mapping_can_writeback(mapping)) { __dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY); __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING); __inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY); diff --git a/mm/mmap.c b/mm/mmap.c index 40248d84ad5f..1fc0e92be4ba 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1666,7 +1666,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) /* Can the mapping track the dirty pages? */ return vma->vm_file && vma->vm_file->f_mapping && - mapping_cap_account_dirty(vma->vm_file->f_mapping); + mapping_can_writeback(vma->vm_file->f_mapping); } /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0139f9622a92..358d6f28c627 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1882,7 +1882,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) int ratelimit; int *p; - if (!bdi_cap_account_dirty(bdi)) + if (!(bdi->capabilities & BDI_CAP_WRITEBACK)) return; if (inode_cgwb_enabled(inode)) @@ -2423,7 +2423,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) trace_writeback_dirty_page(page, mapping); - if (mapping_cap_account_dirty(mapping)) { + if (mapping_can_writeback(mapping)) { struct bdi_writeback *wb; inode_attach_wb(inode, page); @@ -2450,7 +2450,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb) { - if (mapping_cap_account_dirty(mapping)) { + if (mapping_can_writeback(mapping)) { dec_lruvec_page_state(page, NR_FILE_DIRTY); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); dec_wb_stat(wb, WB_RECLAIMABLE); @@ -2513,7 +2513,7 @@ void account_page_redirty(struct page *page) { struct address_space *mapping = page->mapping; - if (mapping && mapping_cap_account_dirty(mapping)) { + if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; @@ -2625,7 +2625,7 @@ void __cancel_dirty_page(struct page *page) { struct address_space *mapping = page_mapping(page); - if (mapping_cap_account_dirty(mapping)) { + if (mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; @@ -2665,7 +2665,7 @@ int clear_page_dirty_for_io(struct page *page) VM_BUG_ON_PAGE(!PageLocked(page), page); - if (mapping && mapping_cap_account_dirty(mapping)) { + if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; -- cgit v1.2.3-70-g09d2 From fa01b1e9733fd59ecb8b5b6d85dfb481d2025fbf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 07:40:57 +0200 Subject: block: add a bdev_is_partition helper Add a littler helper to make the somewhat arcane bd_contains checks a little more obvious. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Signed-off-by: Jens Axboe --- block/blk-lib.c | 2 +- block/ioctl.c | 4 ++-- block/scsi_ioctl.c | 2 +- drivers/ide/ide-ioctls.c | 4 ++-- drivers/md/dm-table.c | 2 +- drivers/mmc/core/block.c | 2 +- drivers/s390/block/dasd_ioctl.c | 8 ++++---- fs/nfsd/blocklayout.c | 4 ++-- include/linux/blkdev.h | 9 +++++++-- kernel/trace/blktrace.c | 2 +- 10 files changed, 22 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/block/blk-lib.c b/block/blk-lib.c index 0d1811e57ac7..e90614fd8d6a 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -64,7 +64,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return -EINVAL; /* In case the discard request is in a partition */ - if (bdev->bd_partno) + if (bdev_is_partition(bdev)) part_offset = bdev->bd_part->start_sect; while (nr_sects) { diff --git a/block/ioctl.c b/block/ioctl.c index 06262c28f0c6..3fbc382eb926 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -23,7 +23,7 @@ static int blkpg_do_ioctl(struct block_device *bdev, return -EACCES; if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) return -EFAULT; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return -EINVAL; if (p.pno <= 0) @@ -94,7 +94,7 @@ static int blkdev_reread_part(struct block_device *bdev) { int ret; - if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains) + if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 4421e61c1af1..d4abd1ed5a2d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -852,7 +852,7 @@ EXPORT_SYMBOL(scsi_cmd_ioctl); int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd) { - if (bd && bd == bd->bd_contains) + if (bd && !bdev_is_partition(bd)) return 0; if (capable(CAP_SYS_RAWIO)) diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 09491098047b..58994da10c06 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -49,7 +49,7 @@ read_val: return err >= 0 ? put_user_long(err, arg) : err; set_val: - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) err = -EINVAL; else { if (!capable(CAP_SYS_ADMIN)) @@ -257,7 +257,7 @@ int generic_ide_ioctl(ide_drive_t *drive, struct block_device *bdev, switch (cmd) { case HDIO_OBSOLETE_IDENTITY: case HDIO_GET_IDENTITY: - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return -EINVAL; return ide_get_identity_ioctl(drive, cmd, argp); case HDIO_GET_NICE: diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 405d7cf10eb9..12d909044c10 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -903,7 +903,7 @@ static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev, struct request_queue *q = bdev_get_queue(bdev); /* request-based cannot stack on partitions! */ - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return false; return queue_is_mq(q); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index fa313b634135..8d3df0be0355 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -723,7 +723,7 @@ static int mmc_blk_check_blkdev(struct block_device *bdev) * whole block device, not on a partition. This prevents overspray * between sibling partitions. */ - if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains)) + if (!capable(CAP_SYS_RAWIO) || bdev_is_partition(bdev)) return -EPERM; return 0; } diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index faaf5596e31c..cb6427fb9f3d 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -277,7 +277,7 @@ dasd_ioctl_format(struct block_device *bdev, void __user *argp) dasd_put_device(base); return -EFAULT; } - if (bdev != bdev->bd_contains) { + if (bdev_is_partition(bdev)) { pr_warn("%s: The specified DASD is a partition and cannot be formatted\n", dev_name(&base->cdev->dev)); dasd_put_device(base); @@ -304,7 +304,7 @@ static int dasd_ioctl_check_format(struct block_device *bdev, void __user *argp) base = dasd_device_from_gendisk(bdev->bd_disk); if (!base) return -ENODEV; - if (bdev != bdev->bd_contains) { + if (bdev_is_partition(bdev)) { pr_warn("%s: The specified DASD is a partition and cannot be checked\n", dev_name(&base->cdev->dev)); rc = -EINVAL; @@ -362,7 +362,7 @@ static int dasd_ioctl_release_space(struct block_device *bdev, void __user *argp rc = -EROFS; goto out_err; } - if (bdev != bdev->bd_contains) { + if (bdev_is_partition(bdev)) { pr_warn("%s: The specified DASD is a partition and tracks cannot be released\n", dev_name(&base->cdev->dev)); rc = -EINVAL; @@ -540,7 +540,7 @@ dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) // ro setting is not allowed for partitions return -EINVAL; if (get_user(intval, (int __user *)argp)) diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 311e5ce80cfc..a07c39c94bbd 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -170,7 +170,7 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb, struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp) { - if (sb->s_bdev != sb->s_bdev->bd_contains) + if (bdev_is_partition(sb->s_bdev)) return nfserr_inval; return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); } @@ -382,7 +382,7 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp) { - if (sb->s_bdev != sb->s_bdev->bd_contains) + if (bdev_is_partition(sb->s_bdev)) return nfserr_inval; return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8e77f12de522..33f283885ba5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1353,6 +1353,11 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, extern int blk_verify_command(unsigned char *cmd, fmode_t mode); +static inline bool bdev_is_partition(struct block_device *bdev) +{ + return bdev->bd_partno; +} + enum blk_default_limits { BLK_MAX_SEGMENTS = 128, BLK_SAFE_MAX_SECTORS = 255, @@ -1469,7 +1474,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) if (q->limits.misaligned) return -1; - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, bdev->bd_part->start_sect); return q->limits.alignment_offset; @@ -1510,7 +1515,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (bdev != bdev->bd_contains) + if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, bdev->bd_part->start_sect); return q->limits.discard_alignment; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c0887f32f647..ec874ea04092 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -527,7 +527,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * and scsi-generic block devices we create a temporary new debugfs * directory that will be removed once the trace ends. */ - if (bdev && bdev == bdev->bd_contains) + if (bdev && !bdev_is_partition(bdev)) dir = q->debugfs_dir; else bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); -- cgit v1.2.3-70-g09d2 From 10ed16662da9e28a33b6c991c36c6b323b03dd5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Sep 2020 18:06:18 +0200 Subject: block: add a bdget_part helper All remaining callers of bdget() outside of fs/block_dev.c want to get a reference to the struct block_device for a given struct hd_struct. Add a helper just for that and then mark bdget static. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- block/partitions/core.c | 2 +- fs/block_dev.c | 9 ++++++--- include/linux/blkdev.h | 2 +- kernel/trace/blktrace.c | 9 ++------- 5 files changed, 11 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/block/genhd.c b/block/genhd.c index e5f17f022ec7..0a273211fec2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1048,7 +1048,7 @@ struct block_device *bdget_disk(struct gendisk *disk, int partno) part = disk_get_part(disk, partno); if (part) - bdev = bdget(part_devt(part)); + bdev = bdget_part(part); disk_put_part(part); return bdev; diff --git a/block/partitions/core.c b/block/partitions/core.c index dd6811422a87..5309e0f44ba3 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -580,7 +580,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno, return -ENXIO; ret = -ENOMEM; - bdevp = bdget(part_devt(part)); + bdevp = bdget_part(part); if (!bdevp) goto out_put_part; diff --git a/fs/block_dev.c b/fs/block_dev.c index 6b9d19ffa5af..9e84b1928b94 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -891,7 +891,7 @@ static int bdev_set(struct inode *inode, void *data) return 0; } -struct block_device *bdget(dev_t dev) +static struct block_device *bdget(dev_t dev) { struct block_device *bdev; struct inode *inode; @@ -920,8 +920,6 @@ struct block_device *bdget(dev_t dev) return bdev; } -EXPORT_SYMBOL(bdget); - /** * bdgrab -- Grab a reference to an already referenced block device * @bdev: Block device to grab a reference to. @@ -933,6 +931,11 @@ struct block_device *bdgrab(struct block_device *bdev) } EXPORT_SYMBOL(bdgrab); +struct block_device *bdget_part(struct hd_struct *part) +{ + return bdget(part_devt(part)); +} + long nr_blockdev_pages(void) { struct inode *inode; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5a3e1a4c2f7..cf80e61b4c5e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2003,7 +2003,7 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, void blkdev_put(struct block_device *bdev, fmode_t mode); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget(dev_t); +struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ec874ea04092..f1022945e346 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1827,13 +1827,11 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = bdget_part(dev_to_part(dev)); struct request_queue *q; - struct block_device *bdev; struct blk_trace *bt; ssize_t ret = -ENXIO; - bdev = bdget(part_devt(p)); if (bdev == NULL) goto out; @@ -1875,7 +1873,6 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, { struct block_device *bdev; struct request_queue *q; - struct hd_struct *p; struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1895,9 +1892,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; ret = -ENXIO; - - p = dev_to_part(dev); - bdev = bdget(part_devt(p)); + bdev = bdget_part(dev_to_part(dev)); if (bdev == NULL) goto out; -- cgit v1.2.3-70-g09d2