From 621c1f42945e76015c3a585e7a9fe6e71665eba0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:16:44 +0200 Subject: block: move struct block_device to blk_types.h Move the struct block_device definition together with most of the block layer definitions, as it has nothing to do with the rest of fs.h. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'include/linux/blk_types.h') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ccb895f911b1..a602132cbe32 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -14,12 +14,49 @@ struct bio_set; struct bio; struct bio_integrity_payload; struct page; -struct block_device; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; +struct block_device { + dev_t bd_dev; /* not a kdev_t - it's a search key */ + int bd_openers; + struct inode * bd_inode; /* will die */ + struct super_block * bd_super; + struct mutex bd_mutex; /* open/close mutex */ + void * bd_claiming; + void * bd_holder; + int bd_holders; + bool bd_write_holder; +#ifdef CONFIG_SYSFS + struct list_head bd_holder_disks; +#endif + struct block_device * bd_contains; + unsigned bd_block_size; + u8 bd_partno; + struct hd_struct * bd_part; + /* number of times partitions within this device have been opened. */ + unsigned bd_part_count; + int bd_invalidated; + struct gendisk * bd_disk; + struct request_queue * bd_queue; + struct backing_dev_info *bd_bdi; + struct list_head bd_list; + /* + * Private data. You must have bd_claim'ed the block_device + * to use this. NOTE: bd_claim allows an owner to claim + * the same device multiple times, the owner must take special + * care to not mess up bd_private for that case. + */ + unsigned long bd_private; + + /* The counter of freeze processes */ + int bd_fsfreeze_count; + /* Mutex for freeze */ + struct mutex bd_fsfreeze_mutex; +} __randomize_layout; + /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. -- cgit v1.2.3-70-g09d2 From 6b7b181b67aa8177e57732723106a0411570a86d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Jun 2020 10:01:55 +0200 Subject: block: remove the bd_block_size field from struct block_device We can trivially calculate the block size from the inodes i_blkbits variable. Use that instead of keeping two redundant copies of the information in slightly different formats. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 9 ++------- include/linux/blk_types.h | 1 - include/linux/blkdev.h | 2 +- 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux/blk_types.h') diff --git a/fs/block_dev.c b/fs/block_dev.c index 8b7a9c76d33e..06d31e459048 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -105,10 +105,7 @@ EXPORT_SYMBOL(invalidate_bdev); static void set_init_blocksize(struct block_device *bdev) { - unsigned bsize = bdev_logical_block_size(bdev); - - bdev->bd_block_size = bsize; - bdev->bd_inode->i_blkbits = blksize_bits(bsize); + bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev)); } int set_blocksize(struct block_device *bdev, int size) @@ -122,9 +119,8 @@ int set_blocksize(struct block_device *bdev, int size) return -EINVAL; /* Don't change the size if it is same as current */ - if (bdev->bd_block_size != size) { + if (bdev->bd_inode->i_blkbits != blksize_bits(size)) { sync_blockdev(bdev); - bdev->bd_block_size = size; bdev->bd_inode->i_blkbits = blksize_bits(size); kill_bdev(bdev); } @@ -889,7 +885,6 @@ struct block_device *bdget(dev_t dev) bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; - bdev->bd_block_size = i_blocksize(inode); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; inode->i_mode = S_IFBLK; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a602132cbe32..b01cd19bbe8a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -33,7 +33,6 @@ struct block_device { struct list_head bd_holder_disks; #endif struct block_device * bd_contains; - unsigned bd_block_size; u8 bd_partno; struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cc913ffdbe2..408eb66a82fd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1543,7 +1543,7 @@ static inline unsigned int blksize_bits(unsigned int size) static inline unsigned int block_size(struct block_device *bdev) { - return bdev->bd_block_size; + return 1 << bdev->bd_inode->i_blkbits; } int kblockd_schedule_work(struct work_struct *work); -- cgit v1.2.3-70-g09d2 From e556f6ba10f0f3c3484a1597382ceaec1e7bc700 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Jun 2020 10:01:56 +0200 Subject: block: remove the bd_queue field from struct block_device Just use bd_disk->queue instead. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/dax/super.c | 2 +- drivers/md/md.c | 2 +- drivers/nvme/target/core.c | 2 +- fs/block_dev.c | 11 ++++------- fs/direct-io.c | 4 ++-- fs/xfs/xfs_pwork.c | 2 +- include/linux/blk_types.h | 1 - mm/swapfile.c | 2 +- 8 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux/blk_types.h') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 8e32345be0f7..f50828526331 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -59,7 +59,7 @@ EXPORT_SYMBOL(bdev_dax_pgoff); #if IS_ENABLED(CONFIG_FS_DAX) struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { - if (!blk_queue_dax(bdev->bd_queue)) + if (!blk_queue_dax(bdev->bd_disk->queue)) return NULL; return dax_get_by_host(bdev->bd_disk->disk_name); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 7b7cb49be351..8bb69c61afe0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -199,7 +199,7 @@ static int rdevs_init_serial(struct mddev *mddev) static int rdev_need_serial(struct md_rdev *rdev) { return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 && - rdev->bdev->bd_queue->nr_hw_queues != 1 && + rdev->bdev->bd_disk->queue->nr_hw_queues != 1 && test_bit(WriteMostly, &rdev->flags)); } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6e2f623e472e..6816507fba58 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -467,7 +467,7 @@ static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns) return -EINVAL; } - if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) { + if (!blk_queue_pci_p2pdma(ns->bdev->bd_disk->queue)) { pr_err("peer-to-peer DMA is not supported by the driver of %s\n", ns->device_path); return -EINVAL; diff --git a/fs/block_dev.c b/fs/block_dev.c index 06d31e459048..68cb08bc1b7a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -693,12 +693,12 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, if (!ops->rw_page || bdev_get_integrity(bdev)) return result; - result = blk_queue_enter(bdev->bd_queue, 0); + result = blk_queue_enter(bdev->bd_disk->queue, 0); if (result) return result; result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, REQ_OP_READ); - blk_queue_exit(bdev->bd_queue); + blk_queue_exit(bdev->bd_disk->queue); return result; } @@ -729,7 +729,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; - result = blk_queue_enter(bdev->bd_queue, 0); + result = blk_queue_enter(bdev->bd_disk->queue, 0); if (result) return result; @@ -742,7 +742,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, clean_page_buffers(page); unlock_page(page); } - blk_queue_exit(bdev->bd_queue); + blk_queue_exit(bdev->bd_disk->queue); return result; } @@ -1568,7 +1568,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!bdev->bd_openers) { first_open = true; bdev->bd_disk = disk; - bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; bdev->bd_partno = partno; @@ -1589,7 +1588,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_part = NULL; bdev->bd_disk = NULL; - bdev->bd_queue = NULL; mutex_unlock(&bdev->bd_mutex); disk_unblock_events(disk); put_disk_and_module(disk); @@ -1666,7 +1664,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; - bdev->bd_queue = NULL; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; diff --git a/fs/direct-io.c b/fs/direct-io.c index 6d5370eac2a8..183299892465 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1387,8 +1387,8 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, * Attempt to prefetch the pieces we likely need later. */ prefetch(&bdev->bd_disk->part_tbl); - prefetch(bdev->bd_queue); - prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); + prefetch(bdev->bd_disk->queue); + prefetch((char *)bdev->bd_disk->queue + SMP_CACHE_BYTES); return do_blockdev_direct_IO(iocb, inode, bdev, iter, get_block, end_io, submit_io, flags); diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c index 4bcc3e61056c..b03333f1c84a 100644 --- a/fs/xfs/xfs_pwork.c +++ b/fs/xfs/xfs_pwork.c @@ -132,5 +132,5 @@ xfs_pwork_guess_datadev_parallelism( * For now we'll go with the most conservative setting possible, * which is two threads for an SSD and 1 thread everywhere else. */ - return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1; + return blk_queue_nonrot(btp->bt_bdev->bd_disk->queue) ? 2 : 1; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b01cd19bbe8a..667cd365fd04 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -39,7 +39,6 @@ struct block_device { unsigned bd_part_count; int bd_invalidated; struct gendisk * bd_disk; - struct request_queue * bd_queue; struct backing_dev_info *bd_bdi; struct list_head bd_list; /* diff --git a/mm/swapfile.c b/mm/swapfile.c index 987276c557d1..6c26916e95fd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2929,7 +2929,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) * write only restriction. Hence zoned block devices are not * suitable for swapping. Disallow them here. */ - if (blk_queue_is_zoned(p->bdev->bd_queue)) + if (blk_queue_is_zoned(p->bdev->bd_disk->queue)) return -EINVAL; p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { -- cgit v1.2.3-70-g09d2 From 47b5e00322a3033851ab304f3c3873aebdfb4979 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Jun 2020 10:01:57 +0200 Subject: block: remove the unused bd_private field from struct block_device Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux/blk_types.h') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 667cd365fd04..b5f7105806e4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -41,13 +41,6 @@ struct block_device { struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; struct list_head bd_list; - /* - * Private data. You must have bd_claim'ed the block_device - * to use this. NOTE: bd_claim allows an owner to claim - * the same device multiple times, the owner must take special - * care to not mess up bd_private for that case. - */ - unsigned long bd_private; /* The counter of freeze processes */ int bd_fsfreeze_count; -- cgit v1.2.3-70-g09d2 From 1008fe6dc36dd87dfd02d4307f49162f0b4f1665 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 26 Jun 2020 10:01:58 +0200 Subject: block: remove the all_bdevs list Instead just iterate over the inodes for the block device superblock. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 22 +++++++--------------- include/linux/blk_types.h | 1 - 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'include/linux/blk_types.h') diff --git a/fs/block_dev.c b/fs/block_dev.c index 68cb08bc1b7a..2d2fcb50e78e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -773,7 +773,6 @@ static void init_once(void *foo) memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); - INIT_LIST_HEAD(&bdev->bd_list); #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif @@ -789,9 +788,6 @@ static void bdev_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); invalidate_inode_buffers(inode); /* is it needed here? */ clear_inode(inode); - spin_lock(&bdev_lock); - list_del_init(&bdev->bd_list); - spin_unlock(&bdev_lock); /* Detach inode from wb early as bdi_put() may free bdi->wb */ inode_detach_wb(inode); if (bdev->bd_bdi != &noop_backing_dev_info) { @@ -866,8 +862,6 @@ static int bdev_set(struct inode *inode, void *data) return 0; } -static LIST_HEAD(all_bdevs); - struct block_device *bdget(dev_t dev) { struct block_device *bdev; @@ -892,9 +886,6 @@ struct block_device *bdget(dev_t dev) inode->i_bdev = bdev; inode->i_data.a_ops = &def_blk_aops; mapping_set_gfp_mask(&inode->i_data, GFP_USER); - spin_lock(&bdev_lock); - list_add(&bdev->bd_list, &all_bdevs); - spin_unlock(&bdev_lock); unlock_new_inode(inode); } return bdev; @@ -915,13 +906,14 @@ EXPORT_SYMBOL(bdgrab); long nr_blockdev_pages(void) { - struct block_device *bdev; + struct inode *inode; long ret = 0; - spin_lock(&bdev_lock); - list_for_each_entry(bdev, &all_bdevs, bd_list) { - ret += bdev->bd_inode->i_mapping->nrpages; - } - spin_unlock(&bdev_lock); + + spin_lock(&blockdev_superblock->s_inode_list_lock); + list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) + ret += inode->i_mapping->nrpages; + spin_unlock(&blockdev_superblock->s_inode_list_lock); + return ret; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b5f7105806e4..07facaf62b72 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -40,7 +40,6 @@ struct block_device { int bd_invalidated; struct gendisk * bd_disk; struct backing_dev_info *bd_bdi; - struct list_head bd_list; /* The counter of freeze processes */ int bd_fsfreeze_count; -- cgit v1.2.3-70-g09d2 From ecdef9f459ad24bf987267df6c25967819016707 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 17 Jul 2020 10:42:29 +0800 Subject: block: change REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL to be odd numbers Currently REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL are defined as even numbers 6 and 8, such zone reset bios are treated as READ bios by bio_data_dir(), which is obviously misleading. The macro bio_data_dir() is defined in include/linux/bio.h as, 55 #define bio_data_dir(bio) \ 56 (op_is_write(bio_op(bio)) ? WRITE : READ) And op_is_write() is defined in include/linux/blk_types.h as, 397 static inline bool op_is_write(unsigned int op) 398 { 399 return (op & 1); 400 } The convention of op_is_write() is when there is data transfer then the op code should be odd number, and treat as a write op. bio_data_dir() treats all bio direction as READ if op_is_write() reports false, and WRITE if op_is_write() reports true. Because REQ_OP_ZONE_RESET and REQ_OP_ZONE_RESET_ALL are even numbers, although they don't transfer data but reporting them as READ bio by bio_data_dir() is misleading and might be wrong. Because these two commands will reset the writer pointers of the resetting zones, and all content after the reset write pointer will be invalid and unaccessible, obviously they are not READ bios in any means. This patch changes REQ_OP_ZONE_RESET from 6 to 15, and changes REQ_OP_ZONE_RESET_ALL from 8 to 17. Now bios with these two op code can be treated as WRITE by bio_data_dir(). Although they don't transfer data, now we keep them consistent with REQ_OP_DISCARD and REQ_OP_WRITE_ZEROES with the ituition that they change on-media content and should be WRITE request. Signed-off-by: Coly Li Reviewed-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Jens Axboe Cc: Johannes Thumshirn Cc: Keith Busch Cc: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/blk_types.h') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 07facaf62b72..4ecf4fed171f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -327,12 +327,8 @@ enum req_opf { REQ_OP_DISCARD = 3, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, - /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = 6, /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, - /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = 8, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, /* Open a zone */ @@ -343,6 +339,10 @@ enum req_opf { REQ_OP_ZONE_FINISH = 12, /* write data at the current zone write pointer */ REQ_OP_ZONE_APPEND = 13, + /* reset a zone write pointer */ + REQ_OP_ZONE_RESET = 15, + /* reset all the zone present on the device */ + REQ_OP_ZONE_RESET_ALL = 17, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, -- cgit v1.2.3-70-g09d2