From 9b6cd5f76d60b563d75e55e432e03ed134761432 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 16 May 2016 17:17:04 +0200 Subject: ext2: Fix block zeroing in ext2_get_blocks() for DAX When zeroing allocated blocks for DAX, we accidentally zeroed only the first allocated block instead of all of them. So far this problem is hidden by the fact that page faults always need only a single block and DAX write code zeroes blocks again. But the zeroing in DAX code is racy and needs to be removed so fix the zeroing in ext2 to zero all allocated blocks. Reported-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- fs/ext2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext2/inode.c') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 6bd58e6ff038..038d0ed5f565 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -740,7 +740,7 @@ static int ext2_get_blocks(struct inode *inode, err = dax_clear_sectors(inode->i_sb->s_bdev, le32_to_cpu(chain[depth-1].key) << (inode->i_blkbits - 9), - 1 << inode->i_blkbits); + count << inode->i_blkbits); if (err) { mutex_unlock(&ei->truncate_mutex); goto cleanup; -- cgit v1.2.3-70-g09d2 From 86b0624e42d03a424e9571b8591d191c436f9af1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 May 2016 11:58:49 +0200 Subject: ext2: Avoid DAX zeroing to corrupt data Currently ext2 zeroes any data blocks allocated for DAX inode however it still returns them as BH_New. Thus DAX code zeroes them again in dax_insert_mapping() which can possibly overwrite the data that has been already stored to those blocks by a racing dax_io(). Avoid marking pre-zeroed buffers as new. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Vishal Verma --- fs/ext2/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext2/inode.c') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 038d0ed5f565..9a14af3b1a69 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -745,11 +745,11 @@ static int ext2_get_blocks(struct inode *inode, mutex_unlock(&ei->truncate_mutex); goto cleanup; } - } + } else + set_buffer_new(bh_result); ext2_splice_branch(inode, iblock, partial, indirect_blks, count); mutex_unlock(&ei->truncate_mutex); - set_buffer_new(bh_result); got_it: map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); if (count > blocks_to_boundary) -- cgit v1.2.3-70-g09d2 From 3dc29161070ab14d065554c0ad58988ab77a7bfd Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 15 Mar 2016 11:20:41 -0600 Subject: dax: use sb_issue_zerout instead of calling dax_clear_sectors dax_clear_sectors() cannot handle poisoned blocks. These must be zeroed using the BIO interface instead. Convert ext2 and XFS to use only sb_issue_zerout(). Reviewed-by: Jeff Moyer Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox [vishal: Also remove the dax_clear_sectors function entirely] Signed-off-by: Vishal Verma --- fs/dax.c | 32 -------------------------------- fs/ext2/inode.c | 8 ++++---- fs/xfs/xfs_bmap_util.c | 15 ++++----------- include/linux/dax.h | 1 - 4 files changed, 8 insertions(+), 48 deletions(-) (limited to 'fs/ext2/inode.c') diff --git a/fs/dax.c b/fs/dax.c index d602410d8e52..0abbbb62981e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -87,38 +87,6 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n) return page; } -/* - * dax_clear_sectors() is called from within transaction context from XFS, - * and hence this means the stack from this point must follow GFP_NOFS - * semantics for all operations. - */ -int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size) -{ - struct blk_dax_ctl dax = { - .sector = _sector, - .size = _size, - }; - - might_sleep(); - do { - long count, sz; - - count = dax_map_atomic(bdev, &dax); - if (count < 0) - return count; - sz = min_t(long, count, SZ_128K); - clear_pmem(dax.addr, sz); - dax.size -= sz; - dax.sector += sz / 512; - dax_unmap_atomic(bdev, &dax); - cond_resched(); - } while (dax.size); - - wmb_pmem(); - return 0; -} -EXPORT_SYMBOL_GPL(dax_clear_sectors); - static bool buffer_written(struct buffer_head *bh) { return buffer_mapped(bh) && !buffer_unwritten(bh); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 9a14af3b1a69..17cbd6b696f2 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -737,10 +738,9 @@ static int ext2_get_blocks(struct inode *inode, * so that it's not found by another thread before it's * initialised */ - err = dax_clear_sectors(inode->i_sb->s_bdev, - le32_to_cpu(chain[depth-1].key) << - (inode->i_blkbits - 9), - count << inode->i_blkbits); + err = sb_issue_zeroout(inode->i_sb, + le32_to_cpu(chain[depth-1].key), count, + GFP_NOFS); if (err) { mutex_unlock(&ei->truncate_mutex); goto cleanup; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 3b6309865c65..930ac6a17ce3 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -72,18 +72,11 @@ xfs_zero_extent( struct xfs_mount *mp = ip->i_mount; xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); sector_t block = XFS_BB_TO_FSBT(mp, sector); - ssize_t size = XFS_FSB_TO_B(mp, count_fsb); - - if (IS_DAX(VFS_I(ip))) - return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), - sector, size); - - /* - * let the block layer decide on the fastest method of - * implementing the zeroing. - */ - return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); + return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), + block << (mp->m_super->s_blocksize_bits - 9), + count_fsb << (mp->m_super->s_blocksize_bits - 9), + GFP_NOFS, true); } /* diff --git a/include/linux/dax.h b/include/linux/dax.h index 7c45ac7ea1d1..7f853ffaa987 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,7 +7,6 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, get_block_t, dio_iodone_t, int flags); -int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); -- cgit v1.2.3-70-g09d2