diff options
-rw-r--r-- | fs/btrfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 18 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 4 | ||||
-rw-r--r-- | fs/btrfs/file.c | 97 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 379 | ||||
-rw-r--r-- | fs/direct-io.c | 19 | ||||
-rw-r--r-- | include/linux/fs.h | 2 |
7 files changed, 286 insertions, 234 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 68b95ad82126..575636f6491e 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -14,7 +14,6 @@ config BTRFS_FS select LZO_DECOMPRESS select ZSTD_COMPRESS select ZSTD_DECOMPRESS - select FS_IOMAP select RAID6_PQ select XOR_BLOCKS select SRCU diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index aeff56a0e105..e7d709505cb1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -28,6 +28,7 @@ enum { BTRFS_INODE_NEEDS_FULL_SYNC, BTRFS_INODE_COPY_EVERYTHING, BTRFS_INODE_IN_DELALLOC_LIST, + BTRFS_INODE_READDIO_NEED_LOCK, BTRFS_INODE_HAS_PROPS, BTRFS_INODE_SNAPSHOT_FLUSH, }; @@ -312,6 +313,23 @@ struct btrfs_dio_private { u8 csums[]; }; +/* + * Disable DIO read nolock optimization, so new dio readers will be forced + * to grab i_mutex. It is used to avoid the endless truncate due to + * nonlocked dio read. + */ +static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode) +{ + set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); + smp_mb(); +} + +static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode) +{ + smp_mb__before_atomic(); + clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags); +} + /* Array of bytes with variable length, hexadecimal format 0x1234 */ #define CSUM_FMT "0x%*phN" #define CSUM_FMT_VALUE(size, bytes) size, bytes diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 161533040978..30ce7039bc27 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -28,7 +28,6 @@ #include <linux/dynamic_debug.h> #include <linux/refcount.h> #include <linux/crc32c.h> -#include <linux/iomap.h> #include "extent-io-tree.h" #include "extent_io.h" #include "extent_map.h" @@ -2934,9 +2933,6 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end); void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, u64 end, int uptodate); extern const struct dentry_operations btrfs_dentry_operations; -ssize_t 
btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter); -extern const struct iomap_ops btrfs_dio_iomap_ops; -extern const struct iomap_dio_ops btrfs_dops; /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fde125616687..2c14312b05e8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1809,61 +1809,21 @@ again: return num_written ? num_written : ret; } -static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, - const struct iov_iter *iter, loff_t offset) -{ - const unsigned int blocksize_mask = fs_info->sectorsize - 1; - - if (offset & blocksize_mask) - return -EINVAL; - - if (iov_iter_alignment(iter) & blocksize_mask) - return -EINVAL; - - return 0; -} - -static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) +static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - loff_t pos = iocb->ki_pos; - ssize_t written = 0; + loff_t pos; + ssize_t written; ssize_t written_buffered; loff_t endbyte; int err; - size_t count = 0; - bool relock = false; - if (check_direct_IO(fs_info, from, pos)) - goto buffered; - - count = iov_iter_count(from); - /* - * If the write DIO is beyond the EOF, we need update the isize, but it - * is protected by i_mutex. So we can not unlock the i_mutex at this - * case. 
- */ - if (pos + count <= inode->i_size) { - inode_unlock(inode); - relock = true; - } else if (iocb->ki_flags & IOCB_NOWAIT) { - return -EAGAIN; - } - - down_read(&BTRFS_I(inode)->dio_sem); - written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dops, - is_sync_kiocb(iocb)); - up_read(&BTRFS_I(inode)->dio_sem); - - if (relock) - inode_lock(inode); + written = generic_file_direct_write(iocb, from); if (written < 0 || !iov_iter_count(from)) return written; -buffered: pos = iocb->ki_pos; written_buffered = btrfs_buffered_write(iocb, from); if (written_buffered < 0) { @@ -2002,7 +1962,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, atomic_inc(&BTRFS_I(inode)->sync_writers); if (iocb->ki_flags & IOCB_DIRECT) { - num_written = btrfs_direct_write(iocb, from); + num_written = __btrfs_direct_write(iocb, from); } else { num_written = btrfs_buffered_write(iocb, from); if (num_written > 0) @@ -3516,54 +3476,9 @@ static int btrfs_file_open(struct inode *inode, struct file *filp) return generic_file_open(inode, filp); } -static int check_direct_read(struct btrfs_fs_info *fs_info, - const struct iov_iter *iter, loff_t offset) -{ - int ret; - int i, seg; - - ret = check_direct_IO(fs_info, iter, offset); - if (ret < 0) - return ret; - - for (seg = 0; seg < iter->nr_segs; seg++) - for (i = seg + 1; i < iter->nr_segs; i++) - if (iter->iov[seg].iov_base == iter->iov[i].iov_base) - return -EINVAL; - return 0; -} - -static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) -{ - struct inode *inode = file_inode(iocb->ki_filp); - ssize_t ret; - - if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos)) - return 0; - - inode_lock_shared(inode); - ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dops, - is_sync_kiocb(iocb)); - inode_unlock_shared(inode); - return ret; -} - -static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) -{ - ssize_t ret = 0; - - if (iocb->ki_flags & IOCB_DIRECT) { - ret = 
btrfs_direct_read(iocb, to); - if (ret < 0) - return ret; - } - - return generic_file_buffered_read(iocb, to, ret); -} - const struct file_operations btrfs_file_operations = { .llseek = btrfs_file_llseek, - .read_iter = btrfs_file_read_iter, + .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .write_iter = btrfs_file_write_iter, .mmap = btrfs_file_mmap, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 31ac8c682f19..d04c82c88418 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/bio.h> +#include <linux/buffer_head.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/pagemap.h> @@ -57,9 +58,9 @@ struct btrfs_iget_args { struct btrfs_dio_data { u64 reserve; - loff_t length; - ssize_t submitted; - struct extent_changeset *data_reserved; + u64 unsubmitted_oe_range_start; + u64 unsubmitted_oe_range_end; + int overwrite; }; static const struct inode_operations btrfs_dir_inode_operations; @@ -4810,7 +4811,10 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) truncate_setsize(inode, newsize); + /* Disable nonlocked read DIO to avoid the endless truncate */ + btrfs_inode_block_unlocked_dio(BTRFS_I(inode)); inode_dio_wait(inode); + btrfs_inode_resume_unlocked_dio(BTRFS_I(inode)); ret = btrfs_truncate(inode, newsize == oldsize); if (ret && inode->i_nlink) { @@ -7041,7 +7045,7 @@ out: } static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, - struct extent_state **cached_state, bool writing) + struct extent_state **cached_state, int writing) { struct btrfs_ordered_extent *ordered; int ret = 0; @@ -7179,7 +7183,30 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, } +static int btrfs_get_blocks_direct_read(struct extent_map *em, + struct buffer_head *bh_result, + struct inode *inode, + u64 start, u64 len) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + + if (em->block_start == 
EXTENT_MAP_HOLE || + test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + return -ENOENT; + + len = min(len, em->len - (start - em->start)); + + bh_result->b_blocknr = (em->block_start + (start - em->start)) >> + inode->i_blkbits; + bh_result->b_size = len; + bh_result->b_bdev = fs_info->fs_devices->latest_bdev; + set_buffer_mapped(bh_result); + + return 0; +} + static int btrfs_get_blocks_direct_write(struct extent_map **map, + struct buffer_head *bh_result, struct inode *inode, struct btrfs_dio_data *dio_data, u64 start, u64 len) @@ -7241,6 +7268,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, } /* this will cow the extent */ + len = bh_result->b_size; free_extent_map(em); *map = em = btrfs_new_extent_direct(inode, start, len); if (IS_ERR(em)) { @@ -7251,73 +7279,64 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, len = min(len, em->len - (start - em->start)); skip_cow: + bh_result->b_blocknr = (em->block_start + (start - em->start)) >> + inode->i_blkbits; + bh_result->b_size = len; + bh_result->b_bdev = fs_info->fs_devices->latest_bdev; + set_buffer_mapped(bh_result); + + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + set_buffer_new(bh_result); + /* * Need to update the i_size under the extent lock so buffered * readers will get the updated i_size when we unlock. 
*/ - if (start + len > i_size_read(inode)) + if (!dio_data->overwrite && start + len > i_size_read(inode)) i_size_write(inode, start + len); + WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; + dio_data->unsubmitted_oe_range_end = start + len; + current->journal_info = dio_data; out: return ret; } -static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, - loff_t length, unsigned flags, struct iomap *iomap, - struct iomap *srcmap) +static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_map *em; struct extent_state *cached_state = NULL; struct btrfs_dio_data *dio_data = NULL; + u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; - const bool write = !!(flags & IOMAP_WRITE); + u64 len = bh_result->b_size; int ret = 0; - u64 len = length; - bool unlock_extents = false; - if (!write) + if (!create) len = min_t(u64, len, fs_info->sectorsize); lockstart = start; lockend = start + len - 1; - /* - * The generic stuff only does filemap_write_and_wait_range, which - * isn't enough if we've written compressed pages to this area, so we - * need to flush the dirty pages again to make absolutely sure that any - * outstanding dirty pages are on disk. 
- */ - if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags)) - ret = filemap_fdatawrite_range(inode->i_mapping, start, - start + length - 1); - - dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS); - if (!dio_data) - return -ENOMEM; - - dio_data->length = length; - if (write) { - dio_data->reserve = round_up(length, fs_info->sectorsize); - ret = btrfs_delalloc_reserve_space(inode, - &dio_data->data_reserved, - start, dio_data->reserve); - if (ret) { - extent_changeset_free(dio_data->data_reserved); - kfree(dio_data); - return ret; - } + if (current->journal_info) { + /* + * Need to pull our outstanding extents and set journal_info to NULL so + * that anything that needs to check if there's a transaction doesn't get + * confused. + */ + dio_data = current->journal_info; + current->journal_info = NULL; } - iomap->private = dio_data; - /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. */ - if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) { + if (lock_extent_direct(inode, lockstart, lockend, &cached_state, + create)) { ret = -ENOTBLK; goto err; } @@ -7349,47 +7368,35 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, goto unlock_err; } - len = min(len, em->len - (start - em->start)); - if (write) { - ret = btrfs_get_blocks_direct_write(&em, inode, dio_data, - start, len); + if (create) { + ret = btrfs_get_blocks_direct_write(&em, bh_result, inode, + dio_data, start, len); if (ret < 0) goto unlock_err; - unlock_extents = true; - /* Recalc len in case the new em is smaller than requested */ - len = min(len, em->len - (start - em->start)); + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, + lockend, &cached_state); } else { + ret = btrfs_get_blocks_direct_read(em, bh_result, inode, + start, len); + /* Can be negative only if we read from a hole */ + if (ret < 0) { + ret = 0; + free_extent_map(em); + goto unlock_err; + } /* 
* We need to unlock only the end area that we aren't using. * The rest is going to be unlocked by the endio routine. */ - lockstart = start + len; - if (lockstart < lockend) - unlock_extents = true; - } - - if (unlock_extents) - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - lockstart, lockend, &cached_state); - else - free_extent_state(cached_state); - - /* - * Translate extent map information to iomap. - * We trim the extents (and move the addr) even though iomap code does - * that, since we have locked only the parts we are performing I/O in. - */ - if ((em->block_start == EXTENT_MAP_HOLE) || - (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) { - iomap->addr = IOMAP_NULL_ADDR; - iomap->type = IOMAP_HOLE; - } else { - iomap->addr = em->block_start + (start - em->start); - iomap->type = IOMAP_MAPPED; + lockstart = start + bh_result->b_size; + if (lockstart < lockend) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + lockstart, lockend, &cached_state); + } else { + free_extent_state(cached_state); + } } - iomap->offset = start; - iomap->bdev = fs_info->fs_devices->latest_bdev; - iomap->length = len; free_extent_map(em); @@ -7399,53 +7406,8 @@ unlock_err: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); err: - if (dio_data) { - btrfs_delalloc_release_space(inode, dio_data->data_reserved, - start, dio_data->reserve, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve); - extent_changeset_free(dio_data->data_reserved); - kfree(dio_data); - } - return ret; -} - -static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length, - ssize_t written, unsigned flags, struct iomap *iomap) -{ - int ret = 0; - struct btrfs_dio_data *dio_data = iomap->private; - size_t submitted = dio_data->submitted; - const bool write = !!(flags & IOMAP_WRITE); - - if (!write && (iomap->type == IOMAP_HOLE)) { - /* If reading from a hole, unlock and return */ - unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + 
length - 1); - goto out; - } - - if (submitted < length) { - pos += submitted; - length -= submitted; - if (write) - __endio_write_update_ordered(inode, pos, length, false); - else - unlock_extent(&BTRFS_I(inode)->io_tree, pos, - pos + length - 1); - ret = -ENOTBLK; - } - - if (write) { - if (dio_data->reserve) - btrfs_delalloc_release_space(inode, - dio_data->data_reserved, pos, - dio_data->reserve, true); - btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length); - extent_changeset_free(dio_data->data_reserved); - } -out: - kfree(dio_data); - iomap->private = NULL; - + if (dio_data) + current->journal_info = dio_data; return ret; } @@ -7468,7 +7430,7 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip) dip->logical_offset + dip->bytes - 1); } - bio_endio(dip->dio_bio); + dio_end_io(dip->dio_bio); kfree(dip); } @@ -7704,11 +7666,24 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio, dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; dip->dio_bio = dio_bio; refcount_set(&dip->refs, 1); + + if (write) { + struct btrfs_dio_data *dio_data = current->journal_info; + + /* + * Setting range start and end to the same value means that + * no cleanup will happen in btrfs_direct_IO + */ + dio_data->unsubmitted_oe_range_end = dip->logical_offset + + dip->bytes; + dio_data->unsubmitted_oe_range_start = + dio_data->unsubmitted_oe_range_end; + } return dip; } -static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, - struct bio *dio_bio, loff_t file_offset) +static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, + loff_t file_offset) { const bool write = (bio_op(dio_bio) == REQ_OP_WRITE); const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM); @@ -7725,7 +7700,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, int ret; blk_status_t status; struct btrfs_io_geometry geom; - struct btrfs_dio_data *dio_data = iomap->private; dip = 
btrfs_create_dio_private(dio_bio, inode, file_offset); if (!dip) { @@ -7734,8 +7708,8 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, file_offset + dio_bio->bi_iter.bi_size - 1); } dio_bio->bi_status = BLK_STS_RESOURCE; - bio_endio(dio_bio); - return BLK_QC_T_NONE; + dio_end_io(dio_bio); + return; } if (!write && csum) { @@ -7806,27 +7780,156 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, goto out_err; } - dio_data->submitted += clone_len; clone_offset += clone_len; start_sector += clone_len >> 9; file_offset += clone_len; } while (submit_len > 0); - return BLK_QC_T_NONE; + return; out_err: dip->dio_bio->bi_status = status; btrfs_dio_private_put(dip); - return BLK_QC_T_NONE; } -const struct iomap_ops btrfs_dio_iomap_ops = { - .iomap_begin = btrfs_dio_iomap_begin, - .iomap_end = btrfs_dio_iomap_end, -}; +static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, + const struct iov_iter *iter, loff_t offset) +{ + int seg; + int i; + unsigned int blocksize_mask = fs_info->sectorsize - 1; + ssize_t retval = -EINVAL; -const struct iomap_dio_ops btrfs_dops = { - .submit_io = btrfs_submit_direct, -}; + if (offset & blocksize_mask) + goto out; + + if (iov_iter_alignment(iter) & blocksize_mask) + goto out; + + /* If this is a write we don't need to check anymore */ + if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter)) + return 0; + /* + * Check to make sure we don't have duplicate iov_base's in this + * iovec, if so return EINVAL, otherwise we'll get csum errors + * when reading back. 
+ */ + for (seg = 0; seg < iter->nr_segs; seg++) { + for (i = seg + 1; i < iter->nr_segs; i++) { + if (iter->iov[seg].iov_base == iter->iov[i].iov_base) + goto out; + } + } + retval = 0; +out: + return retval; +} + +static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_dio_data dio_data = { 0 }; + struct extent_changeset *data_reserved = NULL; + loff_t offset = iocb->ki_pos; + size_t count = 0; + int flags = 0; + bool wakeup = true; + bool relock = false; + ssize_t ret; + + if (check_direct_IO(fs_info, iter, offset)) + return 0; + + inode_dio_begin(inode); + + /* + * The generic stuff only does filemap_write_and_wait_range, which + * isn't enough if we've written compressed pages to this area, so + * we need to flush the dirty pages again to make absolutely sure + * that any outstanding dirty pages are on disk. + */ + count = iov_iter_count(iter); + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_fdatawrite_range(inode->i_mapping, offset, + offset + count - 1); + + if (iov_iter_rw(iter) == WRITE) { + /* + * If the write DIO is beyond the EOF, we need update + * the isize, but it is protected by i_mutex. So we can + * not unlock the i_mutex at this case. + */ + if (offset + count <= inode->i_size) { + dio_data.overwrite = 1; + inode_unlock(inode); + relock = true; + } else if (iocb->ki_flags & IOCB_NOWAIT) { + ret = -EAGAIN; + goto out; + } + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, + offset, count); + if (ret) + goto out; + + /* + * We need to know how many extents we reserved so that we can + * do the accounting properly if we go over the number we + * originally calculated. Abuse current->journal_info for this. 
+ */ + dio_data.reserve = round_up(count, + fs_info->sectorsize); + dio_data.unsubmitted_oe_range_start = (u64)offset; + dio_data.unsubmitted_oe_range_end = (u64)offset; + current->journal_info = &dio_data; + down_read(&BTRFS_I(inode)->dio_sem); + } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, + &BTRFS_I(inode)->runtime_flags)) { + inode_dio_end(inode); + flags = DIO_LOCKING | DIO_SKIP_HOLES; + wakeup = false; + } + + ret = __blockdev_direct_IO(iocb, inode, + fs_info->fs_devices->latest_bdev, + iter, btrfs_get_blocks_direct, NULL, + btrfs_submit_direct, flags); + if (iov_iter_rw(iter) == WRITE) { + up_read(&BTRFS_I(inode)->dio_sem); + current->journal_info = NULL; + if (ret < 0 && ret != -EIOCBQUEUED) { + if (dio_data.reserve) + btrfs_delalloc_release_space(inode, data_reserved, + offset, dio_data.reserve, true); + /* + * On error we might have left some ordered extents + * without submitting corresponding bios for them, so + * cleanup them up to avoid other tasks getting them + * and waiting for them to complete forever. 
+ */ + if (dio_data.unsubmitted_oe_range_start < + dio_data.unsubmitted_oe_range_end) + __endio_write_update_ordered(inode, + dio_data.unsubmitted_oe_range_start, + dio_data.unsubmitted_oe_range_end - + dio_data.unsubmitted_oe_range_start, + false); + } else if (ret >= 0 && (size_t)ret < count) + btrfs_delalloc_release_space(inode, data_reserved, + offset, count - (size_t)ret, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), count); + } +out: + if (wakeup) + inode_dio_end(inode); + if (relock) + inode_lock(inode); + + extent_changeset_free(data_reserved); + return ret; +} static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) @@ -10122,7 +10225,7 @@ static const struct address_space_operations btrfs_aops = { .writepage = btrfs_writepage, .writepages = btrfs_writepages, .readahead = btrfs_readahead, - .direct_IO = noop_direct_IO, + .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, #ifdef CONFIG_MIGRATION diff --git a/fs/direct-io.c b/fs/direct-io.c index 1543b5af400e..6d5370eac2a8 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -386,6 +386,25 @@ static void dio_bio_end_io(struct bio *bio) spin_unlock_irqrestore(&dio->bio_lock, flags); } +/** + * dio_end_io - handle the end io action for the given bio + * @bio: The direct io bio that's being completed + * + * This is meant to be called by any filesystem that uses their own dio_submit_t + * so that the DIO specific endio actions are dealt with after the filesystem + * has done its completion work. 
+ */ +void dio_end_io(struct bio *bio) +{ + struct dio *dio = bio->bi_private; + + if (dio->is_async) + dio_bio_end_aio(bio); + else + dio_bio_end_io(bio); +} +EXPORT_SYMBOL_GPL(dio_end_io); + static inline void dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, struct block_device *bdev, diff --git a/include/linux/fs.h b/include/linux/fs.h index 8e1f8f93108f..6c4ab4dc1cd7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3204,6 +3204,8 @@ enum { DIO_SKIP_HOLES = 0x02, }; +void dio_end_io(struct bio *bio); + ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct block_device *bdev, struct iov_iter *iter, get_block_t get_block, |