diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-12-02 14:46:22 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-12-02 14:46:22 -0800 |
commit | 97eeb4d9d755605385fa329da9afa38729f3413c (patch) | |
tree | fc63d9f43fc7235a9fe5cfaf03d73ec03dc5f2a6 /fs/xfs/xfs_file.c | |
parent | 9b326948c23908692d7dfe56ed149840d3829eaa (diff) | |
parent | 8feb4732ff9f2732354b44c4418569974e2f949c (diff) |
Merge tag 'xfs-5.5-merge-16' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull XFS updates from Darrick Wong:
"For this release, we changed quite a few things.
Highlights:
- Fixed some long tail latency problems in the block allocator
- Removed some long deprecated (and for the past several years no-op)
mount options and ioctls
- Strengthened the extended attribute and directory verifiers
- Audited and fixed all the places where we could return EFSCORRUPTED
without logging anything
- Refactored the old SGI space allocation ioctls to make the
equivalent fallocate calls
- Fixed a race between fallocate and directio
- Fixed an integer overflow when files have more than a few
billion(!) extents
- Fixed a longstanding bug where quota accounting could be incorrect
when performing unwritten extent conversion on a freshly mounted fs
- Fixed various complaints in scrub about soft lockups and
unresponsiveness to signals
- De-vtable'd the directory handling code, which should make it
faster
- Converted to the new mount api, for better or for worse
- Cleaned up some memory leaks
and quite a lot of other smaller fixes and cleanups.
A more detailed summary:
- Fill out the build string
- Prevent inode fork extent count overflows
- Refactor the allocator to reduce long tail latency
- Rework incore log locking a little to reduce spinning
- Break up the xfs_iomap_begin functions into smaller more cohesive
parts
- Fix allocation alignment being dropped too early when the
allocation request is for more blocks than an AG is large
- Other small cleanups
- Clean up file buftarg retrieval helpers
- Hoist the resvsp and unresvsp ioctls to the vfs
- Remove the undocumented biosize mount option, since it has never
been mentioned as existing or supported on linux
- Clean up some of the mount option printing and parsing
- Enhance attr leaf verifier to check block structure
- Check dirent and attr names for invalid characters before passing
them to the vfs
- Refactor open-coded bmbt walking
- Fix a few places where we return EIO instead of EFSCORRUPTED after
failing metadata sanity checks
- Fix a synchronization problem between fallocate and aio dio
corrupting the file length
- Clean up various loose ends in the iomap and bmap code
- Convert to the new mount api
- Make sure we always log something when returning EFSCORRUPTED
- Fix some problems where long running scrub loops could trigger soft
lockup warnings and/or fail to exit due to fatal signals pending
- Fix various Coverity complaints
- Remove most of the function pointers from the directory code to
reduce indirection penalties
- Ensure that dquots are attached to the inode when performing
unwritten extent conversion after io
- Deuglify incore projid and crtime types
- Fix another AGI/AGF locking order deadlock when renaming
- Clean up some quota typedefs
- Remove the FSSETDM ioctls which haven't done anything in 20 years
- Fix some memory leaks when mounting the log fails
- Fix an underflow when updating an xattr leaf freemap
- Remove some trivial wrappers
- Report metadata corruption as an error, not a (potentially) fatal
assertion
- Clean up the dir/attr buffer mapping code
- Allow fatal signals to kill scrub during parent pointer checks"
* tag 'xfs-5.5-merge-16' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (198 commits)
xfs: allow parent directory scans to be interrupted with fatal signals
xfs: remove the mappedbno argument to xfs_da_get_buf
xfs: remove the mappedbno argument to xfs_da_read_buf
xfs: split xfs_da3_node_read
xfs: remove the mappedbno argument to xfs_dir3_leafn_read
xfs: remove the mappedbno argument to xfs_dir3_leaf_read
xfs: remove the mappedbno argument to xfs_attr3_leaf_read
xfs: remove the mappedbno argument to xfs_da_reada_buf
xfs: improve the xfs_dabuf_map calling conventions
xfs: refactor xfs_dabuf_map
xfs: simplify mappedbno handling in xfs_da_{get,read}_buf
xfs: report corruption only as a regular error
xfs: Remove kmem_zone_free() wrapper
xfs: Remove kmem_zone_destroy() wrapper
xfs: Remove slab init wrappers
xfs: fix attr leaf header freemap.size underflow
xfs: fix some memory leaks in log recovery
xfs: fix another missing include
xfs: remove XFS_IOC_FSSETDM and XFS_IOC_FSSETDM_BY_HANDLE
xfs: remove duplicated include from xfs_dir2_data.c
...
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- | fs/xfs/xfs_file.c | 104 |
1 files changed, 78 insertions, 26 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index c0620135a279..c93250108952 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -188,7 +188,8 @@ xfs_file_dio_aio_read( file_accessed(iocb->ki_filp); xfs_ilock(ip, XFS_IOLOCK_SHARED); - ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL, is_sync_kiocb(iocb)); + ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, + is_sync_kiocb(iocb)); xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; @@ -215,7 +216,7 @@ xfs_file_dax_read( xfs_ilock(ip, XFS_IOLOCK_SHARED); } - ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); file_accessed(iocb->ki_filp); @@ -351,7 +352,7 @@ restart: trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); error = iomap_zero_range(inode, isize, iocb->ki_pos - isize, - NULL, &xfs_iomap_ops); + NULL, &xfs_buffered_write_iomap_ops); if (error) return error; } else @@ -486,8 +487,7 @@ xfs_file_dio_aio_write( int unaligned_io = 0; int iolock; size_t count = iov_iter_count(from); - struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; + struct xfs_buftarg *target = xfs_inode_buftarg(ip); /* DIO must be aligned to device logical sector size */ if ((iocb->ki_pos | count) & target->bt_logical_sectormask) @@ -551,7 +551,8 @@ xfs_file_dio_aio_write( * If unaligned, this is the only IO in-flight. Wait on it before we * release the iolock to prevent subsequent overlapping IO. */ - ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops, + ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, + &xfs_dio_write_ops, is_sync_kiocb(iocb) || unaligned_io); out: xfs_iunlock(ip, iolock); @@ -591,7 +592,7 @@ xfs_file_dax_write( count = iov_iter_count(from); trace_xfs_file_dax_write(ip, count, pos); - ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops); + ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { i_size_write(inode, iocb->ki_pos); error = xfs_setfilesize(ip, pos, ret); @@ -638,7 +639,8 @@ write_retry: current->backing_dev_info = inode_to_bdi(inode); trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); - ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); + ret = iomap_file_buffered_write(iocb, from, + &xfs_buffered_write_iomap_ops); if (likely(ret >= 0)) iocb->ki_pos += ret; @@ -815,6 +817,36 @@ xfs_file_fallocate( if (error) goto out_unlock; + /* + * Must wait for all AIO to complete before we continue as AIO can + * change the file size on completion without holding any locks we + * currently hold. We must do this first because AIO can update both + * the on disk and in memory inode sizes, and the operations that follow + * require the in-memory size to be fully up-to-date. + */ + inode_dio_wait(inode); + + /* + * Now AIO and DIO has drained we flush and (if necessary) invalidate + * the cached range over the first operation we are about to run. + * + * We care about zero and collapse here because they both run a hole + * punch over the range first. Because that can zero data, and the range + * of invalidation for the shift operations is much larger, we still do + * the required flush for collapse in xfs_prepare_shift(). + * + * Insert has the same range requirements as collapse, and we extend the + * file first which can zero data. Hence insert has the same + * flush/invalidate requirements as collapse and so they are both + * handled at the right time by xfs_prepare_shift(). + */ + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE | + FALLOC_FL_COLLAPSE_RANGE)) { + error = xfs_flush_unmap_range(ip, offset, len); + if (error) + goto out_unlock; + } + if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) @@ -878,16 +910,30 @@ xfs_file_fallocate( } if (mode & FALLOC_FL_ZERO_RANGE) { - error = xfs_zero_file_space(ip, offset, len); + /* + * Punch a hole and prealloc the range. We use a hole + * punch rather than unwritten extent conversion for two + * reasons: + * + * 1.) Hole punch handles partial block zeroing for us. + * 2.) If prealloc returns ENOSPC, the file range is + * still zero-valued by virtue of the hole punch. + */ + unsigned int blksize = i_blocksize(inode); + + trace_xfs_zero_file_space(ip); + + error = xfs_free_file_space(ip, offset, len); + if (error) + goto out_unlock; + + len = round_up(offset + len, blksize) - + round_down(offset, blksize); + offset = round_down(offset, blksize); } else if (mode & FALLOC_FL_UNSHARE_RANGE) { error = xfs_reflink_unshare(ip, offset, len); if (error) goto out_unlock; - - if (!xfs_is_always_cow_inode(ip)) { - error = xfs_alloc_file_space(ip, offset, len, - XFS_BMAPI_PREALLOC); - } } else { /* * If always_cow mode we can't use preallocations and @@ -897,12 +943,14 @@ xfs_file_fallocate( error = -EOPNOTSUPP; goto out_unlock; } + } + if (!xfs_is_always_cow_inode(ip)) { error = xfs_alloc_file_space(ip, offset, len, XFS_BMAPI_PREALLOC); + if (error) + goto out_unlock; } - if (error) - goto out_unlock; } if (file->f_flags & O_DSYNC) @@ -1056,7 +1104,7 @@ xfs_dir_open( */ mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - error = xfs_dir3_data_readahead(ip, 0, -1); + error = xfs_dir3_data_readahead(ip, 0, 0); xfs_iunlock(ip, mode); return error; } @@ -1153,12 +1201,16 @@ __xfs_filemap_fault( if (IS_DAX(inode)) { pfn_t pfn; - ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, + (write_fault && !vmf->cow_page) ? + &xfs_direct_write_iomap_ops : + &xfs_read_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); } else { if (write_fault) - ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); + ret = iomap_page_mkwrite(vmf, + &xfs_buffered_write_iomap_ops); else ret = filemap_fault(vmf); } @@ -1222,22 +1274,22 @@ static const struct vm_operations_struct xfs_file_vm_ops = { STATIC int xfs_file_mmap( - struct file *filp, - struct vm_area_struct *vma) + struct file *file, + struct vm_area_struct *vma) { - struct dax_device *dax_dev; + struct inode *inode = file_inode(file); + struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode)); - dax_dev = xfs_find_daxdev_for_inode(file_inode(filp)); /* * We don't support synchronous mappings for non-DAX files and * for DAX files if underneath dax_device is not synchronous. */ - if (!daxdev_mapping_supported(vma, dax_dev)) + if (!daxdev_mapping_supported(vma, target->bt_daxdev)) return -EOPNOTSUPP; - file_accessed(filp); + file_accessed(file); vma->vm_ops = &xfs_file_vm_ops; - if (IS_DAX(file_inode(filp))) + if (IS_DAX(inode)) vma->vm_flags |= VM_HUGEPAGE; return 0; } |