diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-03 20:33:41 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-03 20:33:41 -0700 |
commit | 725737e7c21d2d25a4312c2aaa82a52bd03e3126 (patch) | |
tree | 85378eed2b05ec4c176afd8b0b33faab538084a3 /fs/ext4 | |
parent | 5779aa2dac9a8dcad89b3774ee354de8b453ab21 (diff) | |
parent | 61a223df421f698c253143014cfd384255b3cf1e (diff) |
Merge tag 'statx-dioalign-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull STATX_DIOALIGN support from Eric Biggers:
"Make statx() support reporting direct I/O (DIO) alignment information.
This provides a generic interface for userspace programs to determine
whether a file supports DIO, and if so with what alignment
restrictions. Specifically, STATX_DIOALIGN works on block devices, and
on regular files when their containing filesystem has implemented
support.
An interface like this has been requested for years, since the
conditions for when DIO is supported in Linux have gotten increasingly
complex over time. Today, DIO support and alignment requirements can
be affected by various filesystem features such as multi-device
support, data journalling, inline data, encryption, verity,
compression, checkpoint disabling, log-structured mode, etc.
Further complicating things, Linux v6.0 relaxed the traditional rule
of DIO needing to be aligned to the block device's logical block size;
now user buffers (but not file offsets) only need to be aligned to the
DMA alignment.
The approach of uplifting the XFS specific ioctl XFS_IOC_DIOINFO was
discarded in favor of creating a clean new interface with statx().
For more information, see the individual commits and the man page
update[1]"
Link: https://lore.kernel.org/r/20220722074229.148925-1-ebiggers@kernel.org [1]
* tag 'statx-dioalign-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux:
xfs: support STATX_DIOALIGN
f2fs: support STATX_DIOALIGN
f2fs: simplify f2fs_force_buffered_io()
f2fs: move f2fs_force_buffered_io() into file.c
ext4: support STATX_DIOALIGN
fscrypt: change fscrypt_dio_supported() to prepare for STATX_DIOALIGN
vfs: support STATX_DIOALIGN on block devices
statx: add direct I/O alignment information
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
-rw-r--r-- | fs/ext4/file.c | 37 | ||||
-rw-r--r-- | fs/ext4/inode.c | 37 |
3 files changed, 64 insertions, 11 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3bf9a6926798..e5f2f5ca5120 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2977,6 +2977,7 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct user_namespace *, struct dentry *, struct iattr *); +extern u32 ext4_dio_alignment(struct inode *inode); extern int ext4_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 109d07629f81..8bb1c35fd6dd 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -36,19 +36,34 @@ #include "acl.h" #include "truncate.h" -static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter) +/* + * Returns %true if the given DIO request should be attempted with DIO, or + * %false if it should fall back to buffered I/O. + * + * DIO isn't well specified; when it's unsupported (either due to the request + * being misaligned, or due to the file not supporting DIO at all), filesystems + * either fall back to buffered I/O or return EINVAL. For files that don't use + * any special features like encryption or verity, ext4 has traditionally + * returned EINVAL for misaligned DIO. iomap_dio_rw() uses this convention too. + * In this case, we should attempt the DIO, *not* fall back to buffered I/O. + * + * In contrast, in cases where DIO is unsupported due to ext4 features, ext4 + * traditionally falls back to buffered I/O. + * + * This function implements the traditional ext4 behavior in all these cases. + */ +static bool ext4_should_use_dio(struct kiocb *iocb, struct iov_iter *iter) { struct inode *inode = file_inode(iocb->ki_filp); + u32 dio_align = ext4_dio_alignment(inode); - if (!fscrypt_dio_supported(iocb, iter)) - return false; - if (fsverity_active(inode)) + if (dio_align == 0) return false; - if (ext4_should_journal_data(inode)) - return false; - if (ext4_has_inline_data(inode)) - return false; - return true; + + if (dio_align == 1) + return true; + + return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align); } static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -63,7 +78,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) inode_lock_shared(inode); } - if (!ext4_dio_supported(iocb, to)) { + if (!ext4_should_use_dio(iocb, to)) { inode_unlock_shared(inode); /* * Fallback to buffered I/O if the operation being performed on @@ -511,7 +526,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) } /* Fallback to buffered I/O if the inode does not support direct I/O. */ - if (!ext4_dio_supported(iocb, from)) { + if (!ext4_should_use_dio(iocb, from)) { if (ilock_shared) inode_unlock_shared(inode); else diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 601214453c3a..364774230d87 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5550,6 +5550,22 @@ err_out: return error; } +u32 ext4_dio_alignment(struct inode *inode) +{ + if (fsverity_active(inode)) + return 0; + if (ext4_should_journal_data(inode)) + return 0; + if (ext4_has_inline_data(inode)) + return 0; + if (IS_ENCRYPTED(inode)) { + if (!fscrypt_dio_supported(inode)) + return 0; + return i_blocksize(inode); + } + return 1; /* use the iomap defaults */ +} + int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -5565,6 +5581,27 @@ int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path, stat->btime.tv_nsec = ei->i_crtime.tv_nsec; } + /* + * Return the DIO alignment restrictions if requested. We only return + * this information when requested, since on encrypted files it might + * take a fair bit of work to get if the file wasn't opened recently. + */ + if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { + u32 dio_align = ext4_dio_alignment(inode); + + stat->result_mask |= STATX_DIOALIGN; + if (dio_align == 1) { + struct block_device *bdev = inode->i_sb->s_bdev; + + /* iomap defaults */ + stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; + stat->dio_offset_align = bdev_logical_block_size(bdev); + } else { + stat->dio_mem_align = dio_align; + stat->dio_offset_align = dio_align; + } + } + flags = ei->i_flags & EXT4_FL_USER_VISIBLE; if (flags & EXT4_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; |