diff options
-rw-r--r-- | fs/cachefiles/namei.c | 12 | ||||
-rw-r--r-- | fs/file_table.c | 91 | ||||
-rw-r--r-- | fs/internal.h | 5 | ||||
-rw-r--r-- | fs/namei.c | 24 | ||||
-rw-r--r-- | fs/open.c | 76 | ||||
-rw-r--r-- | fs/overlayfs/file.c | 8 | ||||
-rw-r--r-- | fs/overlayfs/overlayfs.h | 5 | ||||
-rw-r--r-- | include/linux/fs.h | 42 | ||||
-rw-r--r-- | include/linux/fsnotify.h | 4 |
9 files changed, 205 insertions, 62 deletions
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 66482c193e86..d9d22d0ec38a 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -451,10 +451,10 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object) ret = cachefiles_inject_write_error(); if (ret == 0) { - file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath, - S_IFREG | 0600, - O_RDWR | O_LARGEFILE | O_DIRECT, - cache->cache_cred); + file = kernel_tmpfile_open(&nop_mnt_idmap, &parentpath, + S_IFREG | 0600, + O_RDWR | O_LARGEFILE | O_DIRECT, + cache->cache_cred); ret = PTR_ERR_OR_ZERO(file); } if (ret) { @@ -561,8 +561,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object, */ path.mnt = cache->mnt; path.dentry = dentry; - file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, - d_backing_inode(dentry), cache->cache_cred); + file = kernel_file_open(&path, O_RDWR | O_LARGEFILE | O_DIRECT, + d_backing_inode(dentry), cache->cache_cred); if (IS_ERR(file)) { trace_cachefiles_vfs_error(object, d_backing_inode(dentry), PTR_ERR(file), diff --git a/fs/file_table.c b/fs/file_table.c index 372653b92617..e06c68e2d757 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly; static struct percpu_counter nr_files __cacheline_aligned_in_smp; +/* Container for backing file with optional real path */ +struct backing_file { + struct file file; + struct path real_path; +}; + +static inline struct backing_file *backing_file(struct file *f) +{ + return container_of(f, struct backing_file, file); +} + +struct path *backing_file_real_path(struct file *f) +{ + return &backing_file(f)->real_path; +} +EXPORT_SYMBOL_GPL(backing_file_real_path); + static void file_free_rcu(struct rcu_head *head) { struct file *f = container_of(head, struct file, f_rcuhead); put_cred(f->f_cred); - kmem_cache_free(filp_cachep, f); + if (unlikely(f->f_mode & FMODE_BACKING)) + kfree(backing_file(f)); + else + kmem_cache_free(filp_cachep, f); } static inline void file_free(struct file *f) { security_file_free(f); - if (!(f->f_mode & FMODE_NOACCOUNT)) + if (unlikely(f->f_mode & FMODE_BACKING)) + path_put(backing_file_real_path(f)); + if (likely(!(f->f_mode & FMODE_NOACCOUNT))) percpu_counter_dec(&nr_files); call_rcu(&f->f_rcuhead, file_free_rcu); } @@ -131,20 +153,15 @@ static int __init init_fs_stat_sysctls(void) fs_initcall(init_fs_stat_sysctls); #endif -static struct file *__alloc_file(int flags, const struct cred *cred) +static int init_file(struct file *f, int flags, const struct cred *cred) { - struct file *f; int error; - f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); - if (unlikely(!f)) - return ERR_PTR(-ENOMEM); - f->f_cred = get_cred(cred); error = security_file_alloc(f); if (unlikely(error)) { file_free_rcu(&f->f_rcuhead); - return ERR_PTR(error); + return error; } atomic_long_set(&f->f_count, 1); @@ -155,7 +172,7 @@ static struct file *__alloc_file(int flags, const struct cred *cred) f->f_mode = OPEN_FMODE(flags); /* f->f_version: 0 */ - return f; + return 0; } /* Find an unused file structure and return a pointer to it. @@ -172,6 +189,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) { static long old_max; struct file *f; + int error; /* * Privileged users can go above max_files @@ -185,9 +203,15 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) goto over; } - f = __alloc_file(flags, cred); - if (!IS_ERR(f)) - percpu_counter_inc(&nr_files); + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + if (unlikely(!f)) + return ERR_PTR(-ENOMEM); + + error = init_file(f, flags, cred); + if (unlikely(error)) + return ERR_PTR(error); + + percpu_counter_inc(&nr_files); return f; @@ -203,18 +227,51 @@ over: /* * Variant of alloc_empty_file() that doesn't check and modify nr_files. * - * Should not be used unless there's a very good reason to do so. + * This is only for kernel internal use, and the allocate file must not be + * installed into file tables or such. */ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred) { - struct file *f = __alloc_file(flags, cred); + struct file *f; + int error; - if (!IS_ERR(f)) - f->f_mode |= FMODE_NOACCOUNT; + f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + if (unlikely(!f)) + return ERR_PTR(-ENOMEM); + + error = init_file(f, flags, cred); + if (unlikely(error)) + return ERR_PTR(error); + + f->f_mode |= FMODE_NOACCOUNT; return f; } +/* + * Variant of alloc_empty_file() that allocates a backing_file container + * and doesn't check and modify nr_files. + * + * This is only for kernel internal use, and the allocate file must not be + * installed into file tables or such. + */ +struct file *alloc_empty_backing_file(int flags, const struct cred *cred) +{ + struct backing_file *ff; + int error; + + ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL); + if (unlikely(!ff)) + return ERR_PTR(-ENOMEM); + + error = init_file(&ff->file, flags, cred); + if (unlikely(error)) + return ERR_PTR(error); + + ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT; + return &ff->file; +} + /** * alloc_file - allocate and initialize a 'struct file' * diff --git a/fs/internal.h b/fs/internal.h index 5cec33cdf70f..f7a3dc111026 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -97,8 +97,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern struct file *alloc_empty_file(int, const struct cred *); -extern struct file *alloc_empty_file_noaccount(int, const struct cred *); +struct file *alloc_empty_file(int flags, const struct cred *cred); +struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred); +struct file *alloc_empty_backing_file(int flags, const struct cred *cred); static inline void put_file_access(struct file *file) { diff --git a/fs/namei.c b/fs/namei.c index 6a5e26a529e1..91171da719c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3703,7 +3703,7 @@ static int vfs_tmpfile(struct mnt_idmap *idmap, } /** - * vfs_tmpfile_open - open a tmpfile for kernel internal use + * kernel_tmpfile_open - open a tmpfile for kernel internal use * @idmap: idmap of the mount the inode was found from * @parentpath: path of the base directory * @mode: mode of the new tmpfile @@ -3714,24 +3714,26 @@ static int vfs_tmpfile(struct mnt_idmap *idmap, * hence this is only for kernel internal use, and must not be installed into * file tables or such. */ -struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, - const struct path *parentpath, - umode_t mode, int open_flag, const struct cred *cred) +struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, + const struct path *parentpath, + umode_t mode, int open_flag, + const struct cred *cred) { struct file *file; int error; file = alloc_empty_file_noaccount(open_flag, cred); - if (!IS_ERR(file)) { - error = vfs_tmpfile(idmap, parentpath, file, mode); - if (error) { - fput(file); - file = ERR_PTR(error); - } + if (IS_ERR(file)) + return file; + + error = vfs_tmpfile(idmap, parentpath, file, mode); + if (error) { + fput(file); + file = ERR_PTR(error); } return file; } -EXPORT_SYMBOL(vfs_tmpfile_open); +EXPORT_SYMBOL(kernel_tmpfile_open); static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, diff --git a/fs/open.c b/fs/open.c index fa5d53282dfe..fb07b2840eb4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1108,23 +1108,77 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, } EXPORT_SYMBOL(dentry_create); -struct file *open_with_fake_path(const struct path *path, int flags, +/** + * kernel_file_open - open a file for kernel internal use + * @path: path of the file to open + * @flags: open flags + * @inode: the inode + * @cred: credentials for open + * + * Open a file for use by in-kernel consumers. The file is not accounted + * against nr_files and must not be installed into the file descriptor + * table. + * + * Return: Opened file on success, an error pointer on failure. + */ +struct file *kernel_file_open(const struct path *path, int flags, struct inode *inode, const struct cred *cred) { - struct file *f = alloc_empty_file_noaccount(flags, cred); - if (!IS_ERR(f)) { - int error; + struct file *f; + int error; - f->f_path = *path; - error = do_dentry_open(f, inode, NULL); - if (error) { - fput(f); - f = ERR_PTR(error); - } + f = alloc_empty_file_noaccount(flags, cred); + if (IS_ERR(f)) + return f; + + f->f_path = *path; + error = do_dentry_open(f, inode, NULL); + if (error) { + fput(f); + f = ERR_PTR(error); } return f; } -EXPORT_SYMBOL(open_with_fake_path); +EXPORT_SYMBOL_GPL(kernel_file_open); + +/** + * backing_file_open - open a backing file for kernel internal use + * @path: path of the file to open + * @flags: open flags + * @path: path of the backing file + * @cred: credentials for open + * + * Open a backing file for a stackable filesystem (e.g., overlayfs). + * @path may be on the stackable filesystem and backing inode on the + * underlying filesystem. In this case, we want to be able to return + * the @real_path of the backing inode. This is done by embedding the + * returned file into a container structure that also stores the path of + * the backing inode on the underlying filesystem, which can be + * retrieved using backing_file_real_path(). + */ +struct file *backing_file_open(const struct path *path, int flags, + const struct path *real_path, + const struct cred *cred) +{ + struct file *f; + int error; + + f = alloc_empty_backing_file(flags, cred); + if (IS_ERR(f)) + return f; + + f->f_path = *path; + path_get(real_path); + *backing_file_real_path(f) = *real_path; + error = do_dentry_open(f, d_inode(real_path->dentry), NULL); + if (error) { + fput(f); + f = ERR_PTR(error); + } + + return f; +} +EXPORT_SYMBOL_GPL(backing_file_open); #define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE)) #define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 0801917f932e..dbbb156c2d67 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -34,8 +34,8 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode) return 'm'; } -/* No atime modification nor notify on underlying */ -#define OVL_OPEN_FLAGS (O_NOATIME | __FMODE_NONOTIFY) +/* No atime modification on underlying */ +#define OVL_OPEN_FLAGS (O_NOATIME) static struct file *ovl_open_realfile(const struct file *file, const struct path *realpath) @@ -61,8 +61,8 @@ static struct file *ovl_open_realfile(const struct file *file, if (!inode_owner_or_capable(real_idmap, realinode)) flags &= ~O_NOATIME; - realfile = open_with_fake_path(&file->f_path, flags, realinode, - current_cred()); + realfile = backing_file_open(&file->f_path, flags, realpath, + current_cred()); } revert_creds(old_cred); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 4d0b278f5630..23686e8a06c4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -329,8 +329,9 @@ static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry }; - struct file *file = vfs_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode, - O_LARGEFILE | O_WRONLY, current_cred()); + struct file *file = kernel_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, + mode, O_LARGEFILE | O_WRONLY, + current_cred()); int err = PTR_ERR_OR_ZERO(file); pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); diff --git a/include/linux/fs.h b/include/linux/fs.h index 66f105ef3427..4ca804f2b3ed 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -171,6 +171,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports non-exclusive O_DIRECT writes from multiple threads */ #define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000) +/* File is embedded in backing_file object */ +#define FMODE_BACKING ((__force fmode_t)0x2000000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) @@ -1678,9 +1681,12 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap, WHITEOUT_DEV); } -struct file *vfs_tmpfile_open(struct mnt_idmap *idmap, - const struct path *parentpath, - umode_t mode, int open_flag, const struct cred *cred); +struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, + const struct path *parentpath, + umode_t mode, int open_flag, + const struct cred *cred); +struct file *kernel_file_open(const struct path *path, int flags, + struct inode *inode, const struct cred *cred); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), @@ -2355,11 +2361,31 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt, return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root}, name, flags, mode); } -extern struct file * dentry_open(const struct path *, int, const struct cred *); -extern struct file *dentry_create(const struct path *path, int flags, - umode_t mode, const struct cred *cred); -extern struct file * open_with_fake_path(const struct path *, int, - struct inode*, const struct cred *); +struct file *dentry_open(const struct path *path, int flags, + const struct cred *creds); +struct file *dentry_create(const struct path *path, int flags, umode_t mode, + const struct cred *cred); +struct file *backing_file_open(const struct path *path, int flags, + const struct path *real_path, + const struct cred *cred); +struct path *backing_file_real_path(struct file *f); + +/* + * file_real_path - get the path corresponding to f_inode + * + * When opening a backing file for a stackable filesystem (e.g., + * overlayfs) f_path may be on the stackable filesystem and f_inode on + * the underlying filesystem. When the path associated with f_inode is + * needed, this helper should be used instead of accessing f_path + * directly. +*/ +static inline const struct path *file_real_path(struct file *f) +{ + if (unlikely(f->f_mode & FMODE_BACKING)) + return backing_file_real_path(f); + return &f->f_path; +} + static inline struct file *file_clone_open(struct file *file) { return dentry_open(&file->f_path, file->f_flags, file->f_cred); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index bb8467cd11ae..ed48e4f1e755 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -91,11 +91,13 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) static inline int fsnotify_file(struct file *file, __u32 mask) { - const struct path *path = &file->f_path; + const struct path *path; if (file->f_mode & FMODE_NONOTIFY) return 0; + /* Overlayfs internal files have fake f_path */ + path = file_real_path(file); return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } |