From 54566b2c1594c2326a645a3551f9d989f7ba3c5e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sun, 4 Jan 2009 12:00:53 -0800 Subject: fs: symlink write_begin allocation context fix With the write_begin/write_end aops, page_symlink was broken because it could no longer pass a GFP_NOFS type mask into the point where the allocations happened. They are done in write_begin, which would always assume that the filesystem can be entered from reclaim. This bug could cause filesystem deadlocks. The funny thing with having a gfp_t mask there is that it doesn't really allow the caller to arbitrarily tinker with the context in which it can be called. It couldn't ever be GFP_ATOMIC, for example, because it needs to take the page lock. The only thing any callers care about is __GFP_FS anyway, so turn that into a single flag. Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on this flag in their write_begin function. Change __grab_cache_page to accept a nofs argument as well, to honour that flag (while we're there, change the name to grab_cache_page_write_begin which is more instructive and does away with random leading underscores). This is really a more flexible way to go in the end anyway -- if a filesystem happens to want any extra allocations aside from the pagecache ones in ints write_begin function, it may now use GFP_KERNEL (rather than GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a random example). [kosaki.motohiro@jp.fujitsu.com: fix ubifs] [kosaki.motohiro@jp.fujitsu.com: fix fuse] Signed-off-by: Nick Piggin Reviewed-by: KOSAKI Motohiro Cc: [2.6.28.x] Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton [ Cleaned up the calling convention: just pass in the AOP flags untouched to the grab_cache_page_write_begin() function. That just simplifies everybody, and may even allow future expansion of the logic. - Linus ] Signed-off-by: Linus Torvalds --- fs/namei.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index dd5c9f0bf829..df2d3df4f049 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2817,18 +2817,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) } } -int __page_symlink(struct inode *inode, const char *symname, int len, - gfp_t gfp_mask) +/* + * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS + */ +int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) { struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; int err; char *kaddr; + unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE; + if (nofs) + flags |= AOP_FLAG_NOFS; retry: err = pagecache_write_begin(NULL, mapping, 0, len-1, - AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + flags, &page, &fsdata); if (err) goto fail; @@ -2852,7 +2857,7 @@ fail: int page_symlink(struct inode *inode, const char *symname, int len) { return __page_symlink(inode, symname, len, - mapping_gfp_mask(inode->i_mapping)); + !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); } const struct inode_operations page_symlink_inode_operations = { -- cgit v1.2.3-70-g09d2 From acfa4380efe77e290d3a96b11cd4c9f24f4fbb18 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Dec 2008 10:06:33 -0500 Subject: inode->i_op is never NULL We used to have rather schizophrenic set of checks for NULL ->i_op even though it had been eliminated years ago. You'd need to go out of your way to set it to NULL explicitly _and_ a bunch of code would die on such inodes anyway. After killing two remaining places that still did that bogosity, all that crap can go away. Signed-off-by: Al Viro --- fs/cifs/inode.c | 2 +- fs/ecryptfs/inode.c | 3 +-- fs/namei.c | 32 +++++++++++++------------------- fs/nfsd/vfs.c | 8 ++++---- fs/open.c | 2 +- fs/stat.c | 2 +- fs/xattr.c | 2 +- mm/memory.c | 4 ++-- mm/nommu.c | 2 +- security/commoncap.c | 6 +++--- 10 files changed, 28 insertions(+), 35 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f247da9f4edc..5ab9896fdcb2 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1641,7 +1641,7 @@ do_expand: i_size_write(inode, offset); spin_unlock(&inode->i_lock); out_truncate: - if (inode->i_op && inode->i_op->truncate) + if (inode->i_op->truncate) inode->i_op->truncate(inode); return 0; out_sig: diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5e78fc179886..0111906a8877 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -612,8 +612,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user * buf, int bufsiz) struct ecryptfs_crypt_stat *crypt_stat; lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (!lower_dentry->d_inode->i_op || - !lower_dentry->d_inode->i_op->readlink) { + if (!lower_dentry->d_inode->i_op->readlink) { rc = -EINVAL; goto out; } diff --git a/fs/namei.c b/fs/namei.c index dd5c9f0bf829..1f6656c3d1b9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -257,7 +257,7 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - if (inode->i_op && inode->i_op->permission) + if (inode->i_op->permission) retval = inode->i_op->permission(inode, mask); else retval = generic_permission(inode, mask, NULL); @@ -432,7 +432,7 @@ static int exec_permission_lite(struct inode *inode) { umode_t mode = inode->i_mode; - if (inode->i_op && inode->i_op->permission) + if (inode->i_op->permission) return -EAGAIN; if (current_fsuid() == inode->i_uid) @@ -908,9 +908,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd) inode = next.dentry->d_inode; if (!inode) goto out_dput; - err = -ENOTDIR; - if (!inode->i_op) - goto out_dput; if (inode->i_op->follow_link) { err = do_follow_link(&next, nd); @@ -920,9 +917,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd) inode = nd->path.dentry->d_inode; if (!inode) break; - err = -ENOTDIR; - if (!inode->i_op) - break; } else path_to_nameidata(&next, nd); err = -ENOTDIR; @@ -961,7 +955,7 @@ last_component: break; inode = next.dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { + && inode && inode->i_op->follow_link) { err = do_follow_link(&next, nd); if (err) goto return_err; @@ -973,7 +967,7 @@ last_component: break; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; - if (!inode->i_op || !inode->i_op->lookup) + if (!inode->i_op->lookup) break; } goto return_base; @@ -1469,7 +1463,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, if (error) return error; - if (!dir->i_op || !dir->i_op->create) + if (!dir->i_op->create) return -EACCES; /* shouldn't it be ENOSYS? */ mode &= S_IALLUGO; mode |= S_IFREG; @@ -1752,7 +1746,7 @@ do_last: error = -ENOENT; if (!path.dentry->d_inode) goto exit_dput; - if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) + if (path.dentry->d_inode->i_op->follow_link) goto do_link; path_to_nameidata(&path, &nd); @@ -1933,7 +1927,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) return -EPERM; - if (!dir->i_op || !dir->i_op->mknod) + if (!dir->i_op->mknod) return -EPERM; error = devcgroup_inode_mknod(mode, dev); @@ -2035,7 +2029,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (error) return error; - if (!dir->i_op || !dir->i_op->mkdir) + if (!dir->i_op->mkdir) return -EPERM; mode &= (S_IRWXUGO|S_ISVTX); @@ -2126,7 +2120,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) return error; - if (!dir->i_op || !dir->i_op->rmdir) + if (!dir->i_op->rmdir) return -EPERM; DQUOT_INIT(dir); @@ -2213,7 +2207,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (error) return error; - if (!dir->i_op || !dir->i_op->unlink) + if (!dir->i_op->unlink) return -EPERM; DQUOT_INIT(dir); @@ -2320,7 +2314,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) if (error) return error; - if (!dir->i_op || !dir->i_op->symlink) + if (!dir->i_op->symlink) return -EPERM; error = security_inode_symlink(dir, dentry, oldname); @@ -2401,7 +2395,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de */ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; - if (!dir->i_op || !dir->i_op->link) + if (!dir->i_op->link) return -EPERM; if (S_ISDIR(inode->i_mode)) return -EPERM; @@ -2608,7 +2602,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - if (!old_dir->i_op || !old_dir->i_op->rename) + if (!old_dir->i_op->rename) return -EPERM; DQUOT_INIT(old_dir); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d1c5f787b365..5245a3965004 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1211,7 +1211,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, dirp = dentry->d_inode; err = nfserr_notdir; - if(!dirp->i_op || !dirp->i_op->lookup) + if (!dirp->i_op->lookup) goto out; /* * Check whether the response file handle has been verified yet. @@ -1347,7 +1347,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, /* Get all the sanity checks out of the way before * we lock the parent. */ err = nfserr_notdir; - if(!dirp->i_op || !dirp->i_op->lookup) + if (!dirp->i_op->lookup) goto out; fh_lock_nested(fhp, I_MUTEX_PARENT); @@ -1482,7 +1482,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) inode = dentry->d_inode; err = nfserr_inval; - if (!inode->i_op || !inode->i_op->readlink) + if (!inode->i_op->readlink) goto out; touch_atime(fhp->fh_export->ex_path.mnt, dentry); @@ -2162,7 +2162,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) size_t size; int error; - if (!IS_POSIXACL(inode) || !inode->i_op || + if (!IS_POSIXACL(inode) || !inode->i_op->setxattr || !inode->i_op->removexattr) return -EOPNOTSUPP; switch(type) { diff --git a/fs/open.c b/fs/open.c index 1cd7d40e9991..d882fd2351d6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -412,7 +412,7 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len) if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) goto out_fput; - if (inode->i_op && inode->i_op->fallocate) + if (inode->i_op->fallocate) ret = inode->i_op->fallocate(inode, mode, offset, len); else ret = -EOPNOTSUPP; diff --git a/fs/stat.c b/fs/stat.c index 7c46fbeb8b76..7e12a6f82795 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -305,7 +305,7 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *pathname, struct inode *inode = path.dentry->d_inode; error = -EINVAL; - if (inode->i_op && inode->i_op->readlink) { + if (inode->i_op->readlink) { error = security_inode_readlink(path.dentry); if (!error) { touch_atime(path.mnt, path.dentry); diff --git a/fs/xattr.c b/fs/xattr.c index 468377e66531..237804cd6b56 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -175,7 +175,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size) if (error) return error; error = -EOPNOTSUPP; - if (d->d_inode->i_op && d->d_inode->i_op->listxattr) { + if (d->d_inode->i_op->listxattr) { error = d->d_inode->i_op->listxattr(d, list, size); } else { error = security_inode_listsecurity(d->d_inode, list, size); diff --git a/mm/memory.c b/mm/memory.c index 0a2010a9518c..7b9db658aca2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2266,7 +2266,7 @@ int vmtruncate(struct inode * inode, loff_t offset) unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); } - if (inode->i_op && inode->i_op->truncate) + if (inode->i_op->truncate) inode->i_op->truncate(inode); return 0; @@ -2286,7 +2286,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) * a way to truncate a range of blocks (punch a hole) - * we should return failure right now. */ - if (!inode->i_op || !inode->i_op->truncate_range) + if (!inode->i_op->truncate_range) return -ENOSYS; mutex_lock(&inode->i_mutex); diff --git a/mm/nommu.c b/mm/nommu.c index 7695dc850785..1c28ea3a4e9c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -86,7 +86,7 @@ do_expand: i_size_write(inode, offset); out_truncate: - if (inode->i_op && inode->i_op->truncate) + if (inode->i_op->truncate) inode->i_op->truncate(inode); return 0; out_sig: diff --git a/security/commoncap.c b/security/commoncap.c index 79713545cd63..69fc9952650f 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -238,7 +238,7 @@ int cap_inode_need_killpriv(struct dentry *dentry) struct inode *inode = dentry->d_inode; int error; - if (!inode->i_op || !inode->i_op->getxattr) + if (!inode->i_op->getxattr) return 0; error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); @@ -259,7 +259,7 @@ int cap_inode_killpriv(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - if (!inode->i_op || !inode->i_op->removexattr) + if (!inode->i_op->removexattr) return 0; return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); @@ -317,7 +317,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); - if (!inode || !inode->i_op || !inode->i_op->getxattr) + if (!inode || !inode->i_op->getxattr) return -ENODATA; size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps, -- cgit v1.2.3-70-g09d2