From 703a3b8e5c01cf6fb33c6d8dc99905f889a4e992 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 21 May 2009 22:21:53 +0000 Subject: [CIFS] fix posix open regression Posix open code was not properly adding the file to the list of open files. Fix allocating cifsFileInfo more than once, and adding twice to flist and tlist. Also fix mode setting to be done in one place in these paths. Signed-off-by: Steve French Reviewed-by: Shirish Pargaonkar Tested-by: Jeff Layton Tested-by: Luca Tettamanti --- fs/cifs/dir.c | 14 ++++++------- fs/cifs/file.c | 66 ++++++++++++++++++++++++++++++++-------------------------- 2 files changed, 44 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 11431ed72a7f..f49d684edd96 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -225,6 +225,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, if (!(oflags & FMODE_READ)) write_only = true; + mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, pnetfid, presp_data, &oplock, full_path, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -310,7 +311,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, return -ENOMEM; } - mode &= ~current_umask(); if (oplockEnabled) oplock = REQ_OPLOCK; @@ -336,7 +336,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, else /* success, no need to query */ goto cifs_create_set_dentry; } else if ((rc != -EIO) && (rc != -EREMOTE) && - (rc != -EOPNOTSUPP)) /* path not found or net err */ + (rc != -EOPNOTSUPP) && (rc != -EINVAL)) goto cifs_create_out; /* else fallthrough to retry, using older open call, this is case where server does not support this SMB level, and @@ -609,7 +609,6 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ int oplock = 0; - int mode; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; @@ -660,13 +659,12 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if (pTcon->unix_ext) { if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && - (nd->flags & LOOKUP_OPEN)) { + (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open) { if (!((nd->intent.open.flags & O_CREAT) && (nd->intent.open.flags & O_EXCL))) { - mode = nd->intent.open.create_mode & - ~current_umask(); rc = cifs_posix_open(full_path, &newInode, - parent_dir_inode->i_sb, mode, + parent_dir_inode->i_sb, + nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); /* @@ -681,6 +679,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, */ if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) posix_open = true; + else + pTcon->broken_posix_open = true; } } if (!posix_open) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 38c06f826575..302ea15f02e6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -130,10 +130,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode, struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) { - file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); - if (file->private_data == NULL) - return -ENOMEM; - pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); write_lock(&GlobalSMBSeslock); pCifsInode = CIFS_I(file->f_path.dentry->d_inode); @@ -184,6 +180,38 @@ psx_client_can_cache: return 0; } +static struct cifsFileInfo * +cifs_fill_filedata(struct file *file) +{ + struct list_head *tmp; + struct cifsFileInfo *pCifsFile = NULL; + struct cifsInodeInfo *pCifsInode = NULL; + + /* search inode for this file and fill in file->private_data */ + pCifsInode = CIFS_I(file->f_path.dentry->d_inode); + read_lock(&GlobalSMBSeslock); + list_for_each(tmp, &pCifsInode->openFileList) { + pCifsFile = list_entry(tmp, struct cifsFileInfo, flist); + if ((pCifsFile->pfile == NULL) && + (pCifsFile->pid == current->tgid)) { + /* mode set in cifs_create */ + + /* needed for writepage */ + pCifsFile->pfile = file; + file->private_data = pCifsFile; + break; + } + } + read_unlock(&GlobalSMBSeslock); + + if (file->private_data != NULL) { + return pCifsFile; + } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) + cERROR(1, ("could not find file instance for " + "new file %p", file)); + return NULL; +} + /* all arguments to this function must be checked for validity in caller */ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile, @@ -258,7 +286,6 @@ int cifs_open(struct inode *inode, struct file *file) struct cifsTconInfo *tcon; struct cifsFileInfo *pCifsFile; struct cifsInodeInfo *pCifsInode; - struct list_head *tmp; char *full_path = NULL; int desiredAccess; int disposition; @@ -270,32 +297,12 @@ int cifs_open(struct inode *inode, struct file *file) cifs_sb = CIFS_SB(inode->i_sb); tcon = cifs_sb->tcon; - /* search inode for this file and fill in file->private_data */ pCifsInode = CIFS_I(file->f_path.dentry->d_inode); - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &pCifsInode->openFileList) { - pCifsFile = list_entry(tmp, struct cifsFileInfo, - flist); - if ((pCifsFile->pfile == NULL) && - (pCifsFile->pid == current->tgid)) { - /* mode set in cifs_create */ - - /* needed for writepage */ - pCifsFile->pfile = file; - - file->private_data = pCifsFile; - break; - } - } - read_unlock(&GlobalSMBSeslock); - - if (file->private_data != NULL) { - rc = 0; + pCifsFile = cifs_fill_filedata(file); + if (pCifsFile) { FreeXid(xid); - return rc; - } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) - cERROR(1, ("could not find file instance for " - "new file %p", file)); + return 0; + } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { @@ -325,6 +332,7 @@ int cifs_open(struct inode *inode, struct file *file) /* no need for special case handling of setting mode on read only files needed here */ + pCifsFile = cifs_fill_filedata(file); cifs_posix_open_inode_helper(inode, file, pCifsInode, pCifsFile, oplock, netfid); goto out; -- cgit v1.2.3-70-g09d2 From d5046853634a8d73f28bad3cf68d182c4a99035d Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 22 May 2009 20:36:21 +0900 Subject: nilfs2: fix memory leak in nilfs_ioctl_clean_segments This fixes a new memory leak problem in garbage collection. The problem was brought by the bugfix patch ("nilfs2: fix lock order reversal in nilfs_clean_segments ioctl"). Thanks to Kentaro Suzuki for finding this problem. Reported-by: Kentaro Suzuki Signed-off-by: Ryusuke Konishi --- fs/nilfs2/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 50ff3f2cdf24..d6759b92006f 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -576,7 +576,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); out_free: - while (--n > 0) + while (--n >= 0) vfree(kbufs[n]); kfree(kbufs[4]); return ret; -- cgit v1.2.3-70-g09d2 From 8db14ca12569fe885694bd3d5ff84c2d973d3cb0 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sat, 23 May 2009 18:57:25 +0000 Subject: [CIFS] Avoid open on possible directories since Samba now rejects them Small change (mostly formatting) to limit lookup based open calls to file create only. After discussion yesteday on samba-technical about the posix lookup regression, and looking at a problem with cifs posix open to one particular Samba version, Jeff and JRA realized that Samba server's behavior changed in this area (posix open behavior on files vs. directories). To make this behavior consistent, JRA just made a fix to Samba server to alter how it handles open of directories (now returning the equivalent of EISDIR instead of success). Since we don't know at lookup time whether the inode is a directory or file (and thus whether posix open will succeed with most current Samba server), this change avoids the posix open code on lookup open (just issues posix open on creates). This gets the semantic benefits we want (atomicity, posix byte range locks, improved write semantics on newly created files) and file create still is fast, and we avoid the problem that Jeff noticed yesterday with "openat" (and some open directory calls) of non-cached directories to one version of Samba server, and will work with future Samba versions (which include the fix jra just pushed into Samba server). I confirmed this approach with jra yesterday and with Shirish today. Posix open is only called (at lookup time) for file create now. For opens (rather than creates), because we do not know if it is a file or directory yet, and current Samba no longer allows us to do posix open on dirs, we could end up wasting an open call on what turns out to be a dir. For file opens, we wait to call posix open till cifs_open. It could be added here (lookup) in the future but the performance tradeoff of the extra network request when EISDIR or EACCES is returned would have to be weighed against the 50% reduction in network traffic in the other paths. Reviewed-by: Shirish Pargaonkar Tested-by: Jeff Layton CC: Jeremy Allison Signed-off-by: Steve French --- fs/cifs/dir.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f49d684edd96..3758965d73d5 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -657,31 +657,36 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); + /* Posix open is only called (at lookup time) for file create now. + * For opens (rather than creates), because we do not know if it + * is a file or directory yet, and current Samba no longer allows + * us to do posix open on dirs, we could end up wasting an open call + * on what turns out to be a dir. For file opens, we wait to call posix + * open till cifs_open. It could be added here (lookup) in the future + * but the performance tradeoff of the extra network request when EISDIR + * or EACCES is returned would have to be weighed against the 50% + * reduction in network traffic in the other paths. + */ if (pTcon->unix_ext) { if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && - (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open) { - if (!((nd->intent.open.flags & O_CREAT) && - (nd->intent.open.flags & O_EXCL))) { - rc = cifs_posix_open(full_path, &newInode, + (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && + (nd->intent.open.flags & O_CREAT)) { + rc = cifs_posix_open(full_path, &newInode, parent_dir_inode->i_sb, nd->intent.open.create_mode, nd->intent.open.flags, &oplock, &fileHandle, xid); - /* - * This code works around a bug in - * samba posix open in samba versions 3.3.1 - * and earlier where create works - * but open fails with invalid parameter. - * If either of these error codes are - * returned, follow the normal lookup. - * Otherwise, the error during posix open - * is handled. - */ - if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) - posix_open = true; - else - pTcon->broken_posix_open = true; - } + /* + * The check below works around a bug in POSIX + * open in samba versions 3.3.1 and earlier where + * open could incorrectly fail with invalid parameter. + * If either that or op not supported returned, follow + * the normal lookup. + */ + if ((rc == 0) || (rc == -ENOENT)) + posix_open = true; + else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP)) + pTcon->broken_posix_open = true; } if (!posix_open) rc = cifs_get_inode_info_unix(&newInode, full_path, -- cgit v1.2.3-70-g09d2 From d0367a508af9cf97beb202935bb9ad8883d30cd1 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 26 May 2009 14:51:00 -0400 Subject: nfs: fix build error in nfsroot with initconst fix build error with latest kbuild adjustments to initconst. The commit a447c0932445f92ce6f4c1bd020f62c5097a7842 ("vfs: Use const for kernel parser table") changed: static match_table_t __initdata tokens = { to static match_table_t __initconst tokens = { But the missing const causes popwerpc to fail with latest updates to __initconst like this: fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict fs/nfs/nfsroot.c:400: error: __setup_str_nfs_root_setup causes a section type conflict The bug is only present with kbuild-next. Following patch has been build tested. Signed-off-by: Sam Ravnborg Cc: Steven Whitehouse Cc: Stephen Rothwell Acked-by: Jan Beulich Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index d9ef602fbc5a..e3ed5908820b 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -129,7 +129,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static const match_table_t tokens __initconst = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, -- cgit v1.2.3-70-g09d2 From 95baa25c7321eb8613246acbf61b97911cc748d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 26 May 2009 14:51:00 -0400 Subject: NFSv4: Fix the case where NFSv4 renewal fails If the asynchronous lease renewal fails (usually due to a soft timeout), then we _must_ schedule state recovery in order to ensure that we don't lose the lease unnecessarily or, if the lease is already lost, that we recover the locking state promptly... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a4d242680299..4674f8092da8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2594,12 +2594,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) unsigned long timestamp = (unsigned long)data; if (task->tk_status < 0) { - switch (task->tk_status) { - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_CB_PATH_DOWN: - nfs4_schedule_state_recovery(clp); - } + /* Unless we're shutting down, schedule state recovery! */ + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + nfs4_schedule_state_recovery(clp); return; } spin_lock(&clp->cl_lock); -- cgit v1.2.3-70-g09d2 From 348ca1029e8bae6e0c49097ad25439b17c5326f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 May 2009 15:46:50 +0100 Subject: FS-Cache: Fixup renamed filenames in comments in internal.h Fix up renamed filenames in comments in fs/fscache/internal.h. Originally, the files were all called fsc-xxx.c, but they got renamed to just xxx.c. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/fscache/internal.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index e0cbd16f6dc9..1c341304621f 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -28,7 +28,7 @@ #define FSCACHE_MAX_THREADS 32 /* - * fsc-cache.c + * cache.c */ extern struct list_head fscache_cache_list; extern struct rw_semaphore fscache_addremove_sem; @@ -37,7 +37,7 @@ extern struct fscache_cache *fscache_select_cache_for_object( struct fscache_cookie *); /* - * fsc-cookie.c + * cookie.c */ extern struct kmem_cache *fscache_cookie_jar; @@ -45,13 +45,13 @@ extern void fscache_cookie_init_once(void *); extern void __fscache_cookie_put(struct fscache_cookie *); /* - * fsc-fsdef.c + * fsdef.c */ extern struct fscache_cookie fscache_fsdef_index; extern struct fscache_cookie_def fscache_fsdef_netfs_def; /* - * fsc-histogram.c + * histogram.c */ #ifdef CONFIG_FSCACHE_HISTOGRAM extern atomic_t fscache_obj_instantiate_histogram[HZ]; @@ -75,7 +75,7 @@ extern const struct file_operations fscache_histogram_fops; #endif /* - * fsc-main.c + * main.c */ extern unsigned fscache_defer_lookup; extern unsigned fscache_defer_create; @@ -86,14 +86,14 @@ extern int fscache_wait_bit(void *); extern int fscache_wait_bit_interruptible(void *); /* - * fsc-object.c + * object.c */ extern void fscache_withdrawing_object(struct fscache_cache *, struct fscache_object *); extern void fscache_enqueue_object(struct fscache_object *); /* - * fsc-operation.c + * operation.c */ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); @@ -104,7 +104,7 @@ extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); /* - * fsc-proc.c + * proc.c */ #ifdef CONFIG_PROC_FS extern int __init fscache_proc_init(void); @@ -115,7 +115,7 @@ extern void fscache_proc_cleanup(void); #endif /* - * fsc-stats.c + * stats.c */ #ifdef CONFIG_FSCACHE_STATS extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; -- cgit v1.2.3-70-g09d2 From 911e690e70540f009125bacd16c017eb1a7b1916 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 May 2009 15:46:55 +0100 Subject: CacheFiles: Fixup renamed filenames in comments in internal.h Fix up renamed filenames in comments in fs/cachefiles/internal.h. Originally, the files were all called cf-xxx.c, but they got renamed to just xxx.c. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/cachefiles/internal.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 19218e1463d6..f7c255f9c624 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -122,13 +122,13 @@ static inline void cachefiles_state_changed(struct cachefiles_cache *cache) } /* - * cf-bind.c + * bind.c */ extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); /* - * cf-daemon.c + * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; @@ -136,17 +136,17 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, unsigned fnr, unsigned bnr); /* - * cf-interface.c + * interface.c */ extern const struct fscache_cache_ops cachefiles_cache_ops; /* - * cf-key.c + * key.c */ extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); /* - * cf-namei.c + * namei.c */ extern int cachefiles_delete_object(struct cachefiles_cache *cache, struct cachefiles_object *object); @@ -165,7 +165,7 @@ extern int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, char *filename); /* - * cf-proc.c + * proc.c */ #ifdef CONFIG_CACHEFILES_HISTOGRAM extern atomic_t cachefiles_lookup_histogram[HZ]; @@ -190,7 +190,7 @@ void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) #endif /* - * cf-rdwr.c + * rdwr.c */ extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, struct page *, gfp_t); @@ -205,7 +205,7 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *); extern void cachefiles_uncache_page(struct fscache_object *, struct page *); /* - * cf-security.c + * security.c */ extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, @@ -225,7 +225,7 @@ static inline void cachefiles_end_secure(struct cachefiles_cache *cache, } /* - * cf-xattr.c + * xattr.c */ extern int cachefiles_check_object_type(struct cachefiles_object *object); extern int cachefiles_set_object_xattr(struct cachefiles_object *object, -- cgit v1.2.3-70-g09d2 From a0d24b295aed7a9daf4ca36bd4784e4d40f82303 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 19 May 2009 12:03:15 +0800 Subject: nfsd: fix hung up of nfs client while sync write data to nfs server Commit 'Short write in nfsd becomes a full write to the client' (31dec2538e45e9fff2007ea1f4c6bae9f78db724) broken the sync write. With the following commands to reproduce: $ mount -t nfs -o sync 192.168.0.21:/nfsroot /mnt $ cd /mnt $ echo aaaa > temp.txt Then nfs client is hung up. In SYNC mode the server alaways return the write count 0 to the client. This is because the value of host_err in nfsd_vfs_write() will be overwrite in SYNC mode by 'host_err=nfsd_sync(file);', and then we return host_err(which is now 0) as write count. This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6c68ffd6b4bb..b660435978d2 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1015,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); if (host_err >= 0) { + *cnt = host_err; nfsdstats.io_write += host_err; fsnotify_modify(file->f_path.dentry); } @@ -1060,10 +1061,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, } dprintk("nfsd: write complete host_err=%d\n", host_err); - if (host_err >= 0) { + if (host_err >= 0) err = 0; - *cnt = host_err; - } else + else err = nfserrno(host_err); out: return err; -- cgit v1.2.3-70-g09d2 From 086a377edc969aea6c761176a7e4ff68f264d6fe Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 May 2009 12:36:53 -0700 Subject: sysfs: file.c: use create_singlethread_workqueue() We don't need a kernel thread per CPU for this application. Acked-by: Alex Chiang Cc: Lai Jiangshan Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b1606e07b7a3..561a9c050cef 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -723,7 +723,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), mutex_unlock(&sysfs_workq_mutex); if (sysfs_workqueue == NULL) { - sysfs_workqueue = create_workqueue("sysfsd"); + sysfs_workqueue = create_singlethread_workqueue("sysfsd"); if (sysfs_workqueue == NULL) { module_put(owner); return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 81e2962801bbb4e740c501ca687d5cb857929c04 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 28 May 2009 17:43:59 +0200 Subject: jffs2: Fix corruption when flash erase/write failure Erase errors such as: "Newly-erased block contained word 0xa4ef223e at offset 0x0296a014" and failure to write the clean marker, moves the offending erase block to erasing list before calling jffs2_erase_failed(). This is bad as jffs2_erase_failed() will also move the block to the bad_list, but is now moving the wrong block, causing FS corruption. Signed-off-by: Joakim Tjernlund Signed-off-by: David Woodhouse --- fs/jffs2/erase.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index c32b4a1ad6cf..a0244740b75a 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -480,13 +480,6 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb return; filebad: - mutex_lock(&c->erase_free_sem); - spin_lock(&c->erase_completion_lock); - /* Stick it on a list (any list) so erase_failed can take it - right off again. Silly, but shouldn't happen often. */ - list_move(&jeb->list, &c->erasing_list); - spin_unlock(&c->erase_completion_lock); - mutex_unlock(&c->erase_free_sem); jffs2_erase_failed(c, jeb, bad_offset); return; -- cgit v1.2.3-70-g09d2 From bd6daba909d8484bd2ccf6017db4028d7a420927 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 28 May 2009 14:34:21 -0700 Subject: procfs: make errno values consistent when open pident vs exit(2) race occurs proc_pident_instantiate() has following call flow. proc_pident_lookup() proc_pident_instantiate() proc_pid_make_inode() And, proc_pident_lookup() has following error handling. const struct pid_entry *p, *last; error = ERR_PTR(-ENOENT); if (!task) goto out_no_task; Then, proc_pident_instantiate should return ENOENT too when racing against exit(2) occur. EINAL has two bad reason. - it implies caller is wrong. bad the race isn't caller's mistake. - man 2 open don't explain EINVAL. user often don't handle it. Note: Other proc_pid_make_inode() caller already use ENOENT properly. Acked-by: Eric W. Biederman Cc: Alexey Dobriyan Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index fb45615943c2..3326bbf9ab95 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1956,7 +1956,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - struct dentry *error = ERR_PTR(-EINVAL); + struct dentry *error = ERR_PTR(-ENOENT); inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) -- cgit v1.2.3-70-g09d2 From c3dc5bec05a2ae03a72ef82e321d77fb549d951c Mon Sep 17 00:00:00 2001 From: Oskar Schirmer Date: Thu, 28 May 2009 14:34:31 -0700 Subject: flat: fix data sections alignment The flat loader uses an architecture's flat_stack_align() to align the stack but assumes word-alignment is enough for the data sections. However, on the Xtensa S6000 we have registers up to 128bit width which can be used from userspace and therefor need userspace stack and data-section alignment of at least this size. This patch drops flat_stack_align() and uses the same alignment that is required for slab caches, ARCH_SLAB_MINALIGN, or wordsize if it's not defined by the architecture. It also fixes m32r which was obviously kaput, aligning an uninitialized stack entry instead of the stack pointer. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Oskar Schirmer Cc: David Howells Cc: Russell King Cc: Bryan Wu Cc: Geert Uytterhoeven Acked-by: Paul Mundt Cc: Greg Ungerer Signed-off-by: Johannes Weiner Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/flat.h | 3 --- arch/blackfin/include/asm/flat.h | 1 - arch/h8300/include/asm/flat.h | 1 - arch/m32r/include/asm/flat.h | 1 - arch/m68k/include/asm/flat.h | 1 - arch/sh/include/asm/flat.h | 1 - fs/binfmt_flat.c | 46 +++++++++++++++++++++++++++------------- 7 files changed, 31 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h index 1d77e51907f6..59426a4595c9 100644 --- a/arch/arm/include/asm/flat.h +++ b/arch/arm/include/asm/flat.h @@ -5,9 +5,6 @@ #ifndef __ARM_FLAT_H__ #define __ARM_FLAT_H__ -/* An odd number of words will be pushed after this alignment, so - deliberately misalign the value. */ -#define flat_stack_align(sp) sp = (void *)(((unsigned long)(sp) - 4) | 4) #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/arch/blackfin/include/asm/flat.h b/arch/blackfin/include/asm/flat.h index e70074e05f4e..733a178d782d 100644 --- a/arch/blackfin/include/asm/flat.h +++ b/arch/blackfin/include/asm/flat.h @@ -10,7 +10,6 @@ #include -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 0 #define flat_old_ram_flag(flags) (flags) diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h index 2a873508a9a1..bd12b31b90e6 100644 --- a/arch/h8300/include/asm/flat.h +++ b/arch/h8300/include/asm/flat.h @@ -5,7 +5,6 @@ #ifndef __H8300_FLAT_H__ #define __H8300_FLAT_H__ -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) 1 #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/arch/m32r/include/asm/flat.h b/arch/m32r/include/asm/flat.h index d851cf0c4aa5..5d711c4688fb 100644 --- a/arch/m32r/include/asm/flat.h +++ b/arch/m32r/include/asm/flat.h @@ -12,7 +12,6 @@ #ifndef __ASM_M32R_FLAT_H #define __ASM_M32R_FLAT_H -#define flat_stack_align(sp) (*sp += (*sp & 3 ? (4 - (*sp & 3)): 0)) #define flat_argvp_envp_on_stack() 0 #define flat_old_ram_flag(flags) (flags) #define flat_set_persistent(relval, p) 0 diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h index 814b5174a8e0..a0e290793978 100644 --- a/arch/m68k/include/asm/flat.h +++ b/arch/m68k/include/asm/flat.h @@ -5,7 +5,6 @@ #ifndef __M68KNOMMU_FLAT_H__ #define __M68KNOMMU_FLAT_H__ -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h index d3b2b4f109e3..5d84df5e27f6 100644 --- a/arch/sh/include/asm/flat.h +++ b/arch/sh/include/asm/flat.h @@ -12,7 +12,6 @@ #ifndef __ASM_SH_FLAT_H #define __ASM_SH_FLAT_H -#define flat_stack_align(sp) /* nothing needed */ #define flat_argvp_envp_on_stack() 0 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 5cebf0b37798..697f6b5f1313 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -41,6 +41,7 @@ #include #include #include +#include /****************************************************************************/ @@ -54,6 +55,18 @@ #define DBG_FLT(a...) #endif +/* + * User data (stack, data section and bss) needs to be aligned + * for the same reasons as SLAB memory is, and to the same amount. + * Avoid duplicating architecture specific code by using the same + * macro as with SLAB allocation: + */ +#ifdef ARCH_SLAB_MINALIGN +#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN) +#else +#define FLAT_DATA_ALIGN (sizeof(void *)) +#endif + #define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */ #define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */ @@ -114,20 +127,18 @@ static unsigned long create_flat_tables( int envc = bprm->envc; char uninitialized_var(dummy); - sp = (unsigned long *) ((-(unsigned long)sizeof(char *))&(unsigned long) p); + sp = (unsigned long *)p; + sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); + sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN); + argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0); + envp = argv + (argc + 1); - sp -= envc+1; - envp = sp; - sp -= argc+1; - argv = sp; - - flat_stack_align(sp); if (flat_argvp_envp_on_stack()) { - --sp; put_user((unsigned long) envp, sp); - --sp; put_user((unsigned long) argv, sp); + put_user((unsigned long) envp, sp + 2); + put_user((unsigned long) argv, sp + 1); } - put_user(argc,--sp); + put_user(argc, sp); current->mm->arg_start = (unsigned long) p; while (argc-->0) { put_user((unsigned long) p, argv++); @@ -558,7 +569,9 @@ static int load_flat_file(struct linux_binprm * bprm, ret = realdatastart; goto err; } - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); + datapos = ALIGN(realdatastart + + MAX_SHARED_LIBS * sizeof(unsigned long), + FLAT_DATA_ALIGN); DBG_FLT("BINFMT_FLAT: Allocated data+bss+stack (%d bytes): %x\n", (int)(data_len + bss_len + stack_len), (int)datapos); @@ -604,9 +617,12 @@ static int load_flat_file(struct linux_binprm * bprm, } realdatastart = textpos + ntohl(hdr->data_start); - datapos = realdatastart + MAX_SHARED_LIBS * sizeof(unsigned long); - reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + - MAX_SHARED_LIBS * sizeof(unsigned long)); + datapos = ALIGN(realdatastart + + MAX_SHARED_LIBS * sizeof(unsigned long), + FLAT_DATA_ALIGN); + + reloc = (unsigned long *) + (datapos + (ntohl(hdr->reloc_start) - text_len)); memp = textpos; memp_size = len; #ifdef CONFIG_BINFMT_ZFLAT @@ -854,7 +870,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) stack_len = TOP_OF_ARGS - bprm->p; /* the strings */ stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */ stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */ - + stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */ res = load_flat_file(bprm, &libinfo, 0, &stack_len); if (res > (unsigned long)-4096) -- cgit v1.2.3-70-g09d2 From 62013ab5d5df297a01ae5863b5c26d758ec0af7f Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 30 May 2009 21:50:58 +0900 Subject: nilfs2: fix bh leak in nilfs_cpfile_delete_checkpoints function The nilfs_cpfile_delete_checkpoints() wrongly skips brelse() for the header block of checkpoint file in case of errors. This fixes the leak bug. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/cpfile.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index e90b60dfced9..300f1cdfa862 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -311,7 +311,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) { if (ret != -ENOENT) - goto out_sem; + goto out_header; /* skip hole */ ret = 0; continue; @@ -344,7 +344,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; printk(KERN_ERR "%s: cannot delete block\n", __func__); - goto out_sem; + goto out_header; } } @@ -361,6 +361,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, nilfs_mdt_mark_dirty(cpfile); kunmap_atomic(kaddr, KM_USER0); } + + out_header: brelse(header_bh); out_sem: -- cgit v1.2.3-70-g09d2 From 1f23920dbf1377fa9e4aef4f3d20c34a06a71a35 Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Thu, 7 May 2009 19:49:45 -0500 Subject: xfs: fix double unlock in xfs_swap_extents() Regreesion from commit ef8f7fc, which rearranged the code in xfs_swap_extents() leading to double unlock of xfs inode ilock. That resulted in xfs_fsr deadlocking itself on platforms, which don't handle double unlock of rw_semaphore nicely. It caused the count go negative, which represents the write holder, without really having one. ia64 is one of the platforms where deadlock was easily reproduced and the fix was tested. Signed-off-by: Eric Sandeen Reviewed-by: Eric Sandeen Signed-off-by: Felix Blyakher --- fs/xfs/xfs_dfrag.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index e6d839bddbf0..7465f9ee125f 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -347,13 +347,15 @@ xfs_swap_extents( error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); out: kmem_free(tempifp); return error; +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + goto out; + out_trans_cancel: xfs_trans_cancel(tp, 0); goto out_unlock; -- cgit v1.2.3-70-g09d2 From e6da7c9fed111ba1243297ee6eda8e24ae11c384 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sat, 23 May 2009 14:30:12 -0500 Subject: xfs: fix overflow in xfs_growfs_data_private In the case where growing a filesystem would leave the last AG too small, the fixup code has an overflow in the calculation of the new size with one fewer ag, because "nagcount" is a 32 bit number. If the new filesystem has > 2^32 blocks in it this causes a problem resulting in an EINVAL return from growfs: # xfs_io -f -c "truncate 19998630180864" fsfile # mkfs.xfs -f -bsize=4096 -dagsize=76288719b,size=3905982455b fsfile # mount -o loop fsfile /mnt # xfs_growfs /mnt meta-data=/dev/loop0 isize=256 agcount=52, agsize=76288719 blks = sectsz=512 attr=2 data = bsize=4096 blocks=3905982455, imaxpct=5 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0 log =internal bsize=4096 blocks=32768, version=2 = sectsz=512 sunit=0 blks, lazy-count=0 realtime =none extsz=4096 blocks=0, rtextents=0 xfs_growfs: XFS_IOC_FSGROWFSDATA xfsctl failed: Invalid argument Reported-by: richard.ems@cape-horn-eng.com Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher --- fs/xfs/xfs_fsops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8379e3bca26c..cbd451bb4848 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -160,7 +160,7 @@ xfs_growfs_data_private( nagcount = new + (nb_mod != 0); if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { nagcount--; - nb = nagcount * mp->m_sb.sb_agblocks; + nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; if (nb < mp->m_sb.sb_dblocks) return XFS_ERROR(EINVAL); } -- cgit v1.2.3-70-g09d2 From 1b17d766463d51904cb242f194a780737e5f73ef Mon Sep 17 00:00:00 2001 From: Felix Blyakher Date: Mon, 1 Jun 2009 13:13:24 -0500 Subject: xfs: prevent deadlock in xfs_qm_shake() It's possible to recurse into filesystem from the memory allocation, which deadlocks in xfs_qm_shake(). Add check for __GFP_FS, and bail out if it is not set. Signed-off-by: Felix Blyakher Signed-off-by: Hedi Berriche Reviewed-by: Christoph Hellwig Reviewed-by: Andi Kleen Signed-off-by: Felix Blyakher --- fs/xfs/linux-2.6/kmem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index af6843c7ee4b..179cbd630f69 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); static inline int kmem_shake_allow(gfp_t gfp_mask) { - return (gfp_mask & __GFP_WAIT) != 0; + return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); } #endif /* __XFS_SUPPORT_KMEM_H__ */ -- cgit v1.2.3-70-g09d2 From 2cc3c559fb2fe8cecca82a517bc56e88b0c1effd Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 4 Jun 2009 09:23:50 -0400 Subject: Btrfs: set device->total_disk_bytes when adding new device It was not being properly initialized, and so the size saved to disk was not correct. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5f01dad4b696..a6d35b0054ca 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1440,6 +1440,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->io_align = root->sectorsize; device->sector_size = root->sectorsize; device->total_bytes = i_size_read(bdev->bd_inode); + device->disk_total_bytes = device->total_bytes; device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; -- cgit v1.2.3-70-g09d2 From 44fb5511638938a2c37c895abc14df648ffc07e9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 4 Jun 2009 15:34:51 -0400 Subject: Btrfs: Fix oops and use after free during space balancing The btrfs allocator uses list_for_each to walk the available block groups when searching for free blocks. It starts off with a hint to help find the best block group for a given allocation. The hint is resolved into a block group, but we don't properly check to make sure the block group we find isn't in the middle of being freed due to filesystem shrinking or balancing. If it is being freed, the list pointers in it are bogus and can't be trusted. But, the code happily goes along and uses them in the list_for_each loop, leading to all kinds of fun. The fix used here is to check to make sure the block group we find really is on the list before we use it. list_del_init is used when removing it from the list, so we can do a proper check. The allocation clustering code has a similar bug where it will trust the block group in the current free space cluster. If our allocation flags have changed (going from single spindle dup to raid1 for example) because the drives in the FS have changed, we're not allowed to use the old block group any more. The fix used here is to check the current cluster against the current allocation flags. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3e2c7c738f23..35af93355063 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2622,7 +2622,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, search_start); if (block_group && block_group_bits(block_group, data)) { down_read(&space_info->groups_sem); - goto have_block_group; + if (list_empty(&block_group->list) || + block_group->ro) { + /* + * someone is removing this block group, + * we can't jump into the have_block_group + * target because our list pointers are not + * valid + */ + btrfs_put_block_group(block_group); + up_read(&space_info->groups_sem); + } else + goto have_block_group; } else if (block_group) { btrfs_put_block_group(block_group); } @@ -2656,6 +2667,13 @@ have_block_group: * people trying to start a new cluster */ spin_lock(&last_ptr->refill_lock); + if (last_ptr->block_group && + (last_ptr->block_group->ro || + !block_group_bits(last_ptr->block_group, data))) { + offset = 0; + goto refill_cluster; + } + offset = btrfs_alloc_from_cluster(block_group, last_ptr, num_bytes, search_start); if (offset) { @@ -2681,10 +2699,17 @@ have_block_group: last_ptr_loop = 1; search_start = block_group->key.objectid; + /* + * we know this block group is properly + * in the list because + * btrfs_remove_block_group, drops the + * cluster before it removes the block + * group from the list + */ goto have_block_group; } spin_unlock(&last_ptr->lock); - +refill_cluster: /* * this cluster didn't work out, free it and * start over @@ -5968,6 +5993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, { struct btrfs_path *path; struct btrfs_block_group_cache *block_group; + struct btrfs_free_cluster *cluster; struct btrfs_key key; int ret; @@ -5979,6 +6005,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, memcpy(&key, &block_group->key, sizeof(key)); + /* make sure this block group isn't part of an allocation cluster */ + cluster = &root->fs_info->data_alloc_cluster; + spin_lock(&cluster->refill_lock); + btrfs_return_cluster_to_free_space(block_group, cluster); + spin_unlock(&cluster->refill_lock); + + /* + * make sure this block group isn't part of a metadata + * allocation cluster + */ + cluster = &root->fs_info->meta_alloc_cluster; + spin_lock(&cluster->refill_lock); + btrfs_return_cluster_to_free_space(block_group, cluster); + spin_unlock(&cluster->refill_lock); + path = btrfs_alloc_path(); BUG_ON(!path); @@ -5988,7 +6029,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, spin_unlock(&root->fs_info->block_group_cache_lock); btrfs_remove_free_space_cache(block_group); down_write(&block_group->space_info->groups_sem); - list_del(&block_group->list); + /* + * we must use list_del_init so people can check to see if they + * are still on the list after taking the semaphore + */ + list_del_init(&block_group->list); up_write(&block_group->space_info->groups_sem); spin_lock(&block_group->space_info->lock); -- cgit v1.2.3-70-g09d2 From 460bcf57b128ce1c0dd553d905fedc097f9955c6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 12 May 2009 07:37:56 -0400 Subject: Fix nobh_truncate_page() to not pass stack garbage to get_block() The nobh_truncate_page() function is used by ext2, exofs, and jfs. Of these three, only ext2 and jfs's get_block() function pays attention to bh->b_size --- which is normally always the filesystem blocksize except when the get_block() function is called by either mpage_readpage(), mpage_readpages(), or the direct I/O routines in fs/direct_io.c. Unfortunately, nobh_truncate_page() does not initialize map_bh before calling the filesystem-supplied get_block() function. So ext2 and jfs will try to calculate the number of blocks to map by taking stack garbage and shifting it left by inode->i_blkbits. This should be *mostly* harmless (except the filesystem will do some unnneeded work) unless the stack garbage is less than filesystem's blocksize, in which case maxblocks will be zero, and the attempt to find out whether or not the filesystem has a hole at a given logical block will fail, and the page cache entry might not get zero'ed out. Also if the stack garbage in in map_bh->state happens to have the BH_Mapped bit set, there could be an attempt to call readpage() on a non-existent page, which could cause nobh_truncate_page() to return an error when it should not. Fix this by initializing map_bh->state and map_bh->size. Fortunately, it's probably fairly unlikely that ext2 and jfs users mount with nobh these days. Signed-off-by: "Theodore Ts'o" Cc: Dave Kleikamp Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Al Viro --- fs/buffer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index aed297739eb0..49106127a4aa 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2736,6 +2736,8 @@ has_buffers: pos += blocksize; } + map_bh.b_size = blocksize; + map_bh.b_state = 0; err = get_block(inode, iblock, &map_bh, 0); if (err) goto unlock; -- cgit v1.2.3-70-g09d2 From 72a43d63cb51057393edfbcfc4596066205ad15d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 13 May 2009 19:13:40 +0100 Subject: ext3/4 with synchronous writes gets wedged by Postfix OK, that's probably the easiest way to do that, as much as I don't like it... Since iget() et.al. will not accept I_FREEING (will wait to go away and restart), and since we'd better have serialization between new/free on fs data structures anyway, we can afford simply skipping I_FREEING et.al. in insert_inode_locked(). We do that from new_inode, so it won't race with free_inode in any interesting ways and it won't race with iget (of any origin; nfsd or in case of fs corruption a lookup) since both still will wait for I_LOCK. Reviewed-by: "Theodore Ts'o" Acked-by: Jan Kara Tested-by: David Watson Signed-off-by: Al Viro --- fs/inode.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index 0571983755dc..a4876e561953 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1053,13 +1053,22 @@ int insert_inode_locked(struct inode *inode) struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; struct hlist_head *head = inode_hashtable + hash(sb, ino); - struct inode *old; inode->i_state |= I_LOCK|I_NEW; while (1) { + struct hlist_node *node; + struct inode *old = NULL; spin_lock(&inode_lock); - old = find_inode_fast(sb, head, ino); - if (likely(!old)) { + hlist_for_each_entry(old, node, head, i_hash) { + if (old->i_ino != ino) + continue; + if (old->i_sb != sb) + continue; + if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + continue; + break; + } + if (likely(!node)) { hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); return 0; @@ -1081,14 +1090,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, { struct super_block *sb = inode->i_sb; struct hlist_head *head = inode_hashtable + hash(sb, hashval); - struct inode *old; inode->i_state |= I_LOCK|I_NEW; while (1) { + struct hlist_node *node; + struct inode *old = NULL; + spin_lock(&inode_lock); - old = find_inode(sb, head, test, data); - if (likely(!old)) { + hlist_for_each_entry(old, node, head, i_hash) { + if (old->i_sb != sb) + continue; + if (!test(old, data)) + continue; + if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + continue; + break; + } + if (likely(!node)) { hlist_add_head(&inode->i_hash, head); spin_unlock(&inode_lock); return 0; -- cgit v1.2.3-70-g09d2 From f07502dae230a2c3b65381fd1b06e8a18b2c7525 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 6 Jun 2009 21:18:09 +0100 Subject: integrity: fix IMA inode leak CONFIG_IMA=y inode activity leaks iint_cache and radix_tree_node objects until the system runs out of memory. Nowhere is calling ima_inode_free() a.k.a. ima_iint_delete(). Fix that by calling it from destroy_inode(). Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- fs/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/inode.c b/fs/inode.c index a4876e561953..bca0c618fdb3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -219,6 +219,7 @@ static struct inode *alloc_inode(struct super_block *sb) void destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); + ima_inode_free(inode); security_inode_free(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); -- cgit v1.2.3-70-g09d2