From 718360c59f34b80d9878429300c1c688f7c2031d Mon Sep 17 00:00:00 2001
From: Noah Massey <noah.massey@gmail.com>
Date: Thu, 30 Jan 2014 21:31:12 -0500
Subject: nfs: fix setting of ACLs on file creation.

nfs3_get_acl() tries to skip posix equivalent ACLs, but misinterprets
the return value of posix_acl_equiv_mode(). Fix it.

This is a regression introduced by
"nfs: use generic posix ACL infrastructure for v3 Posix ACLs"

CC: Christoph Hellwig <hch@infradead.org>
CC: linux-nfs@vger.kernel.org
CC: linux-fsdevel@vger.kernel.org

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs3acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9a5ca03fa539..0851f852568d 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -80,7 +80,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
 	}
 
 	if (res.acl_access != NULL) {
-		if (posix_acl_equiv_mode(res.acl_access, NULL) ||
+		if ((posix_acl_equiv_mode(res.acl_access, NULL) == 0) ||
 		    res.acl_access->a_count == 0) {
 			posix_acl_release(res.acl_access);
 			res.acl_access = NULL;
-- 
cgit v1.2.3-70-g09d2


From 17ead6c85c3d0ef57a14d1373f1f1cee2ce60ea8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 1 Feb 2014 14:53:23 -0500
Subject: NFSv4: Fix memory corruption in nfs4_proc_open_confirm

nfs41_wake_and_assign_slot() relies on the task->tk_msg.rpc_argp and
task->tk_msg.rpc_resp always pointing to the session sequence arguments.

nfs4_proc_open_confirm tries to pull a fast one by reusing the open
sequence structure, thus causing corruption of the NFSv4 slot table.

Cc: stable@vger.kernel.org # 3.12+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4proc.c       | 8 ++++----
 include/linux/nfs_xdr.h | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 42da6af77587..2da6a698b8f7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1620,15 +1620,15 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_opendata *data = calldata;
 
-	nfs40_setup_sequence(data->o_arg.server, &data->o_arg.seq_args,
-				&data->o_res.seq_res, task);
+	nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args,
+				&data->c_res.seq_res, task);
 }
 
 static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_opendata *data = calldata;
 
-	nfs40_sequence_done(task, &data->o_res.seq_res);
+	nfs40_sequence_done(task, &data->c_res.seq_res);
 
 	data->rpc_status = task->tk_status;
 	if (data->rpc_status == 0) {
@@ -1686,7 +1686,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 	};
 	int status;
 
-	nfs4_init_sequence(&data->o_arg.seq_args, &data->o_res.seq_res, 1);
+	nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1);
 	kref_get(&data->kref);
 	data->rpc_done = 0;
 	data->rpc_status = 0;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3ccfcecf8999..b2fb167b2e6d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -379,12 +379,14 @@ struct nfs_openres {
  * Arguments to the open_confirm call.
  */
 struct nfs_open_confirmargs {
+	struct nfs4_sequence_args	seq_args;
 	const struct nfs_fh *	fh;
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 };
 
 struct nfs_open_confirmres {
+	struct nfs4_sequence_res	seq_res;
 	nfs4_stateid            stateid;
 	struct nfs_seqid *	seqid;
 };
-- 
cgit v1.2.3-70-g09d2


From 20b9a9024540a775395d5d1f41eec0ec6ec41f9b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 1 Feb 2014 13:47:06 -0500
Subject: NFSv4.1: nfs4_destroy_session must call rpc_destroy_waitqueue

There may still be timers active on the session waitqueues. Make sure
that we kill them before freeing the memory.

Cc: stable@vger.kernel.org # 3.12+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs4client.c  |  2 +-
 fs/nfs/nfs4session.c | 25 ++++++++++++++++++++-----
 fs/nfs/nfs4session.h |  2 +-
 3 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index dbb3e1f30c68..860ad26a5590 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -170,7 +170,7 @@ void nfs41_shutdown_client(struct nfs_client *clp)
 void nfs40_shutdown_client(struct nfs_client *clp)
 {
 	if (clp->cl_slot_tbl) {
-		nfs4_release_slot_table(clp->cl_slot_tbl);
+		nfs4_shutdown_slot_table(clp->cl_slot_tbl);
 		kfree(clp->cl_slot_tbl);
 	}
 }
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index cf883c7ae053..e799dc3c3b1d 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -231,14 +231,23 @@ out:
 	return ret;
 }
 
+/*
+ * nfs4_release_slot_table - release all slot table entries
+ */
+static void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
+{
+	nfs4_shrink_slot_table(tbl, 0);
+}
+
 /**
- * nfs4_release_slot_table - release resources attached to a slot table
+ * nfs4_shutdown_slot_table - release resources attached to a slot table
  * @tbl: slot table to shut down
  *
  */
-void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
+void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl)
 {
-	nfs4_shrink_slot_table(tbl, 0);
+	nfs4_release_slot_table(tbl);
+	rpc_destroy_wait_queue(&tbl->slot_tbl_waitq);
 }
 
 /**
@@ -422,7 +431,7 @@ void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
-static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+static void nfs4_release_session_slot_tables(struct nfs4_session *session)
 {
 	nfs4_release_slot_table(&session->fc_slot_table);
 	nfs4_release_slot_table(&session->bc_slot_table);
@@ -450,7 +459,7 @@ int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
 	if (status && tbl->slots == NULL)
 		/* Fore and back channel share a connection so get
 		 * both slot tables or neither */
-		nfs4_destroy_session_slot_tables(ses);
+		nfs4_release_session_slot_tables(ses);
 	return status;
 }
 
@@ -470,6 +479,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 	return session;
 }
 
+static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+{
+	nfs4_shutdown_slot_table(&session->fc_slot_table);
+	nfs4_shutdown_slot_table(&session->bc_slot_table);
+}
+
 void nfs4_destroy_session(struct nfs4_session *session)
 {
 	struct rpc_xprt *xprt;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 232306100651..b34ada9bc6a2 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -74,7 +74,7 @@ enum nfs4_session_state {
 
 extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
 		unsigned int max_reqs, const char *queue);
-extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl);
+extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
-- 
cgit v1.2.3-70-g09d2


From 8101c8dbf6243ba517aab58d69bf1bc37d8b7b9c Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 29 Jan 2014 16:05:30 -0500
Subject: Btrfs: disable snapshot aware defrag for now

It's just broken and it's taking a lot of effort to fix it, so for now just
disable it so people can defrag in peace.  Thanks,

Cc: stable@vger.kernel.org
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb74a536add3..1af34d0c744b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2629,7 +2629,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 			EXTENT_DEFRAG, 1, cached_state);
 	if (ret) {
 		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
-		if (last_snapshot >= BTRFS_I(inode)->generation)
+		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
 			/* the inode is shared */
 			new = record_old_file_extents(inode, ordered_extent);
 
-- 
cgit v1.2.3-70-g09d2


From 0b947aff1599afbbd2ec07ada87b05af0f94cf10 Mon Sep 17 00:00:00 2001
From: Filipe David Borba Manana <fdmanana@gmail.com>
Date: Wed, 29 Jan 2014 21:06:04 +0000
Subject: Btrfs: use btrfs_crc32c everywhere instead of libcrc32c

After the commit titled "Btrfs: fix btrfs boot when compiled as built-in",
LIBCRC32C requirement was removed from btrfs' Kconfig. This made it not
possible to build a kernel with btrfs enabled (either as module or built-in)
if libcrc32c is not enabled as well. So just replace all uses of libcrc32c
with the equivalent function in btrfs hash.h - btrfs_crc32c.

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/check-integrity.c | 4 ++--
 fs/btrfs/disk-io.c         | 4 ++--
 fs/btrfs/send.c            | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 160fb509d720..39bfd56a1f26 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -92,11 +92,11 @@
 #include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include <linux/mutex.h>
-#include <linux/crc32c.h>
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
 #include "ctree.h"
 #include "disk-io.h"
+#include "hash.h"
 #include "transaction.h"
 #include "extent_io.h"
 #include "volumes.h"
@@ -1823,7 +1823,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
 		size_t sublen = i ? PAGE_CACHE_SIZE :
 				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
 
-		crc = crc32c(crc, data, sublen);
+		crc = btrfs_crc32c(crc, data, sublen);
 	}
 	btrfs_csum_final(crc, csum);
 	if (memcmp(csum, h->csum, state->csum_size))
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7619147da382..3903bd3f8d2b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,7 +26,6 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
-#include <linux/crc32c.h>
 #include <linux/slab.h>
 #include <linux/migrate.h>
 #include <linux/ratelimit.h>
@@ -35,6 +34,7 @@
 #include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
+#include "hash.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "volumes.h"
@@ -244,7 +244,7 @@ out:
 
 u32 btrfs_csum_data(char *data, u32 seed, size_t len)
 {
-	return crc32c(seed, data, len);
+	return btrfs_crc32c(seed, data, len);
 }
 
 void btrfs_csum_final(u32 crc, char *result)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 730dce395858..cf9107a64204 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -24,12 +24,12 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
 #include <linux/radix-tree.h>
-#include <linux/crc32c.h>
 #include <linux/vmalloc.h>
 #include <linux/string.h>
 
 #include "send.h"
 #include "backref.h"
+#include "hash.h"
 #include "locking.h"
 #include "disk-io.h"
 #include "btrfs_inode.h"
@@ -620,7 +620,7 @@ static int send_cmd(struct send_ctx *sctx)
 	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
 	hdr->crc = 0;
 
-	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
+	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
 	hdr->crc = cpu_to_le32(crc);
 
 	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
-- 
cgit v1.2.3-70-g09d2


From 60efa5eb2e886852a0d5f9e1ffa7c896a1099da8 Mon Sep 17 00:00:00 2001
From: Filipe David Borba Manana <fdmanana@gmail.com>
Date: Sat, 1 Feb 2014 21:27:56 +0000
Subject: Btrfs: use late_initcall instead of module_init

It seems that when init_btrfs_fs() is called, crc32c/crc32c-intel might
not always be already initialized, which results in the call to crypto_alloc_shash()
returning -ENOENT, as experienced by Ahmet who reported this.

Therefore make sure init_btrfs_fs() is called after crc32c is initialized (which
is at initialization level 6, module_init), by using late_initcall (which is at
initialization level 7) instead of module_init for btrfs.

Reported-and-Tested-by: Ahmet Inan <ainan@mathematik.uni-freiburg.de>
Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c02f63356895..97cc24198554 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1996,7 +1996,7 @@ static void __exit exit_btrfs_fs(void)
 	btrfs_hash_exit();
 }
 
-module_init(init_btrfs_fs)
+late_initcall(init_btrfs_fs);
 module_exit(exit_btrfs_fs)
 
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-70-g09d2


From d4c42fb493e018e9240810bb6dc5334ae0505145 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 2 Feb 2014 14:41:42 -0500
Subject: NFSv3: Remove unused function nfs3_proc_set_default_acl

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs3acl.c | 19 -------------------
 1 file changed, 19 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 0851f852568d..9271a6bb9a41 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -233,25 +233,6 @@ fail:
 	return PTR_ERR(alloc);
 }
 
-int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
-		umode_t mode)
-{
-	struct posix_acl *default_acl, *acl;
-	int error;
-
-	error = posix_acl_create(dir, &mode, &default_acl, &acl);
-	if (error)
-		return (error == -EOPNOTSUPP) ? 0 : error;
-
-	error = nfs3_proc_setacls(inode, acl, default_acl);
-
-	if (acl)
-		posix_acl_release(acl);
-	if (default_acl)
-		posix_acl_release(default_acl);
-	return error;
-}
-
 const struct xattr_handler *nfs3_xattr_handlers[] = {
 	&posix_acl_access_xattr_handler,
 	&posix_acl_default_xattr_handler,
-- 
cgit v1.2.3-70-g09d2


From 8f493b9cfcd8941c6b27d6ce8e3b4a78c094b3c1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 2 Feb 2014 14:36:42 -0500
Subject: NFSv3: Fix return value of nfs3_proc_setacls

nfs3_proc_setacls is used internally by the NFSv3 create operations
to set the acl after the file has been created. If the operation
fails because the server doesn't support acls, then it must return '0',
not -EOPNOTSUPP.

Reported-by: Russell King <linux@arm.linux.org.uk>
Link: http://lkml.kernel.org/r/20140201010328.GI15937@n2100.arm.linux.org.uk
Cc: Christoph Hellwig <hch@lst.de>
Tested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs3acl.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 9271a6bb9a41..871d6eda8dba 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -113,7 +113,7 @@ getout:
 	return ERR_PTR(status);
 }
 
-int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 		struct posix_acl *dfacl)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -198,6 +198,15 @@ out:
 	return status;
 }
 
+int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
+		struct posix_acl *dfacl)
+{
+	int ret;
+	ret = __nfs3_proc_setacls(inode, acl, dfacl);
+	return (ret == -EOPNOTSUPP) ? 0 : ret;
+
+}
+
 int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
 	struct posix_acl *alloc = NULL, *dfacl = NULL;
@@ -225,7 +234,7 @@ int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 		if (IS_ERR(alloc))
 			goto fail;
 	}
-	status = nfs3_proc_setacls(inode, acl, dfacl);
+	status = __nfs3_proc_setacls(inode, acl, dfacl);
 	posix_acl_release(alloc);
 	return status;
 
-- 
cgit v1.2.3-70-g09d2


From 789b663ae3d427ea9c50505339a13276e7228c9d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 31 Jan 2014 14:25:19 -0500
Subject: fs: get_acl() must be allowed to return EOPNOTSUPP

posix_acl_xattr_get requires get_acl() to return EOPNOTSUPP if the
filesystem cannot support acls. This is needed for NFS, which can't
know whether or not the server supports acls until it tries to get/set
one.
This patch converts posix_acl_chmod and posix_acl_create to deal with
EOPNOTSUPP return values from get_acl().

Reported-by: Russell King <linux@arm.linux.org.uk>
Link: http://lkml.kernel.org/r/20140130140834.GW15937@n2100.arm.linux.org.uk
Cc: Al Viro viro@zeniv.linux.org.uk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/posix_acl.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 38bae5a0ea25..11c54fd51e16 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -521,8 +521,11 @@ posix_acl_chmod(struct inode *inode, umode_t mode)
 		return -EOPNOTSUPP;
 
 	acl = get_acl(inode, ACL_TYPE_ACCESS);
-	if (IS_ERR_OR_NULL(acl))
+	if (IS_ERR_OR_NULL(acl)) {
+		if (acl == ERR_PTR(-EOPNOTSUPP))
+			return 0;
 		return PTR_ERR(acl);
+	}
 
 	ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
 	if (ret)
@@ -544,14 +547,15 @@ posix_acl_create(struct inode *dir, umode_t *mode,
 		goto no_acl;
 
 	p = get_acl(dir, ACL_TYPE_DEFAULT);
-	if (IS_ERR(p))
+	if (IS_ERR(p)) {
+		if (p == ERR_PTR(-EOPNOTSUPP))
+			goto apply_umask;
 		return PTR_ERR(p);
-
-	if (!p) {
-		*mode &= ~current_umask();
-		goto no_acl;
 	}
 
+	if (!p)
+		goto apply_umask;
+
 	*acl = posix_acl_clone(p, GFP_NOFS);
 	if (!*acl)
 		return -ENOMEM;
@@ -575,6 +579,8 @@ posix_acl_create(struct inode *dir, umode_t *mode,
 	}
 	return 0;
 
+apply_umask:
+	*mode &= ~current_umask();
 no_acl:
 	*default_acl = NULL;
 	*acl = NULL;
-- 
cgit v1.2.3-70-g09d2


From da9846ae15186d491d6e21ebbb5051e1d3c7f652 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 29 Jan 2014 12:04:03 -0500
Subject: kernfs: make kernfs_deactivate() honor KERNFS_LOCKDEP flag

kernfs_deactivate() forgot to check whether KERNFS_LOCKDEP is set
before performing lockdep annotations and ends up feeding
uninitialized lockdep_map to lockdep triggering warning like the
following on USB stick hotunplug.

 usb 1-2: USB disconnect, device number 2
 INFO: trying to register non-static key.
 the code is fine but needs lockdep annotation.
 turning off the locking correctness validator.
 CPU: 1 PID: 62 Comm: khubd Not tainted 3.13.0-work+ #82
 Hardware name: empty empty/S3992, BIOS 080011  10/26/2007
  ffff880065ca7f60 ffff88013a4ffa08 ffffffff81cfb6bd 0000000000000002
  ffff88013a4ffac8 ffffffff810f8530 ffff88013a4fc710 0000000000000002
  ffff880100000000 ffffffff82a3db50 0000000000000001 ffff88013a4fc710
 Call Trace:
  [<ffffffff81cfb6bd>] dump_stack+0x4e/0x7a
  [<ffffffff810f8530>] __lock_acquire+0x1910/0x1e70
  [<ffffffff810f931a>] lock_acquire+0x9a/0x1d0
  [<ffffffff8127c75e>] kernfs_deactivate+0xee/0x130
  [<ffffffff8127d4c8>] kernfs_addrm_finish+0x38/0x60
  [<ffffffff8127d701>] kernfs_remove_by_name_ns+0x51/0xa0
  [<ffffffff8127b4f1>] remove_files.isra.1+0x41/0x80
  [<ffffffff8127b7e7>] sysfs_remove_group+0x47/0xa0
  [<ffffffff8127b873>] sysfs_remove_groups+0x33/0x50
  [<ffffffff8177d66d>] device_remove_attrs+0x4d/0x80
  [<ffffffff8177e25e>] device_del+0x12e/0x1d0
  [<ffffffff819722c2>] usb_disconnect+0x122/0x1a0
  [<ffffffff819749b5>] hub_thread+0x3c5/0x1290
  [<ffffffff810c6a6d>] kthread+0xed/0x110
  [<ffffffff81d0a56c>] ret_from_fork+0x7c/0xb0

Fix it by making kernfs_deactivate() perform lockdep annotations only
if KERNFS_LOCKDEP is set.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Fabio Estevam <festevam@gmail.com>
Reported-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Jiri Kosina <jkosina@suse.cz>
Reported-by: Dave Jones <davej@redhat.com>
Tested-by: Fabio Estevam <fabio.estevam@freescale.com>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/dir.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 5104cf5d25c5..bd6e18be6e1a 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -187,19 +187,23 @@ static void kernfs_deactivate(struct kernfs_node *kn)
 
 	kn->u.completion = (void *)&wait;
 
-	rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
+	if (kn->flags & KERNFS_LOCKDEP)
+		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
 	/* atomic_add_return() is a mb(), put_active() will always see
 	 * the updated kn->u.completion.
 	 */
 	v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active);
 
 	if (v != KN_DEACTIVATED_BIAS) {
-		lock_contended(&kn->dep_map, _RET_IP_);
+		if (kn->flags & KERNFS_LOCKDEP)
+			lock_contended(&kn->dep_map, _RET_IP_);
 		wait_for_completion(&wait);
 	}
 
-	lock_acquired(&kn->dep_map, _RET_IP_);
-	rwsem_release(&kn->dep_map, 1, _RET_IP_);
+	if (kn->flags & KERNFS_LOCKDEP) {
+		lock_acquired(&kn->dep_map, _RET_IP_);
+		rwsem_release(&kn->dep_map, 1, _RET_IP_);
+	}
 }
 
 /**
-- 
cgit v1.2.3-70-g09d2


From c4ad8f98bef77c7356aa6a9ad9188a6acc6b849d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 5 Feb 2014 12:54:53 -0800
Subject: execve: use 'struct filename *' for executable name passing

This changes 'do_execve()' to get the executable name as a 'struct
filename', and to free it when it is done.  This is what the normal
users want, and it simplifies and streamlines their error handling.

The controlled lifetime of the executable name also fixes a
use-after-free problem with the trace_sched_process_exec tracepoint: the
lifetime of the passed-in string for kernel users was not at all
obvious, and the user-mode helper code used UMH_WAIT_EXEC to serialize
the pathname allocation lifetime with the execve() having finished,
which in turn meant that the trace point that happened after
mm_release() of the old process VM ended up using already free'd memory.

To solve the kernel string lifetime issue, this simply introduces
"getname_kernel()" that works like the normal user-space getname()
function, except with the source coming from kernel memory.

As Oleg points out, this also means that we could drop the tcomm[] array
from 'struct linux_binprm', since the pathname lifetime now covers
setup_new_exec().  That would be a separate cleanup.

Reported-by: Igor Zhbanov <i.zhbanov@samsung.com>
Tested-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/parisc/hpux/fs.c   | 15 +--------------
 fs/exec.c               | 45 +++++++++++++++++++++------------------------
 fs/namei.c              | 30 ++++++++++++++++++++++++++++++
 include/linux/binfmts.h |  1 -
 include/linux/fs.h      |  1 +
 include/linux/sched.h   |  3 ++-
 init/main.c             |  2 +-
 kernel/auditsc.c        |  2 +-
 kernel/kmod.c           |  2 +-
 9 files changed, 58 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c
index 88d0962de65a..2bedafea3d94 100644
--- a/arch/parisc/hpux/fs.c
+++ b/arch/parisc/hpux/fs.c
@@ -33,22 +33,9 @@
 
 int hpux_execve(struct pt_regs *regs)
 {
-	int error;
-	struct filename *filename;
-
-	filename = getname((const char __user *) regs->gr[26]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = do_execve(filename->name,
+	return  do_execve(getname((const char __user *) regs->gr[26]),
 			  (const char __user *const __user *) regs->gr[25],
 			  (const char __user *const __user *) regs->gr[24]);
-
-	putname(filename);
-
-out:
-	return error;
 }
 
 struct hpux_dirent {
diff --git a/fs/exec.c b/fs/exec.c
index e1529b4c79b1..3d78fccdd723 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -748,11 +748,10 @@ EXPORT_SYMBOL(setup_arg_pages);
 
 #endif /* CONFIG_MMU */
 
-struct file *open_exec(const char *name)
+static struct file *do_open_exec(struct filename *name)
 {
 	struct file *file;
 	int err;
-	struct filename tmp = { .name = name };
 	static const struct open_flags open_exec_flags = {
 		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
 		.acc_mode = MAY_EXEC | MAY_OPEN,
@@ -760,7 +759,7 @@ struct file *open_exec(const char *name)
 		.lookup_flags = LOOKUP_FOLLOW,
 	};
 
-	file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags);
+	file = do_filp_open(AT_FDCWD, name, &open_exec_flags);
 	if (IS_ERR(file))
 		goto out;
 
@@ -784,6 +783,12 @@ exit:
 	fput(file);
 	return ERR_PTR(err);
 }
+
+struct file *open_exec(const char *name)
+{
+	struct filename tmp = { .name = name };
+	return do_open_exec(&tmp);
+}
 EXPORT_SYMBOL(open_exec);
 
 int kernel_read(struct file *file, loff_t offset,
@@ -1162,7 +1167,7 @@ int prepare_bprm_creds(struct linux_binprm *bprm)
 	return -ENOMEM;
 }
 
-void free_bprm(struct linux_binprm *bprm)
+static void free_bprm(struct linux_binprm *bprm)
 {
 	free_arg_pages(bprm);
 	if (bprm->cred) {
@@ -1432,7 +1437,7 @@ static int exec_binprm(struct linux_binprm *bprm)
 /*
  * sys_execve() executes a new program.
  */
-static int do_execve_common(const char *filename,
+static int do_execve_common(struct filename *filename,
 				struct user_arg_ptr argv,
 				struct user_arg_ptr envp)
 {
@@ -1441,6 +1446,9 @@ static int do_execve_common(const char *filename,
 	struct files_struct *displaced;
 	int retval;
 
+	if (IS_ERR(filename))
+		return PTR_ERR(filename);
+
 	/*
 	 * We move the actual failure in case of RLIMIT_NPROC excess from
 	 * set*uid() to execve() because too many poorly written programs
@@ -1473,7 +1481,7 @@ static int do_execve_common(const char *filename,
 	check_unsafe_exec(bprm);
 	current->in_execve = 1;
 
-	file = open_exec(filename);
+	file = do_open_exec(filename);
 	retval = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_unmark;
@@ -1481,8 +1489,7 @@ static int do_execve_common(const char *filename,
 	sched_exec();
 
 	bprm->file = file;
-	bprm->filename = filename;
-	bprm->interp = filename;
+	bprm->filename = bprm->interp = filename->name;
 
 	retval = bprm_mm_init(bprm);
 	if (retval)
@@ -1523,6 +1530,7 @@ static int do_execve_common(const char *filename,
 	acct_update_integrals(current);
 	task_numa_free(current);
 	free_bprm(bprm);
+	putname(filename);
 	if (displaced)
 		put_files_struct(displaced);
 	return retval;
@@ -1544,10 +1552,11 @@ out_files:
 	if (displaced)
 		reset_files_struct(displaced);
 out_ret:
+	putname(filename);
 	return retval;
 }
 
-int do_execve(const char *filename,
+int do_execve(struct filename *filename,
 	const char __user *const __user *__argv,
 	const char __user *const __user *__envp)
 {
@@ -1557,7 +1566,7 @@ int do_execve(const char *filename,
 }
 
 #ifdef CONFIG_COMPAT
-static int compat_do_execve(const char *filename,
+static int compat_do_execve(struct filename *filename,
 	const compat_uptr_t __user *__argv,
 	const compat_uptr_t __user *__envp)
 {
@@ -1607,25 +1616,13 @@ SYSCALL_DEFINE3(execve,
 		const char __user *const __user *, argv,
 		const char __user *const __user *, envp)
 {
-	struct filename *path = getname(filename);
-	int error = PTR_ERR(path);
-	if (!IS_ERR(path)) {
-		error = do_execve(path->name, argv, envp);
-		putname(path);
-	}
-	return error;
+	return do_execve(getname(filename), argv, envp);
 }
 #ifdef CONFIG_COMPAT
 asmlinkage long compat_sys_execve(const char __user * filename,
 	const compat_uptr_t __user * argv,
 	const compat_uptr_t __user * envp)
 {
-	struct filename *path = getname(filename);
-	int error = PTR_ERR(path);
-	if (!IS_ERR(path)) {
-		error = compat_do_execve(path->name, argv, envp);
-		putname(path);
-	}
-	return error;
+	return compat_do_execve(getname(filename), argv, envp);
 }
 #endif
diff --git a/fs/namei.c b/fs/namei.c
index d580df2e6804..385f7817bfcc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -196,6 +196,7 @@ recopy:
 		goto error;
 
 	result->uptr = filename;
+	result->aname = NULL;
 	audit_getname(result);
 	return result;
 
@@ -210,6 +211,35 @@ getname(const char __user * filename)
 	return getname_flags(filename, 0, NULL);
 }
 
+/*
+ * The "getname_kernel()" interface doesn't do pathnames longer
+ * than EMBEDDED_NAME_MAX. Deal with it - you're a kernel user.
+ */
+struct filename *
+getname_kernel(const char * filename)
+{
+	struct filename *result;
+	char *kname;
+	int len;
+
+	len = strlen(filename);
+	if (len >= EMBEDDED_NAME_MAX)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	result = __getname();
+	if (unlikely(!result))
+		return ERR_PTR(-ENOMEM);
+
+	kname = (char *)result + sizeof(*result);
+	result->name = kname;
+	result->uptr = NULL;
+	result->aname = NULL;
+	result->separate = false;
+
+	strlcpy(kname, filename, EMBEDDED_NAME_MAX);
+	return result;
+}
+
 #ifdef CONFIG_AUDITSYSCALL
 void putname(struct filename *name)
 {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index fd8bf3219ef7..b4a745d7d9a9 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -115,7 +115,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv,
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
 extern void set_binfmt(struct linux_binfmt *new);
-extern void free_bprm(struct linux_binprm *);
 extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);
 
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 09f553c59813..d79678c188ad 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2079,6 +2079,7 @@ extern struct file * dentry_open(const struct path *, int, const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 
 extern struct filename *getname(const char __user *);
+extern struct filename *getname_kernel(const char *);
 
 enum {
 	FILE_CREATED = 1,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68a0e84463a0..a781dec1cd0b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -128,6 +128,7 @@ struct bio_list;
 struct fs_struct;
 struct perf_event_context;
 struct blk_plug;
+struct filename;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -2311,7 +2312,7 @@ extern void do_group_exit(int);
 extern int allow_signal(int);
 extern int disallow_signal(int);
 
-extern int do_execve(const char *,
+extern int do_execve(struct filename *,
 		     const char __user * const __user *,
 		     const char __user * const __user *);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
diff --git a/init/main.c b/init/main.c
index 2fd9cef70ee8..eb03090cdced 100644
--- a/init/main.c
+++ b/init/main.c
@@ -812,7 +812,7 @@ void __init load_default_modules(void)
 static int run_init_process(const char *init_filename)
 {
 	argv_init[0] = init_filename;
-	return do_execve(init_filename,
+	return do_execve(getname_kernel(init_filename),
 		(const char __user *const __user *)argv_init,
 		(const char __user *const __user *)envp_init);
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 10176cd5956a..7aef2f4b6c64 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1719,7 +1719,7 @@ void audit_putname(struct filename *name)
 	struct audit_context *context = current->audit_context;
 
 	BUG_ON(!context);
-	if (!context->in_syscall) {
+	if (!name->aname || !context->in_syscall) {
 #if AUDIT_DEBUG == 2
 		printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n",
 		       __FILE__, __LINE__, context->serial, name);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index b086006c59e7..6b375af4958d 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -239,7 +239,7 @@ static int ____call_usermodehelper(void *data)
 
 	commit_creds(new);
 
-	retval = do_execve(sub_info->path,
+	retval = do_execve(getname_kernel(sub_info->path),
 			   (const char __user *const __user *)sub_info->argv,
 			   (const char __user *const __user *)sub_info->envp);
 	if (!retval)
-- 
cgit v1.2.3-70-g09d2


From fb951eb5e167de9f07973ce0dfff674a2019bfab Mon Sep 17 00:00:00 2001
From: Zongxun Wang <wangzongxun@huawei.com>
Date: Thu, 6 Feb 2014 12:04:20 -0800
Subject: ocfs2: free allocated clusters if error occurs after
 ocfs2_claim_clusters

Even if using the same jbd2 handle, we cannot rollback a transaction.
So once some error occurs after successfully allocating clusters, the
allocated clusters will never be used and it means they are lost.  For
example, call ocfs2_claim_clusters successfully when expanding a file,
but failed in ocfs2_insert_extent.  So we need free the allocated
clusters if they are not used indeed.

Signed-off-by: Zongxun Wang <wangzongxun@huawei.com>
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Acked-by: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/alloc.c      | 38 +++++++++++++++++++++++++++++++++++---
 fs/ocfs2/localalloc.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/localalloc.h |  6 ++++++
 3 files changed, 83 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 8750ae1b8636..aada5801567a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 				enum ocfs2_alloc_restarted *reason_ret)
 {
 	int status = 0, err = 0;
+	int need_free = 0;
 	int free_extents;
 	enum ocfs2_alloc_restarted reason = RESTART_NONE;
 	u32 bit_off, num_bits;
@@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 					      OCFS2_JOURNAL_ACCESS_WRITE);
 	if (status < 0) {
 		mlog_errno(status);
-		goto leave;
+		need_free = 1;
+		goto bail;
 	}
 
 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
@@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 				     num_bits, flags, meta_ac);
 	if (status < 0) {
 		mlog_errno(status);
-		goto leave;
+		need_free = 1;
+		goto bail;
 	}
 
 	ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
 		reason = RESTART_TRANS;
 	}
 
+bail:
+	if (need_free) {
+		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+					bit_off, num_bits);
+		else
+			ocfs2_free_clusters(handle,
+					data_ac->ac_inode,
+					data_ac->ac_bh,
+					ocfs2_clusters_to_blocks(osb->sb, bit_off),
+					num_bits);
+	}
+
 leave:
 	if (reason_ret)
 		*reason_ret = reason;
@@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 					 struct buffer_head *di_bh)
 {
 	int ret, i, has_data, num_pages = 0;
+	int need_free = 0;
+	u32 bit_off, num;
 	handle_t *handle;
 	u64 uninitialized_var(block);
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 	}
 
 	if (has_data) {
-		u32 bit_off, num;
 		unsigned int page_end;
 		u64 phys;
 
@@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
 		if (ret) {
 			mlog_errno(ret);
+			need_free = 1;
 			goto out_commit;
 		}
 
@@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
 		if (ret) {
 			mlog_errno(ret);
+			need_free = 1;
 			goto out_commit;
 		}
 
@@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
 		ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
 		if (ret) {
 			mlog_errno(ret);
+			need_free = 1;
 			goto out_commit;
 		}
 
@@ -6938,6 +6958,18 @@ out_commit:
 		dquot_free_space_nodirty(inode,
 					  ocfs2_clusters_to_bytes(osb->sb, 1));
 
+	if (need_free) {
+		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+					bit_off, num);
+		else
+			ocfs2_free_clusters(handle,
+					data_ac->ac_inode,
+					data_ac->ac_bh,
+					ocfs2_clusters_to_blocks(osb->sb, bit_off),
+					num);
+	}
+
 	ocfs2_commit_trans(osb, handle);
 
 out_unlock:
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index cd5496b7a0a3..044013455621 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -781,6 +781,48 @@ bail:
 	return status;
 }
 
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+				handle_t *handle,
+				struct ocfs2_alloc_context *ac,
+				u32 bit_off,
+				u32 num_bits)
+{
+	int status, start;
+	u32 clear_bits;
+	struct inode *local_alloc_inode;
+	void *bitmap;
+	struct ocfs2_dinode *alloc;
+	struct ocfs2_local_alloc *la;
+
+	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
+
+	local_alloc_inode = ac->ac_inode;
+	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
+	la = OCFS2_LOCAL_ALLOC(alloc);
+
+	bitmap = la->la_bitmap;
+	start = bit_off - le32_to_cpu(la->la_bm_off);
+	clear_bits = num_bits;
+
+	status = ocfs2_journal_access_di(handle,
+			INODE_CACHE(local_alloc_inode),
+			osb->local_alloc_bh,
+			OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	while (clear_bits--)
+		ocfs2_clear_bit(start++, bitmap);
+
+	le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
+	ocfs2_journal_dirty(handle, osb->local_alloc_bh);
+
+bail:
+	return status;
+}
+
 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
 {
 	u32 count;
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 1be9b5864460..44a7d1fb2dec 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
 				 u32 *bit_off,
 				 u32 *num_bits);
 
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+				handle_t *handle,
+				struct ocfs2_alloc_context *ac,
+				u32 bit_off,
+				u32 num_bits);
+
 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
 				      unsigned int num_clusters);
 void ocfs2_la_enable_worker(struct work_struct *work);
-- 
cgit v1.2.3-70-g09d2


From 227d53b397a32a7614667b3ecaf1d89902fb6c12 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Thu, 6 Feb 2014 12:04:28 -0800
Subject: mm: __set_page_dirty uses spin_lock_irqsave instead of spin_lock_irq

To use spin_{un}lock_irq is dangerous if caller disabled interrupt.
During aio buffer migration, we have a possibility to see the following
call stack.

aio_migratepage  [disable interrupt]
  migrate_page_copy
    clear_page_dirty_for_io
      set_page_dirty
        __set_page_dirty_buffers
          __set_page_dirty
            spin_lock_irq

This mean, current aio migration is a deadlockable.  spin_lock_irqsave
is a safer alternative and we should use it.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reported-by: David Rientjes rientjes@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 651dba10b9c2..27265a8b43c1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -654,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
 static void __set_page_dirty(struct page *page,
 		struct address_space *mapping, int warn)
 {
-	spin_lock_irq(&mapping->tree_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mapping->tree_lock, flags);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
 		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
-	spin_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 }
 
-- 
cgit v1.2.3-70-g09d2


From c18f7b51200c3c8b76c63e391f9995b65ace9c83 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Fri, 7 Feb 2014 14:36:10 -0600
Subject: jfs: fix generic posix ACL regression

I missed a couple errors in reviewing the patches converting jfs
to use the generic posix ACL function. Setting ACL's currently
fails with -EOPNOTSUPP.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Reported-by: Michael L. Semon <mlsemon35@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/jfs/xattr.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 3bd5ee45f7b3..46325d5c34fc 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -854,9 +854,6 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	int rc;
 	tid_t tid;
 
-	if ((rc = can_set_xattr(inode, name, value, value_len)))
-		return rc;
-
 	/*
 	 * If this is a request for a synthetic attribute in the system.*
 	 * namespace use the generic infrastructure to resolve a handler
@@ -865,6 +862,9 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return generic_setxattr(dentry, name, value, value_len, flags);
 
+	if ((rc = can_set_xattr(inode, name, value, value_len)))
+		return rc;
+
 	if (value == NULL) {	/* empty EA, do not remove */
 		value = "";
 		value_len = 0;
@@ -1034,9 +1034,6 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
 	int rc;
 	tid_t tid;
 
-	if ((rc = can_set_xattr(inode, name, NULL, 0)))
-		return rc;
-
 	/*
 	 * If this is a request for a synthetic attribute in the system.*
 	 * namespace use the generic infrastructure to resolve a handler
@@ -1045,6 +1042,9 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return generic_removexattr(dentry, name);
 
+	if ((rc = can_set_xattr(inode, name, NULL, 0)))
+		return rc;
+
 	tid = txBegin(inode->i_sb, 0);
 	mutex_lock(&ji->commit_mutex);
 	rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
@@ -1061,7 +1061,7 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
  * attributes are handled directly.
  */
 const struct xattr_handler *jfs_xattr_handlers[] = {
-#ifdef JFS_POSIX_ACL
+#ifdef CONFIG_JFS_POSIX_ACL
 	&posix_acl_access_xattr_handler,
 	&posix_acl_default_xattr_handler,
 #endif
-- 
cgit v1.2.3-70-g09d2


From 6cc98d90f8d14f8ebce2391323929024d7eef39f Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 5 Feb 2014 16:19:21 -0500
Subject: Btrfs: fix assert screwup for the pending move stuff

Wang noticed that he was failing btrfs/030 even though me and Filipe couldn't
reproduce.  Turns out this is because Wang didn't have CONFIG_BTRFS_ASSERT set,
which meant that a key part of Filipe's original patch was not being built in.
This appears to be a mess up with merging Filipe's patch as it does not exist in
his original patch.  Fix this by changing how we make sure del_waiting_dir_move
asserts that it did not error and take the function out of the ifdef check.
This makes btrfs/030 pass with the assert on or off.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Filipe Manana <fdmanana@gmail.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/send.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index cf9107a64204..9c8d1a3fdc3a 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2774,8 +2774,6 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
 	return 0;
 }
 
-#ifdef CONFIG_BTRFS_ASSERT
-
 static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino)
 {
 	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
@@ -2796,8 +2794,6 @@ static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino)
 	return -ENOENT;
 }
 
-#endif
-
 static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
 {
 	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
@@ -2902,7 +2898,9 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 	}
 
 	sctx->send_progress = sctx->cur_ino + 1;
-	ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0);
+	ret = del_waiting_dir_move(sctx, pm->ino);
+	ASSERT(ret == 0);
+
 	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
 	if (ret < 0)
 		goto out;
-- 
cgit v1.2.3-70-g09d2


From d0270aca88966641eb15306e9bd0c7ad15321440 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Fri, 7 Feb 2014 14:33:57 +0100
Subject: btrfs: commit transaction after setting label and features

The set_fslabel ioctl uses btrfs_end_transaction, which means it's
possible that the change will be lost if the system crashes, same for
the newly set features. Let's use btrfs_commit_transaction instead.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/ioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 34772cbcc7aa..5bbf6b7216c3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4547,7 +4547,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
 	spin_lock(&root->fs_info->super_lock);
 	strcpy(super_block->label, label);
 	spin_unlock(&root->fs_info->super_lock);
-	ret = btrfs_end_transaction(trans, root);
+	ret = btrfs_commit_transaction(trans, root);
 
 out_unlock:
 	mnt_drop_write_file(file);
@@ -4711,7 +4711,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
 	btrfs_set_super_incompat_flags(super_block, newflags);
 	spin_unlock(&root->fs_info->super_lock);
 
-	return btrfs_end_transaction(trans, root);
+	return btrfs_commit_transaction(trans, root);
 }
 
 long btrfs_ioctl(struct file *file, unsigned int
-- 
cgit v1.2.3-70-g09d2


From 8051aa1a3d5aaa7bd4c062cad94d09c3d567ef2e Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.cz>
Date: Fri, 7 Feb 2014 14:34:04 +0100
Subject: btrfs: reserve no transaction units in btrfs_ioctl_set_features

Added in patch "btrfs: add ioctls to query/change feature bits online"
modifications to superblock don't need to reserve metadata blocks when
starting a transaction.

Signed-off-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5bbf6b7216c3..ebdd866d4cfd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4690,7 +4690,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
 	if (ret)
 		return ret;
 
-	trans = btrfs_start_transaction(root, 1);
+	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
 
-- 
cgit v1.2.3-70-g09d2


From 27a377db745ed4d11b3b9b340756857cb8dde07f Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 7 Feb 2014 13:57:59 -0500
Subject: Btrfs: don't loop forever if we can't run because of the tree mod log

A user reported a 100% cpu hang with my new delayed ref code.  Turns out I
forgot to increase the count check when we can't run a delayed ref because of
the tree mod log.  If we can't run any delayed refs during this there is no
point in continuing to look, and we need to break out.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/extent-tree.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9c9ecc93ae2c..32312e09f0f5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2385,6 +2385,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			spin_unlock(&delayed_refs->lock);
 			locked_ref = NULL;
 			cond_resched();
+			count++;
 			continue;
 		}
 
-- 
cgit v1.2.3-70-g09d2


From a2aa75e18a21b21952dc6daa9bac7c9f4426f81f Mon Sep 17 00:00:00 2001
From: Filipe David Borba Manana <fdmanana@gmail.com>
Date: Sat, 8 Feb 2014 15:47:46 +0000
Subject: Btrfs: fix data corruption when reading/updating compressed extents

When using a mix of compressed file extents and prealloc extents, it
is possible to fill a page of a file with random, garbage data from
some unrelated previous use of the page, instead of a sequence of zeroes.

A simple sequence of steps to get into such case, taken from the test
case I made for xfstests, is:

   _scratch_mkfs
   _scratch_mount "-o compress-force=lzo"
   $XFS_IO_PROG -f -c "pwrite -S 0x06 -b 18670 266978 18670" $SCRATCH_MNT/foobar
   $XFS_IO_PROG -c "falloc 26450 665194" $SCRATCH_MNT/foobar
   $XFS_IO_PROG -c "truncate 542872" $SCRATCH_MNT/foobar
   $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar

This results in the following file items in the fs tree:

   item 4 key (257 INODE_ITEM 0) itemoff 15879 itemsize 160
       inode generation 6 transid 6 size 542872 block group 0 mode 100600
   item 5 key (257 INODE_REF 256) itemoff 15863 itemsize 16
       inode ref index 2 namelen 6 name: foobar
   item 6 key (257 EXTENT_DATA 0) itemoff 15810 itemsize 53
       extent data disk byte 0 nr 0 gen 6
       extent data offset 0 nr 24576 ram 266240
       extent compression 0
   item 7 key (257 EXTENT_DATA 24576) itemoff 15757 itemsize 53
       prealloc data disk byte 12849152 nr 241664 gen 6
       prealloc data offset 0 nr 241664
   item 8 key (257 EXTENT_DATA 266240) itemoff 15704 itemsize 53
       extent data disk byte 12845056 nr 4096 gen 6
       extent data offset 0 nr 20480 ram 20480
       extent compression 2
   item 9 key (257 EXTENT_DATA 286720) itemoff 15651 itemsize 53
       prealloc data disk byte 13090816 nr 405504 gen 6
       prealloc data offset 0 nr 258048

The on disk extent at offset 266240 (which corresponds to 1 single disk block),
contains 5 compressed chunks of file data. Each of the first 4 compress 4096
bytes of file data, while the last one only compresses 3024 bytes of file data.
Therefore a read into the file region [285648 ; 286720[ (length = 4096 - 3024 =
1072 bytes) should always return zeroes (our next extent is a prealloc one).

The solution here is the compression code path to zero the remaining (untouched)
bytes of the last page it uncompressed data into, as the information about how
much space the file data consumes in the last page is not known in the upper layer
fs/btrfs/extent_io.c:__do_readpage(). In __do_readpage we were correctly zeroing
the remainder of the page but only if it corresponds to the last page of the inode
and if the inode's size is not a multiple of the page size.

This would cause not only returning random data on reads, but also permanently
storing random data when updating parts of the region that should be zeroed.
For the example above, it means updating a single byte in the region [285648 ; 286720[
would store that byte correctly but also store random data on disk.

A test case for xfstests follows soon.

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/compression.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index af815eb8f970..ed1ff1cb1017 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1011,6 +1011,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 		bytes = min(bytes, working_bytes);
 		kaddr = kmap_atomic(page_out);
 		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+		if (*pg_index == (vcnt - 1) && *pg_offset == 0)
+			memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
 		kunmap_atomic(kaddr);
 		flush_dcache_page(page_out);
 
-- 
cgit v1.2.3-70-g09d2


From d311d79de305f1ada47cadd672e6ed1b28a949eb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 9 Feb 2014 15:18:09 -0500
Subject: fix O_SYNC|O_APPEND syncing the wrong range on write()

It actually goes back to 2004 ([PATCH] Concurrent O_SYNC write support)
when sync_page_range() had been introduced; generic_file_write{,v}() correctly
synced
	pos_after_write - written .. pos_after_write - 1
but generic_file_aio_write() synced
	pos_before_write .. pos_before_write + written - 1
instead.  Which is not the same thing with O_APPEND, obviously.
A couple of years later correct variant had been killed off when
everything switched to use of generic_file_aio_write().

All users of generic_file_aio_write() are affected, and the same bug
has been copied into other instances of ->aio_write().

The fix is trivial; the only subtle point is that generic_write_sync()
ought to be inlined to avoid calculations useless for the majority of
calls.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/file.c     |  4 ++--
 fs/ext4/file.c     |  2 +-
 fs/ntfs/file.c     |  2 +-
 fs/sync.c          | 17 -----------------
 fs/xfs/xfs_file.c  |  2 +-
 include/linux/fs.h |  8 +++++++-
 mm/filemap.c       |  4 ++--
 7 files changed, 14 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 853d6d1cc822..a7eda8ebfacc 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2559,8 +2559,8 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
 	if (rc > 0) {
 		ssize_t err;
 
-		err = generic_write_sync(file, pos, rc);
-		if (err < 0 && rc > 0)
+		err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+		if (err < 0)
 			rc = err;
 	}
 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 43e64f6022eb..1a5073959f32 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -152,7 +152,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (ret > 0) {
 		ssize_t err;
 
-		err = generic_write_sync(file, pos, ret);
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0 && ret > 0)
 			ret = err;
 	}
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index ea4ba9daeb47..db9bd8a31725 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2134,7 +2134,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
 	mutex_unlock(&inode->i_mutex);
 	if (ret > 0) {
-		int err = generic_write_sync(file, pos, ret);
+		int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0)
 			ret = err;
 	}
diff --git a/fs/sync.c b/fs/sync.c
index f15537452231..e8ba024a055b 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -222,23 +222,6 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
 	return do_fsync(fd, 1);
 }
 
-/**
- * generic_write_sync - perform syncing after a write if file / inode is sync
- * @file:	file to which the write happened
- * @pos:	offset where the write started
- * @count:	length of the write
- *
- * This is just a simple wrapper about our general syncing function.
- */
-int generic_write_sync(struct file *file, loff_t pos, loff_t count)
-{
-	if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
-		return 0;
-	return vfs_fsync_range(file, pos, pos + count - 1,
-			       (file->f_flags & __O_SYNC) ? 0 : 1);
-}
-EXPORT_SYMBOL(generic_write_sync);
-
 /*
  * sys_sync_file_range() permits finely controlled syncing over a segment of
  * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 2e7989e3a2d6..64b48eade91d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -799,7 +799,7 @@ xfs_file_aio_write(
 		XFS_STATS_ADD(xs_write_bytes, ret);
 
 		/* Handle various SYNC-type writes */
-		err = generic_write_sync(file, pos, ret);
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0)
 			ret = err;
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 09f553c59813..75ff961be051 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2273,7 +2273,13 @@ extern int filemap_fdatawrite_range(struct address_space *mapping,
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
 			   int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
-extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
+static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count)
+{
+	if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
+		return 0;
+	return vfs_fsync_range(file, pos, pos + count - 1,
+			       (file->f_flags & __O_SYNC) ? 0 : 1);
+}
 extern void emergency_sync(void);
 extern void emergency_remount(void);
 #ifdef CONFIG_BLOCK
diff --git a/mm/filemap.c b/mm/filemap.c
index d56d3c145b9f..7a13f6ac5421 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2553,8 +2553,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (ret > 0) {
 		ssize_t err;
 
-		err = generic_write_sync(file, pos, ret);
-		if (err < 0 && ret > 0)
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+		if (err < 0)
 			ret = err;
 	}
 	return ret;
-- 
cgit v1.2.3-70-g09d2