43 files changed, 630 insertions, 520 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 8edf253484af..8c79e1a53af9 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1367,6 +1367,39 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p)
+{
+	struct kioctx *ioctx = NULL;
+	unsigned long ctx;
+	long ret;
+
+	ret = get_user(ctx, ctx32p);
+	if (unlikely(ret))
+		goto out;
+
+	ret = -EINVAL;
+	if (unlikely(ctx || nr_events == 0)) {
+		pr_debug("EINVAL: ctx %lu nr_events %u\n",
+		         ctx, nr_events);
+		goto out;
+	}
+
+	ioctx = ioctx_alloc(nr_events);
+	ret = PTR_ERR(ioctx);
+	if (!IS_ERR(ioctx)) {
+		/* truncating is ok because it's a user address */
+		ret = put_user((u32)ioctx->user_id, ctx32p);
+		if (ret)
+			kill_ioctx(current->mm, ioctx, NULL);
+		percpu_ref_put(&ioctx->users);
+	}
+
+out:
+	return ret;
+}
+#endif
+
 /* sys_io_destroy:
  *	Destroy the aio_context specified.  May cancel any outstanding 
  *	AIOs and block on completion.  Will fail with -ENOSYS if not
@@ -1591,8 +1624,8 @@ out_put_req:
 	return ret;
 }
 
-long do_io_submit(aio_context_t ctx_id, long nr,
-		  struct iocb __user *__user *iocbpp, bool compat)
+static long do_io_submit(aio_context_t ctx_id, long nr,
+			  struct iocb __user *__user *iocbpp, bool compat)
 {
 	struct kioctx *ctx;
 	long ret = 0;
@@ -1662,6 +1695,44 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	return do_io_submit(ctx_id, nr, iocbpp, 0);
 }
 
+#ifdef CONFIG_COMPAT
+static inline long
+copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+{
+	compat_uptr_t uptr;
+	int i;
+
+	for (i = 0; i < nr; ++i) {
+		if (get_user(uptr, ptr32 + i))
+			return -EFAULT;
+		if (put_user(compat_ptr(uptr), ptr64 + i))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
+
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+		       int, nr, u32 __user *, iocb)
+{
+	struct iocb __user * __user *iocb64;
+	long ret;
+
+	if (unlikely(nr < 0))
+		return -EINVAL;
+
+	if (nr > MAX_AIO_SUBMITS)
+		nr = MAX_AIO_SUBMITS;
+
+	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
+	ret = copy_iocb(nr, iocb, iocb64);
+	if (!ret)
+		ret = do_io_submit(ctx_id, nr, iocb64, 1);
+	return ret;
+}
+#endif
+
 /* lookup_kiocb
  *	Finds a given iocb for cancellation.
  */
@@ -1761,3 +1832,25 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
 	}
 	return ret;
 }
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
+		       compat_long_t, min_nr,
+		       compat_long_t, nr,
+		       struct io_event __user *, events,
+		       struct compat_timespec __user *, timeout)
+{
+	struct timespec t;
+	struct timespec __user *ut = NULL;
+
+	if (timeout) {
+		if (compat_get_timespec(&t, timeout))
+			return -EFAULT;
+
+		ut = compat_alloc_user_space(sizeof(*ut));
+		if (copy_to_user(ut, &t, sizeof(t)))
+			return -EFAULT;
+	}
+	return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
+}
+#endif
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index c6bad51d8ec7..b914cfb03820 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -129,6 +129,7 @@ static inline befs_inode_addr
 blockno2iaddr(struct super_block *sb, befs_blocknr_t blockno)
 {
 	befs_inode_addr iaddr;
+
 	iaddr.allocation_group = blockno >> BEFS_SB(sb)->ag_shift;
 	iaddr.start =
 	    blockno - (iaddr.allocation_group << BEFS_SB(sb)->ag_shift);
@@ -140,7 +141,7 @@ blockno2iaddr(struct super_block *sb, befs_blocknr_t blockno)
 static inline unsigned int
 befs_iaddrs_per_block(struct super_block *sb)
 {
-	return BEFS_SB(sb)->block_size / sizeof (befs_disk_inode_addr);
+	return BEFS_SB(sb)->block_size / sizeof(befs_disk_inode_addr);
 }
 
 #include "endian.h"
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h
index eb557d9dc8be..69c9d8cde955 100644
--- a/fs/befs/befs_fs_types.h
+++ b/fs/befs/befs_fs_types.h
@@ -55,12 +55,12 @@ enum super_flags {
 };
 
 #define BEFS_BYTEORDER_NATIVE 0x42494745
-#define BEFS_BYTEORDER_NATIVE_LE (__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE)
-#define BEFS_BYTEORDER_NATIVE_BE (__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE)
+#define BEFS_BYTEORDER_NATIVE_LE ((__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE))
+#define BEFS_BYTEORDER_NATIVE_BE ((__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE))
 
 #define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1
-#define BEFS_SUPER_MAGIC1_LE (__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1)
-#define BEFS_SUPER_MAGIC1_BE (__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1)
+#define BEFS_SUPER_MAGIC1_LE ((__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1))
+#define BEFS_SUPER_MAGIC1_BE ((__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1))
 
 /*
  * Flags of inode
@@ -79,7 +79,7 @@ enum inode_flags {
 	BEFS_INODE_WAS_WRITTEN = 0x00020000,
 	BEFS_NO_TRANSACTION = 0x00040000,
 };
-/* 
+/*
  * On-Disk datastructures of BeFS
  */
 
@@ -139,7 +139,7 @@ typedef struct {
 
 } PACKED befs_super_block;
 
-/* 
+/*
  * Note: the indirect and dbl_indir block_runs may
  * be longer than one block!
  */
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 7e135ea73fdd..d509887c580c 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -12,8 +12,8 @@
  *
  * Dominic Giampaolo, author of "Practical File System
  * Design with the Be File System", for such a helpful book.
- * 
- * Marcus J. Ranum, author of the b+tree package in 
+ *
+ * Marcus J. Ranum, author of the b+tree package in
  * comp.sources.misc volume 10. This code is not copied from that
  * work, but it is partially based on it.
  *
@@ -38,38 +38,38 @@
  */
 
 /* Befs B+tree structure:
- * 
+ *
  * The first thing in the tree is the tree superblock. It tells you
  * all kinds of useful things about the tree, like where the rootnode
  * is located, and the size of the nodes (always 1024 with current version
  * of BeOS).
  *
  * The rest of the tree consists of a series of nodes. Nodes contain a header
- * (struct befs_btree_nodehead), the packed key data, an array of shorts 
+ * (struct befs_btree_nodehead), the packed key data, an array of shorts
  * containing the ending offsets for each of the keys, and an array of
- * befs_off_t values. In interior nodes, the keys are the ending keys for 
- * the childnode they point to, and the values are offsets into the 
- * datastream containing the tree. 
+ * befs_off_t values. In interior nodes, the keys are the ending keys for
+ * the childnode they point to, and the values are offsets into the
+ * datastream containing the tree.
  */
 
 /* Note:
- * 
- * The book states 2 confusing things about befs b+trees. First, 
+ *
+ * The book states 2 confusing things about befs b+trees. First,
  * it states that the overflow field of node headers is used by internal nodes
  * to point to another node that "effectively continues this one". Here is what
  * I believe that means. Each key in internal nodes points to another node that
- * contains key values less than itself. Inspection reveals that the last key 
- * in the internal node is not the last key in the index. Keys that are 
- * greater than the last key in the internal node go into the overflow node. 
+ * contains key values less than itself. Inspection reveals that the last key
+ * in the internal node is not the last key in the index. Keys that are
+ * greater than the last key in the internal node go into the overflow node.
  * I imagine there is a performance reason for this.
  *
- * Second, it states that the header of a btree node is sufficient to 
- * distinguish internal nodes from leaf nodes. Without saying exactly how. 
+ * Second, it states that the header of a btree node is sufficient to
+ * distinguish internal nodes from leaf nodes. Without saying exactly how.
  * After figuring out the first, it becomes obvious that internal nodes have
  * overflow nodes and leafnodes do not.
  */
 
-/* 
+/*
  * Currently, this code is only good for directory B+trees.
  * In order to be used for other BFS indexes, it needs to be extended to handle
  * duplicate keys and non-string keytypes (int32, int64, float, double).
@@ -237,8 +237,8 @@ befs_bt_read_node(struct super_block *sb, const befs_data_stream *ds,
  * with @key (usually the disk block number of an inode).
  *
  * On failure, returns BEFS_ERR or BEFS_BT_NOT_FOUND.
- * 
- * Algorithm: 
+ *
+ * Algorithm:
  *   Read the superblock and rootnode of the b+tree.
  *   Drill down through the interior nodes using befs_find_key().
  *   Once at the correct leaf node, use befs_find_key() again to get the
@@ -402,12 +402,12 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
  *
  * Here's how it works: Key_no is the index of the key/value pair to
  * return in keybuf/value.
- * Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is 
+ * Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is
  * the number of characters in the key (just a convenience).
  *
  * Algorithm:
  *   Get the first leafnode of the tree. See if the requested key is in that
- *   node. If not, follow the node->right link to the next leafnode. Repeat 
+ *   node. If not, follow the node->right link to the next leafnode. Repeat
  *   until the (key_no)th key is found or the tree is out of keys.
  */
 int
@@ -536,7 +536,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
  * @node_off: Pointer to offset of current node within datastream. Modified
  * 		by the function.
  *
- * Helper function for btree traverse. Moves the current position to the 
+ * Helper function for btree traverse. Moves the current position to the
  * start of the first leaf node.
  *
  * Also checks for an empty tree. If there are no keys, returns BEFS_BT_EMPTY.
@@ -592,10 +592,10 @@ befs_btree_seekleaf(struct super_block *sb, const befs_data_stream *ds,
 }
 
 /**
- * befs_leafnode - Determine if the btree node is a leaf node or an 
+ * befs_leafnode - Determine if the btree node is a leaf node or an
  * interior node
  * @node: Pointer to node structure to test
- * 
+ *
  * Return 1 if leaf, 0 if interior
  */
 static int
@@ -656,7 +656,7 @@ befs_bt_valarray(struct befs_btree_node *node)
  * @node: Pointer to the node structure to find the keydata array within
  *
  * Returns a pointer to the start of the keydata array
- * of the node pointed to by the node header 
+ * of the node pointed to by the node header
  */
 static char *
 befs_bt_keydata(struct befs_btree_node *node)
@@ -702,7 +702,7 @@ befs_bt_get_key(struct super_block *sb, struct befs_btree_node *node,
 
 /**
  * befs_compare_strings - compare two strings
- * @key1: pointer to the first key to be compared 
+ * @key1: pointer to the first key to be compared
  * @keylen1: length in bytes of key1
  * @key2: pointer to the second key to be compared
  * @keylen2: length in bytes of key2
diff --git a/fs/befs/btree.h b/fs/befs/btree.h
index f2a8f637e9e0..60c6c728e64e 100644
--- a/fs/befs/btree.h
+++ b/fs/befs/btree.h
@@ -1,13 +1,11 @@
 /*
  * btree.h
- * 
+ *
  */
 
-
 int befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
-		    const char *key, befs_off_t * value);
+		    const char *key, befs_off_t *value);
 
 int befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
 		    loff_t key_no, size_t bufsize, char *keybuf,
-		    size_t * keysize, befs_off_t * value);
-
+		    size_t *keysize, befs_off_t *value);
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index b4c7ba013c0d..720b3bc5c16a 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -84,13 +84,11 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
  *
  * Takes a file position and gives back a brun who's starting block
  * is block number fblock of the file.
- * 
+ *
  * Returns BEFS_OK or BEFS_ERR.
- * 
+ *
  * Calls specialized functions for each of the three possible
  * datastream regions.
- *
- * 2001-11-15 Will Dyson
  */
 int
 befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
@@ -120,7 +118,7 @@ befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
 
 /**
  * befs_read_lsmylink - read long symlink from datastream.
- * @sb: Filesystem superblock 
+ * @sb: Filesystem superblock
  * @ds: Datastream to read from
  * @buff: Buffer in which to place long symlink data
  * @len: Length of the long symlink in bytes
diff --git a/fs/befs/datastream.h b/fs/befs/datastream.h
index 91ba8203d83f..7ff9ff09ec6e 100644
--- a/fs/befs/datastream.h
+++ b/fs/befs/datastream.h
@@ -5,10 +5,10 @@
 
 struct buffer_head *befs_read_datastream(struct super_block *sb,
 					 const befs_data_stream *ds,
-					 befs_off_t pos, uint * off);
+					 befs_off_t pos, uint *off);
 
 int befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
-		     befs_blocknr_t fblock, befs_block_run * run);
+		     befs_blocknr_t fblock, befs_block_run *run);
 
 size_t befs_read_lsymlink(struct super_block *sb, const befs_data_stream *data,
 			  void *buff, befs_off_t len);
@@ -17,4 +17,3 @@ befs_blocknr_t befs_count_blocks(struct super_block *sb,
 			  const befs_data_stream *ds);
 
 extern const befs_inode_addr BAD_IADDR;
-
diff --git a/fs/befs/debug.c b/fs/befs/debug.c
index 85c13392e9e8..36656c86f50e 100644
--- a/fs/befs/debug.c
+++ b/fs/befs/debug.c
@@ -1,6 +1,6 @@
 /*
  *  linux/fs/befs/debug.c
- * 
+ *
  * Copyright (C) 2001 Will Dyson (will_dyson at pobox.com)
  *
  * With help from the ntfs-tng driver by Anton Altparmakov
@@ -57,6 +57,7 @@ befs_debug(const struct super_block *sb, const char *fmt, ...)
 
 	struct va_format vaf;
 	va_list args;
+
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
@@ -67,7 +68,7 @@ befs_debug(const struct super_block *sb, const char *fmt, ...)
 }
 
 void
-befs_dump_inode(const struct super_block *sb, befs_inode * inode)
+befs_dump_inode(const struct super_block *sb, befs_inode *inode)
 {
 #ifdef CONFIG_BEFS_DEBUG
 
@@ -151,7 +152,7 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode)
  */
 
 void
-befs_dump_super_block(const struct super_block *sb, befs_super_block * sup)
+befs_dump_super_block(const struct super_block *sb, befs_super_block *sup)
 {
 #ifdef CONFIG_BEFS_DEBUG
 
@@ -202,7 +203,7 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup)
 #if 0
 /* unused */
 void
-befs_dump_small_data(const struct super_block *sb, befs_small_data * sd)
+befs_dump_small_data(const struct super_block *sb, befs_small_data *sd)
 {
 }
 
@@ -221,7 +222,8 @@ befs_dump_run(const struct super_block *sb, befs_disk_block_run run)
 #endif  /*  0  */
 
 void
-befs_dump_index_entry(const struct super_block *sb, befs_disk_btree_super * super)
+befs_dump_index_entry(const struct super_block *sb,
+		      befs_disk_btree_super *super)
 {
 #ifdef CONFIG_BEFS_DEBUG
 
@@ -242,7 +244,7 @@ befs_dump_index_entry(const struct super_block *sb, befs_disk_btree_super * supe
 }
 
 void
-befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead * node)
+befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *node)
 {
 #ifdef CONFIG_BEFS_DEBUG
 
diff --git a/fs/befs/inode.c b/fs/befs/inode.c
index fa4b718de597..5367a6470a69 100644
--- a/fs/befs/inode.c
+++ b/fs/befs/inode.c
@@ -1,6 +1,6 @@
 /*
  * inode.c
- * 
+ *
  * Copyright (C) 2001 Will Dyson <will_dyson@pobox.com>
  */
 
@@ -10,12 +10,12 @@
 #include "inode.h"
 
 /*
-	Validates the correctness of the befs inode
-	Returns BEFS_OK if the inode should be used, otherwise
-	returns BEFS_BAD_INODE
-*/
+ * Validates the correctness of the befs inode
+ * Returns BEFS_OK if the inode should be used, otherwise
+ * returns BEFS_BAD_INODE
+ */
 int
-befs_check_inode(struct super_block *sb, befs_inode * raw_inode,
+befs_check_inode(struct super_block *sb, befs_inode *raw_inode,
 		 befs_blocknr_t inode)
 {
 	u32 magic1 = fs32_to_cpu(sb, raw_inode->magic1);
diff --git a/fs/befs/inode.h b/fs/befs/inode.h
index 9dc7fd9b7570..2219e412f49b 100644
--- a/fs/befs/inode.h
+++ b/fs/befs/inode.h
@@ -1,8 +1,7 @@
 /*
  * inode.h
- * 
+ *
  */
 
-int befs_check_inode(struct super_block *sb, befs_inode * raw_inode,
+int befs_check_inode(struct super_block *sb, befs_inode *raw_inode,
 		     befs_blocknr_t inode);
-
diff --git a/fs/befs/io.c b/fs/befs/io.c
index b4a558126ee1..227cb86e07fe 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2001 Will Dyson <will_dyson@pobox.com
  *
- * Based on portions of file.c and inode.c 
+ * Based on portions of file.c and inode.c
  * by Makoto Kato (m_kato@ga2.so-net.ne.jp)
  *
  * Many thanks to Dominic Giampaolo, author of Practical File System
@@ -19,8 +19,7 @@
 /*
  * Converts befs notion of disk addr to a disk offset and uses
  * linux kernel function sb_bread() to get the buffer containing
- * the offset. -Will Dyson
- *
+ * the offset.
  */
 
 struct buffer_head *
@@ -55,7 +54,7 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
 	befs_debug(sb, "<--- %s", __func__);
 	return bh;
 
-      error:
+error:
 	befs_debug(sb, "<--- %s ERROR", __func__);
 	return NULL;
 }
diff --git a/fs/befs/io.h b/fs/befs/io.h
index 78d7bc6e60de..9b3e1967cb31 100644
--- a/fs/befs/io.h
+++ b/fs/befs/io.h
@@ -4,4 +4,3 @@
 
 struct buffer_head *befs_bread_iaddr(struct super_block *sb,
 				     befs_inode_addr iaddr);
-
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 647a276eba56..19407165f4aa 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -18,6 +18,7 @@
 #include <linux/parser.h>
 #include <linux/namei.h>
 #include <linux/sched.h>
+#include <linux/exportfs.h>
 
 #include "befs.h"
 #include "btree.h"
@@ -37,7 +38,8 @@ static int befs_readdir(struct file *, struct dir_context *);
 static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 static int befs_readpage(struct file *file, struct page *page);
 static sector_t befs_bmap(struct address_space *mapping, sector_t block);
-static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int);
+static struct dentry *befs_lookup(struct inode *, struct dentry *,
+				  unsigned int);
 static struct inode *befs_iget(struct super_block *, unsigned long);
 static struct inode *befs_alloc_inode(struct super_block *sb);
 static void befs_destroy_inode(struct inode *inode);
@@ -51,6 +53,10 @@ static void befs_put_super(struct super_block *);
 static int befs_remount(struct super_block *, int *, char *);
 static int befs_statfs(struct dentry *, struct kstatfs *);
 static int parse_options(char *, struct befs_mount_options *);
+static struct dentry *befs_fh_to_dentry(struct super_block *sb,
+				struct fid *fid, int fh_len, int fh_type);
+static struct dentry *befs_fh_to_parent(struct super_block *sb,
+				struct fid *fid, int fh_len, int fh_type);
 
 static const struct super_operations befs_sops = {
 	.alloc_inode	= befs_alloc_inode,	/* allocate a new inode */
@@ -83,9 +89,14 @@ static const struct address_space_operations befs_symlink_aops = {
 	.readpage	= befs_symlink_readpage,
 };
 
-/* 
+static const struct export_operations befs_export_operations = {
+	.fh_to_dentry	= befs_fh_to_dentry,
+	.fh_to_parent	= befs_fh_to_parent,
+};
+
+/*
  * Called by generic_file_read() to read a page of data
- * 
+ *
  * In turn, simply calls a generic block read function and
  * passes it the address of befs_get_block, for mapping file
  * positions to disk blocks.
@@ -102,15 +113,13 @@ befs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, befs_get_block);
 }
 
-/* 
- * Generic function to map a file position (block) to a 
+/*
+ * Generic function to map a file position (block) to a
  * disk offset (passed back in bh_result).
  *
  * Used by many higher level functions.
  *
  * Calls befs_fblock2brun() in datastream.c to do the real work.
- *
- * -WD 10-26-01
  */
 
 static int
@@ -269,15 +278,15 @@ befs_alloc_inode(struct super_block *sb)
 	struct befs_inode_info *bi;
 
 	bi = kmem_cache_alloc(befs_inode_cachep, GFP_KERNEL);
-        if (!bi)
-                return NULL;
-        return &bi->vfs_inode;
+	if (!bi)
+		return NULL;
+	return &bi->vfs_inode;
 }
 
 static void befs_i_callback(struct rcu_head *head)
 {
 	struct inode *inode = container_of(head, struct inode, i_rcu);
-        kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
+	kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
 }
 
 static void befs_destroy_inode(struct inode *inode)
@@ -287,7 +296,7 @@ static void befs_destroy_inode(struct inode *inode)
 
 static void init_once(void *foo)
 {
-        struct befs_inode_info *bi = (struct befs_inode_info *) foo;
+	struct befs_inode_info *bi = (struct befs_inode_info *) foo;
 
 	inode_init_once(&bi->vfs_inode);
 }
@@ -338,7 +347,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	/*
 	 * set uid and gid.  But since current BeOS is single user OS, so
 	 * you can change by "uid" or "gid" options.
-	 */   
+	 */
 
 	inode->i_uid = befs_sb->mount_opts.use_uid ?
 		befs_sb->mount_opts.uid :
@@ -353,14 +362,14 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	 * BEFS's time is 64 bits, but current VFS is 32 bits...
 	 * BEFS don't have access time. Nor inode change time. VFS
 	 * doesn't have creation time.
-	 * Also, the lower 16 bits of the last_modified_time and 
+	 * Also, the lower 16 bits of the last_modified_time and
 	 * create_time are just a counter to help ensure uniqueness
 	 * for indexing purposes. (PFD, page 54)
 	 */
 
 	inode->i_mtime.tv_sec =
 	    fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16;
-	inode->i_mtime.tv_nsec = 0;   /* lower 16 bits are not a time */	
+	inode->i_mtime.tv_nsec = 0;   /* lower 16 bits are not a time */
 	inode->i_ctime = inode->i_mtime;
 	inode->i_atime = inode->i_mtime;
 
@@ -414,10 +423,10 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
 	unlock_new_inode(inode);
 	return inode;
 
-      unacquire_bh:
+unacquire_bh:
 	brelse(bh);
 
-      unacquire_none:
+unacquire_none:
 	iget_failed(inode);
 	befs_debug(sb, "<--- %s - Bad inode", __func__);
 	return ERR_PTR(-EIO);
@@ -442,7 +451,7 @@ befs_init_inodecache(void)
 }
 
 /* Called at fs teardown.
- * 
+ *
  * Taken from NFS implementation by Al Viro.
  */
 static void
@@ -491,13 +500,10 @@ fail:
 }
 
 /*
- * UTF-8 to NLS charset  convert routine
- * 
+ * UTF-8 to NLS charset convert routine
  *
- * Changed 8/10/01 by Will Dyson. Now use uni2char() / char2uni() rather than
- * the nls tables directly
+ * Uses uni2char() / char2uni() rather than the nls tables directly
  */
-
 static int
 befs_utf2nls(struct super_block *sb, const char *in,
 	     int in_len, char **out, int *out_len)
@@ -521,9 +527,8 @@ befs_utf2nls(struct super_block *sb, const char *in,
 	}
 
 	*out = result = kmalloc(maxlen, GFP_NOFS);
-	if (!*out) {
+	if (!*out)
 		return -ENOMEM;
-	}
 
 	for (i = o = 0; i < in_len; i += utflen, o += unilen) {
 
@@ -546,7 +551,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
 
 	return o;
 
-      conv_err:
+conv_err:
 	befs_error(sb, "Name using character set %s contains a character that "
 		   "cannot be converted to unicode.", nls->charset);
 	befs_debug(sb, "<--- %s", __func__);
@@ -561,18 +566,18 @@ befs_utf2nls(struct super_block *sb, const char *in,
  * @in_len: Length of input string in bytes
  * @out: The output string in UTF-8 format
  * @out_len: Length of the output buffer
- * 
+ *
  * Converts input string @in, which is in the format of the loaded NLS map,
  * into a utf8 string.
- * 
+ *
  * The destination string @out is allocated by this function and the caller is
  * responsible for freeing it with kfree()
- * 
+ *
  * On return, *@out_len is the length of @out in bytes.
  *
  * On success, the return value is the number of utf8 characters written to
  * the output buffer @out.
- *  
+ *
  * On Failure, a negative number coresponding to the error code is returned.
  */
 
@@ -585,9 +590,11 @@ befs_nls2utf(struct super_block *sb, const char *in,
 	wchar_t uni;
 	int unilen, utflen;
 	char *result;
-	/* There're nls characters that will translate to 3-chars-wide UTF-8
-	 * characters, a additional byte is needed to save the final \0
-	 * in special cases */
+	/*
+	 * There are nls characters that will translate to 3-chars-wide UTF-8
+	 * characters, an additional byte is needed to save the final \0
+	 * in special cases
+	 */
 	int maxlen = (3 * in_len) + 1;
 
 	befs_debug(sb, "---> %s\n", __func__);
@@ -624,14 +631,41 @@ befs_nls2utf(struct super_block *sb, const char *in,
 
 	return i;
 
-      conv_err:
-	befs_error(sb, "Name using charecter set %s contains a charecter that "
+conv_err:
+	befs_error(sb, "Name using character set %s contains a character that "
 		   "cannot be converted to unicode.", nls->charset);
 	befs_debug(sb, "<--- %s", __func__);
 	kfree(result);
 	return -EILSEQ;
 }
 
+static struct inode *befs_nfs_get_inode(struct super_block *sb, uint64_t ino,
+					 uint32_t generation)
+{
+	/* No need to handle i_generation */
+	return befs_iget(sb, ino);
+}
+
+/*
+ * Map a NFS file handle to a corresponding dentry
+ */
+static struct dentry *befs_fh_to_dentry(struct super_block *sb,
+				struct fid *fid, int fh_len, int fh_type)
+{
+	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+				    befs_nfs_get_inode);
+}
+
+/*
+ * Find the parent for a file specified by NFS handle
+ */
+static struct dentry *befs_fh_to_parent(struct super_block *sb,
+				struct fid *fid, int fh_len, int fh_type)
+{
+	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+				    befs_nfs_get_inode);
+}
+
 enum {
 	Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err,
 };
@@ -666,6 +700,7 @@ parse_options(char *options, struct befs_mount_options *opts)
 
 	while ((p = strsep(&options, ",")) != NULL) {
 		int token;
+
 		if (!*p)
 			continue;
 
@@ -721,7 +756,7 @@ parse_options(char *options, struct befs_mount_options *opts)
 }
 
 /* This function has the responsibiltiy of getting the
- * filesystem ready for unmounting. 
+ * filesystem ready for unmounting.
  * Basically, we free everything that we allocated in
  * befs_read_inode
  */
@@ -782,8 +817,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	 * Linux 2.4.10 and later refuse to read blocks smaller than
 	 * the logical block size for the device. But we also need to read at
 	 * least 1k to get the second 512 bytes of the volume.
-	 * -WD 10-26-01
-	 */ 
+	 */
 	blocksize = sb_min_blocksize(sb, 1024);
 	if (!blocksize) {
 		if (!silent)
@@ -791,7 +825,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 		goto unacquire_priv_sbp;
 	}
 
-	if (!(bh = sb_bread(sb, sb_block))) {
+	bh = sb_bread(sb, sb_block);
+	if (!bh) {
 		if (!silent)
 			befs_error(sb, "unable to read superblock");
 		goto unacquire_priv_sbp;
@@ -816,7 +851,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 
 	brelse(bh);
 
-	if( befs_sb->num_blocks > ~((sector_t)0) ) {
+	if (befs_sb->num_blocks > ~((sector_t)0)) {
 		if (!silent)
 			befs_error(sb, "blocks count: %llu is larger than the host can use",
 					befs_sb->num_blocks);
@@ -831,6 +866,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	/* Set real blocksize of fs */
 	sb_set_blocksize(sb, (ulong) befs_sb->block_size);
 	sb->s_op = &befs_sops;
+	sb->s_export_op = &befs_export_operations;
 	root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir)));
 	if (IS_ERR(root)) {
 		ret = PTR_ERR(root);
@@ -861,16 +897,16 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	return 0;
-/*****************/
-      unacquire_bh:
+
+unacquire_bh:
 	brelse(bh);
 
-      unacquire_priv_sbp:
+unacquire_priv_sbp:
 	kfree(befs_sb->mount_opts.iocharset);
 	kfree(sb->s_fs_info);
 	sb->s_fs_info = NULL;
 
-      unacquire_none:
+unacquire_none:
 	return ret;
 }
 
@@ -919,7 +955,7 @@ static struct file_system_type befs_fs_type = {
 	.name		= "befs",
 	.mount		= befs_mount,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,	
+	.fs_flags	= FS_REQUIRES_DEV,
 };
 MODULE_ALIAS_FS("befs");
 
@@ -956,9 +992,9 @@ exit_befs_fs(void)
 }
 
 /*
-Macros that typecheck the init and exit functions,
-ensures that they are called at init and cleanup,
-and eliminates warnings about unused functions.
-*/
+ * Macros that typecheck the init and exit functions,
+ * ensures that they are called at init and cleanup,
+ * and eliminates warnings about unused functions.
+ */
 module_init(init_befs_fs)
 module_exit(exit_befs_fs)
diff --git a/fs/befs/super.h b/fs/befs/super.h
index dc4556376a22..ec1df30a7e9a 100644
--- a/fs/befs/super.h
+++ b/fs/befs/super.h
@@ -2,7 +2,5 @@
  * super.h
  */
 
-int befs_load_sb(struct super_block *sb, befs_super_block * disk_sb);
-
+int befs_load_sb(struct super_block *sb, befs_super_block *disk_sb);
 int befs_check_sb(struct super_block *sb);
-
diff --git a/fs/compat.c b/fs/compat.c
index 543b48c29ac3..3f4908c28698 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -487,45 +487,6 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
 	return compat_sys_fcntl64(fd, cmd, arg);
 }
 
-COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p)
-{
-	long ret;
-	aio_context_t ctx64;
-
-	mm_segment_t oldfs = get_fs();
-	if (unlikely(get_user(ctx64, ctx32p)))
-		return -EFAULT;
-
-	set_fs(KERNEL_DS);
-	/* The __user pointer cast is valid because of the set_fs() */
-	ret = sys_io_setup(nr_reqs, (aio_context_t __user *) &ctx64);
-	set_fs(oldfs);
-	/* truncating is ok because it's a user address */
-	if (!ret)
-		ret = put_user((u32) ctx64, ctx32p);
-	return ret;
-}
-
-COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
-		       compat_long_t, min_nr,
-		       compat_long_t, nr,
-		       struct io_event __user *, events,
-		       struct compat_timespec __user *, timeout)
-{
-	struct timespec t;
-	struct timespec __user *ut = NULL;
-
-	if (timeout) {
-		if (compat_get_timespec(&t, timeout))
-			return -EFAULT;
-
-		ut = compat_alloc_user_space(sizeof(*ut));
-		if (copy_to_user(ut, &t, sizeof(t)) )
-			return -EFAULT;
-	} 
-	return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
-}
-
 /* A write operation does a read from user space and vice versa */
 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 
@@ -602,42 +563,6 @@ out:
 	return ret;
 }
 
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
-{
-	compat_uptr_t uptr;
-	int i;
-
-	for (i = 0; i < nr; ++i) {
-		if (get_user(uptr, ptr32 + i))
-			return -EFAULT;
-		if (put_user(compat_ptr(uptr), ptr64 + i))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
-
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
-		       int, nr, u32 __user *, iocb)
-{
-	struct iocb __user * __user *iocb64; 
-	long ret;
-
-	if (unlikely(nr < 0))
-		return -EINVAL;
-
-	if (nr > MAX_AIO_SUBMITS)
-		nr = MAX_AIO_SUBMITS;
-	
-	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
-	ret = copy_iocb(nr, iocb, iocb64);
-	if (!ret)
-		ret = do_io_submit(ctx_id, nr, iocb64, 1);
-	return ret;
-}
-
 struct compat_ncp_mount_data {
 	compat_int_t version;
 	compat_uint_t ncp_fd;
diff --git a/fs/exec.c b/fs/exec.c
index 8112eacf10f3..eadbf5069c38 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
  * current->executable is only used by the procfs.  This allows a dispatch
  * table to check for several different types  of binary formats.  We keep
  * trying until we recognize the file or we run out of supported binary
- * formats. 
+ * formats.
  */
 
 #include <linux/slab.h>
@@ -1268,6 +1268,13 @@ int flush_old_exec(struct linux_binprm * bprm)
 	flush_thread();
 	current->personality &= ~bprm->per_clear;
 
+	/*
+	 * We have to apply CLOEXEC before we change whether the process is
+	 * dumpable (in setup_new_exec) to avoid a race with a process in userspace
+	 * trying to access the should-be-closed file descriptors of a process
+	 * undergoing exec(2).
+	 */
+	do_close_on_exec(current->files);
 	return 0;
 
 out:
@@ -1330,7 +1337,6 @@ void setup_new_exec(struct linux_binprm * bprm)
 	   group */
 	current->self_exec_id++;
 	flush_signal_handlers(current, 0);
-	do_close_on_exec(current->files);
 }
 EXPORT_SYMBOL(setup_new_exec);
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e173afe92661..0093ea2512a8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1478,6 +1478,10 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 		inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
 	else
 		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
+	if (i_size_read(inode) < 0) {
+		ret = -EFSCORRUPTED;
+		goto bad_inode;
+	}
 	ei->i_dtime = 0;
 	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
 	ei->i_state = 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index f7e28f8ea04d..b5b1259e064f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -96,10 +96,6 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
 	return &mountpoint_hashtable[tmp & mp_hash_mask];
 }
 
-/*
- * allocation is serialized by namespace_sem, but we need the spinlock to
- * serialize with freeing.
- */
 static int mnt_alloc_id(struct mount *mnt)
 {
 	int res;
@@ -1034,6 +1030,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 		if (IS_MNT_SLAVE(old))
 			list_add(&mnt->mnt_slave, &old->mnt_slave);
 		mnt->mnt_master = old->mnt_master;
+	} else {
+		CLEAR_MNT_SHARED(mnt);
 	}
 	if (flag & CL_MAKE_SHARED)
 		set_mnt_shared(mnt);
@@ -1828,9 +1826,7 @@ struct vfsmount *clone_private_mount(const struct path *path)
 	if (IS_MNT_UNBINDABLE(old_mnt))
 		return ERR_PTR(-EINVAL);
 
-	down_read(&namespace_sem);
 	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
-	up_read(&namespace_sem);
 	if (IS_ERR(new_mnt))
 		return ERR_CAST(new_mnt);
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index cb22a9f9ae7e..fad81041f5ab 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1273,8 +1273,8 @@ out_error:
  */
 static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	int error;
 	struct inode *inode = d_inode(dentry);
+	int error = 0;
 
 	/*
 	 * I believe we can only get a negative dentry here in the case of a
@@ -1293,7 +1293,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
 		return 0;
 	}
 
-	error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	if (nfs_mapping_need_revalidate_inode(inode))
+		error = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
 			__func__, inode->i_ino, error ? "invalid" : "valid");
 	return !error;
@@ -2285,8 +2286,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str
 		if (cache == NULL)
 			goto out;
 		/* Found an entry, is our attribute cache valid? */
-		if (!nfs_attribute_cache_expired(inode) &&
-		    !(nfsi->cache_validity & NFS_INO_INVALID_ATTR))
+		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
 			break;
 		err = -ECHILD;
 		if (!may_block)
@@ -2334,12 +2334,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred,
 		cache = NULL;
 	if (cache == NULL)
 		goto out;
-	err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode);
-	if (err)
+	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
 		goto out;
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
+	err = 0;
 out:
 	rcu_read_unlock();
 	return err;
@@ -2491,12 +2491,13 @@ EXPORT_SYMBOL_GPL(nfs_may_open);
 static int nfs_execute_ok(struct inode *inode, int mask)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
-	int ret;
+	int ret = 0;
 
-	if (mask & MAY_NOT_BLOCK)
-		ret = nfs_revalidate_inode_rcu(server, inode);
-	else
-		ret = nfs_revalidate_inode(server, inode);
+	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) {
+		if (mask & MAY_NOT_BLOCK)
+			return -ECHILD;
+		ret = __nfs_revalidate_inode(server, inode);
+	}
 	if (ret == 0 && !execute_ok(inode))
 		ret = -EACCES;
 	return ret;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 55208b9b3c11..157cb43ce9db 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -101,21 +101,11 @@ EXPORT_SYMBOL_GPL(nfs_file_release);
 static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_inode *nfsi = NFS_I(inode);
-	const unsigned long force_reval = NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED;
-	unsigned long cache_validity = nfsi->cache_validity;
-
-	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
-	    (cache_validity & force_reval) != force_reval)
-		goto out_noreval;
 
 	if (filp->f_flags & O_DIRECT)
 		goto force_reval;
-	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
-		goto force_reval;
-	if (nfs_attribute_timeout(inode))
+	if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE))
 		goto force_reval;
-out_noreval:
 	return 0;
 force_reval:
 	return __nfs_revalidate_inode(server, inode);
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index a5589b791439..f956ca20a8a3 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -282,7 +282,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 			     s->nfs_client->cl_minorversion);
 
 out_test_devid:
-	if (filelayout_test_devid_unavailable(devid))
+	if (ret->ds_clp == NULL ||
+	    filelayout_test_devid_unavailable(devid))
 		ret = NULL;
 out:
 	return ret;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 9e111d07f667..45962fe5098c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1126,7 +1126,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 	case -EPIPE:
 		dprintk("%s DS connection error %d\n", __func__,
 			task->tk_status);
-		nfs4_mark_deviceid_unavailable(devid);
+		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
+				&devid->deviceid);
 		rpc_wake_up(&tbl->slot_tbl_waitq);
 		/* fall through */
 	default:
@@ -1175,7 +1176,8 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
 	default:
 		dprintk("%s DS connection error %d\n", __func__,
 			task->tk_status);
-		nfs4_mark_deviceid_unavailable(devid);
+		nfs4_delete_deviceid(devid->ld, devid->nfs_client,
+				&devid->deviceid);
 	}
 	/* FIXME: Need to prevent infinite looping here. */
 	return -NFS4ERR_RESET_TO_PNFS;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 3cc39d1c1206..e5a6f248697b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -177,7 +177,7 @@ out_err:
 static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg,
 		struct nfs4_deviceid_node *devid)
 {
-	nfs4_mark_deviceid_unavailable(devid);
+	nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid);
 	if (!ff_layout_has_available_ds(lseg))
 		pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
 				lseg);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5864146e05e6..011e4f8c1e01 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -160,6 +160,43 @@ int nfs_sync_mapping(struct address_space *mapping)
 	return ret;
 }
 
+static int nfs_attribute_timeout(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
+}
+
+static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags)
+{
+	unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+
+	/* Special case for the pagecache or access cache */
+	if (flags == NFS_INO_REVAL_PAGECACHE &&
+	    !(cache_validity & NFS_INO_REVAL_FORCED))
+		return false;
+	return (cache_validity & flags) != 0;
+}
+
+static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags)
+{
+	unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+
+	if ((cache_validity & flags) != 0)
+		return true;
+	if (nfs_attribute_timeout(inode))
+		return true;
+	return false;
+}
+
+bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
+{
+	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+		return nfs_check_cache_invalid_delegated(inode, flags);
+
+	return nfs_check_cache_invalid_not_delegated(inode, flags);
+}
+
 static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -795,6 +832,8 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
 	if (!is_sync)
 		return;
 	inode = d_inode(ctx->dentry);
+	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+		return;
 	nfsi = NFS_I(inode);
 	if (inode->i_mapping->nrpages == 0)
 		return;
@@ -1044,13 +1083,6 @@ out:
 	return status;
 }
 
-int nfs_attribute_timeout(struct inode *inode)
-{
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
-}
-
 int nfs_attribute_cache_expired(struct inode *inode)
 {
 	if (nfs_have_delegated_attributes(inode))
@@ -1073,15 +1105,6 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
 
-int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
-{
-	if (!(NFS_I(inode)->cache_validity &
-			(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
-			&& !nfs_attribute_cache_expired(inode))
-		return NFS_STALE(inode) ? -ESTALE : 0;
-	return -ECHILD;
-}
-
 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
@@ -1114,17 +1137,8 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 
 bool nfs_mapping_need_revalidate_inode(struct inode *inode)
 {
-	unsigned long cache_validity = NFS_I(inode)->cache_validity;
-
-	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
-		const unsigned long force_reval =
-			NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED;
-		return (cache_validity & force_reval) == force_reval;
-	}
-
-	return (cache_validity & NFS_INO_REVAL_PAGECACHE)
-		|| nfs_attribute_timeout(inode)
-		|| NFS_STALE(inode);
+	return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+		NFS_STALE(inode);
 }
 
 int nfs_revalidate_mapping_rcu(struct inode *inode)
@@ -1536,13 +1550,6 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr
 {
 	unsigned long invalid = NFS_INO_INVALID_ATTR;
 
-	/*
-	 * Don't revalidate the pagecache if we hold a delegation, but do
-	 * force an attribute update
-	 */
-	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
-		invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED;
-
 	if (S_ISDIR(inode->i_mode))
 		invalid |= NFS_INO_INVALID_DATA;
 	nfs_set_cache_invalid(inode, invalid);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6b79c2ca9b9a..09ca5095c04e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -381,6 +381,7 @@ extern int nfs_drop_inode(struct inode *);
 extern void nfs_clear_inode(struct inode *);
 extern void nfs_evict_inode(struct inode *);
 void nfs_zap_acl_cache(struct inode *inode);
+extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
 extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
 extern int nfs_wait_atomic_killable(atomic_t *p);
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d33242c8d95d..6dcbc5defb7a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1089,8 +1089,15 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 
 	spin_lock(&dir->i_lock);
 	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
-	if (!cinfo->atomic || cinfo->before != dir->i_version)
+	if (cinfo->atomic && cinfo->before == dir->i_version) {
+		nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+		nfsi->attrtimeo_timestamp = jiffies;
+	} else {
 		nfs_force_lookup_revalidate(dir);
+		if (cinfo->before != dir->i_version)
+			nfsi->cache_validity |= NFS_INO_INVALID_ACCESS |
+				NFS_INO_INVALID_ACL;
+	}
 	dir->i_version = cinfo->after;
 	nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 	nfs_fscache_invalidate(dir);
@@ -3115,6 +3122,16 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			res_stateid = &calldata->res.stateid;
 			renew_lease(server, calldata->timestamp);
 			break;
+		case -NFS4ERR_ACCESS:
+			if (calldata->arg.bitmask != NULL) {
+				calldata->arg.bitmask = NULL;
+				calldata->res.fattr = NULL;
+				task->tk_status = 0;
+				rpc_restart_call_prepare(task);
+				goto out_release;
+
+			}
+			break;
 		case -NFS4ERR_ADMIN_REVOKED:
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
@@ -3140,7 +3157,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			res_stateid, calldata->arg.fmode);
 out_release:
 	nfs_release_seqid(calldata->arg.seqid);
-	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
+	nfs_refresh_inode(calldata->inode, &calldata->fattr);
 	dprintk("%s: done, ret = %d!\n", __func__, task->tk_status);
 }
 
@@ -3193,9 +3210,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		goto out_wait;
 	}
 
-	if (calldata->arg.fmode == 0) {
+	if (calldata->arg.fmode == 0)
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
 
+	if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
 		/* Close-to-open cache consistency revalidation */
 		if (!nfs4_have_delegation(inode, FMODE_READ))
 			calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
@@ -3207,7 +3225,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		nfs4_map_atomic_open_share(NFS_SERVER(inode),
 				calldata->arg.fmode, 0);
 
-	nfs_fattr_init(calldata->res.fattr);
+	if (calldata->res.fattr == NULL)
+		calldata->arg.bitmask = NULL;
+	else if (calldata->arg.bitmask == NULL)
+		calldata->res.fattr = NULL;
 	calldata->timestamp = jiffies;
 	if (nfs4_setup_sequence(NFS_SERVER(inode),
 				&calldata->arg.seq_args,
@@ -3274,6 +3295,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
 	calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask);
 	if (IS_ERR(calldata->arg.seqid))
 		goto out_free_calldata;
+	nfs_fattr_init(&calldata->fattr);
 	calldata->arg.fmode = 0;
 	calldata->lr.arg.ld_private = &calldata->lr.ld_private;
 	calldata->res.fattr = &calldata->fattr;
@@ -5673,6 +5695,14 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 	case -NFS4ERR_STALE_STATEID:
 		task->tk_status = 0;
 		break;
+	case -NFS4ERR_ACCESS:
+		if (data->args.bitmask) {
+			data->args.bitmask = NULL;
+			data->res.fattr = NULL;
+			task->tk_status = 0;
+			rpc_restart_call_prepare(task);
+			return;
+		}
 	default:
 		if (nfs4_async_handle_error(task, data->res.server,
 					    NULL, NULL) == -EAGAIN) {
@@ -5692,6 +5722,7 @@ static void nfs4_delegreturn_release(void *calldata)
 		if (data->lr.roc)
 			pnfs_roc_release(&data->lr.arg, &data->lr.res,
 					data->res.lr_ret);
+		nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
 		nfs_iput_and_deactive(inode);
 	}
 	kfree(calldata);
@@ -5780,10 +5811,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	if (status != 0)
 		goto out;
 	status = data->rpc_status;
-	if (status == 0)
-		nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
-	else
-		nfs_refresh_inode(inode, &data->fattr);
 out:
 	rpc_put_task(task);
 	return status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 95baf7d340f0..1d152f4470cd 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -494,21 +494,18 @@ nfs4_alloc_state_owner(struct nfs_server *server,
 }
 
 static void
-nfs4_drop_state_owner(struct nfs4_state_owner *sp)
-{
-	struct rb_node *rb_node = &sp->so_server_node;
-
-	if (!RB_EMPTY_NODE(rb_node)) {
-		struct nfs_server *server = sp->so_server;
-		struct nfs_client *clp = server->nfs_client;
-
-		spin_lock(&clp->cl_lock);
-		if (!RB_EMPTY_NODE(rb_node)) {
-			rb_erase(rb_node, &server->state_owners);
-			RB_CLEAR_NODE(rb_node);
-		}
-		spin_unlock(&clp->cl_lock);
-	}
+nfs4_reset_state_owner(struct nfs4_state_owner *sp)
+{
+	/* This state_owner is no longer usable, but must
+	 * remain in place so that state recovery can find it
+	 * and the opens associated with it.
+	 * It may also be used for new 'open' request to
+	 * return a delegation to the server.
+	 * So update the 'create_time' so that it looks like
+	 * a new state_owner.  This will cause the server to
+	 * request an OPEN_CONFIRM to start a new sequence.
+	 */
+	sp->so_seqid.create_time = ktime_get();
 }
 
 static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
@@ -797,21 +794,33 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
 
 /*
  * Search the state->lock_states for an existing lock_owner
- * that is compatible with current->files
+ * that is compatible with either of the given owners.
+ * If the second is non-zero, then the first refers to a Posix-lock
+ * owner (current->files) and the second refers to a flock/OFD
+ * owner (struct file*).  In that case, prefer a match for the first
+ * owner.
+ * If both sorts of locks are held on the one file we cannot know
+ * which stateid was intended to be used, so a "correct" choice cannot
+ * be made.  Failing that, a "consistent" choice is preferable.  The
+ * consistent choice we make is to prefer the first owner, that of a
+ * Posix lock.
  */
 static struct nfs4_lock_state *
 __nfs4_find_lock_state(struct nfs4_state *state,
 		       fl_owner_t fl_owner, fl_owner_t fl_owner2)
 {
-	struct nfs4_lock_state *pos;
+	struct nfs4_lock_state *pos, *ret = NULL;
 	list_for_each_entry(pos, &state->lock_states, ls_locks) {
-		if (pos->ls_owner != fl_owner &&
-		    pos->ls_owner != fl_owner2)
-			continue;
-		atomic_inc(&pos->ls_count);
-		return pos;
+		if (pos->ls_owner == fl_owner) {
+			ret = pos;
+			break;
+		}
+		if (pos->ls_owner == fl_owner2)
+			ret = pos;
 	}
-	return NULL;
+	if (ret)
+		atomic_inc(&ret->ls_count);
+	return ret;
 }
 
 /*
@@ -1101,7 +1110,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 
 	sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid);
 	if (status == -NFS4ERR_BAD_SEQID)
-		nfs4_drop_state_owner(sp);
+		nfs4_reset_state_owner(sp);
 	if (!nfs4_has_session(sp->so_server->nfs_client))
 		nfs_increment_seqid(status, seqid);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1af6268a7d8c..e9255cb453e6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -502,11 +502,13 @@ static int nfs4_stat_to_errno(int);
 				(compound_encode_hdr_maxsz + \
 				 encode_sequence_maxsz + \
 				 encode_putfh_maxsz + \
+				 encode_layoutreturn_maxsz + \
 				 encode_open_downgrade_maxsz)
 #define NFS4_dec_open_downgrade_sz \
 				(compound_decode_hdr_maxsz + \
 				 decode_sequence_maxsz + \
 				 decode_putfh_maxsz + \
+				 decode_layoutreturn_maxsz + \
 				 decode_open_downgrade_maxsz)
 #define NFS4_enc_close_sz	(compound_encode_hdr_maxsz + \
 				 encode_sequence_maxsz + \
@@ -2277,9 +2279,9 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	if (args->lr_args)
 		encode_layoutreturn(xdr, args->lr_args, &hdr);
-	encode_close(xdr, args, &hdr);
 	if (args->bitmask != NULL)
 		encode_getfattr(xdr, args->bitmask, &hdr);
+	encode_close(xdr, args, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -2356,6 +2358,8 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
+	if (args->lr_args)
+		encode_layoutreturn(xdr, args->lr_args, &hdr);
 	encode_open_downgrade(xdr, args, &hdr);
 	encode_nops(&hdr);
 }
@@ -2701,7 +2705,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req,
 	encode_putfh(xdr, args->fhandle, &hdr);
 	if (args->lr_args)
 		encode_layoutreturn(xdr, args->lr_args, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	if (args->bitmask)
+		encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_delegreturn(xdr, args->stateid, &hdr);
 	encode_nops(&hdr);
 }
@@ -6151,6 +6156,12 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
+	if (res->lr_res) {
+		status = decode_layoutreturn(xdr, res->lr_res);
+		res->lr_ret = status;
+		if (status)
+			goto out;
+	}
 	status = decode_open_downgrade(xdr, res);
 out:
 	return status;
@@ -6484,16 +6495,12 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		if (status)
 			goto out;
 	}
+	if (res->fattr != NULL) {
+		status = decode_getfattr(xdr, res->fattr, res->server);
+		if (status != 0)
+			goto out;
+	}
 	status = decode_close(xdr, res);
-	if (status != 0)
-		goto out;
-	/*
-	 * Note: Server may do delete on close for this file
-	 * 	in which case the getattr call will fail with
-	 * 	an ESTALE error. Shouldn't be a problem,
-	 * 	though, since fattr->valid will remain unset.
-	 */
-	decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -6966,9 +6973,11 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp,
 		if (status)
 			goto out;
 	}
-	status = decode_getfattr(xdr, res->fattr, res->server);
-	if (status != 0)
-		goto out;
+	if (res->fattr) {
+		status = decode_getfattr(xdr, res->fattr, res->server);
+		if (status != 0)
+			goto out;
+	}
 	status = decode_delegreturn(xdr);
 out:
 	return status;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 896df7bdf85f..59554f3adf29 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1251,6 +1251,7 @@ bool pnfs_roc(struct inode *ino,
 	nfs4_stateid stateid;
 	enum pnfs_iomode iomode = 0;
 	bool layoutreturn = false, roc = false;
+	bool skip_read = false;
 
 	if (!nfs_have_layout(ino))
 		return false;
@@ -1270,18 +1271,27 @@ retry:
 	}
 
 	/* no roc if we hold a delegation */
-	if (nfs4_check_delegation(ino, FMODE_READ))
-		goto out_noroc;
+	if (nfs4_check_delegation(ino, FMODE_READ)) {
+		if (nfs4_check_delegation(ino, FMODE_WRITE))
+			goto out_noroc;
+		skip_read = true;
+	}
 
 	list_for_each_entry(ctx, &nfsi->open_files, list) {
 		state = ctx->state;
+		if (state == NULL)
+			continue;
 		/* Don't return layout if there is open file state */
-		if (state != NULL && state->state != 0)
+		if (state->state & FMODE_WRITE)
 			goto out_noroc;
+		if (state->state & FMODE_READ)
+			skip_read = true;
 	}
 
 
 	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) {
+		if (skip_read && lseg->pls_range.iomode == IOMODE_READ)
+			continue;
 		/* If we are sending layoutreturn, invalidate all valid lsegs */
 		if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags))
 			continue;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 741077deef3b..a3645249f7ec 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -150,12 +150,10 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
  */
 void fsnotify_unmount_inodes(struct super_block *sb)
 {
-	struct inode *inode, *next_i, *need_iput = NULL;
+	struct inode *inode, *iput_inode = NULL;
 
 	spin_lock(&sb->s_inode_list_lock);
-	list_for_each_entry_safe(inode, next_i, &sb->s_inodes, i_sb_list) {
-		struct inode *need_iput_tmp;
-
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		/*
 		 * We cannot __iget() an inode in state I_FREEING,
 		 * I_WILL_FREE, or I_NEW which is fine because by that point
@@ -178,49 +176,24 @@ void fsnotify_unmount_inodes(struct super_block *sb)
 			continue;
 		}
 
-		need_iput_tmp = need_iput;
-		need_iput = NULL;
-
-		/* In case fsnotify_inode_delete() drops a reference. */
-		if (inode != need_iput_tmp)
-			__iget(inode);
-		else
-			need_iput_tmp = NULL;
+		__iget(inode);
 		spin_unlock(&inode->i_lock);
-
-		/* In case the dropping of a reference would nuke next_i. */
-		while (&next_i->i_sb_list != &sb->s_inodes) {
-			spin_lock(&next_i->i_lock);
-			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
-						atomic_read(&next_i->i_count)) {
-				__iget(next_i);
-				need_iput = next_i;
-				spin_unlock(&next_i->i_lock);
-				break;
-			}
-			spin_unlock(&next_i->i_lock);
-			next_i = list_next_entry(next_i, i_sb_list);
-		}
-
-		/*
-		 * We can safely drop s_inode_list_lock here because either
-		 * we actually hold references on both inode and next_i or
-		 * end of list.  Also no new inodes will be added since the
-		 * umount has begun.
-		 */
 		spin_unlock(&sb->s_inode_list_lock);
 
-		if (need_iput_tmp)
-			iput(need_iput_tmp);
+		if (iput_inode)
+			iput(iput_inode);
 
 		/* for each watch, send FS_UNMOUNT and then remove it */
 		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 
 		fsnotify_inode_delete(inode);
 
-		iput(inode);
+		iput_inode = inode;
 
 		spin_lock(&sb->s_inode_list_lock);
 	}
 	spin_unlock(&sb->s_inode_list_lock);
+
+	if (iput_inode)
+		iput(iput_inode);
 }
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 87e577a49b0d..cec495a921e3 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -634,7 +634,15 @@ static void qsync_work_fn(struct work_struct *work)
 						      dqi_sync_work.work);
 	struct super_block *sb = oinfo->dqi_gqinode->i_sb;
 
-	dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
+	/*
+	 * We have to be careful here not to deadlock on s_umount as umount
+	 * disabling quotas may be in progress and it waits for this work to
+	 * complete. If trylock fails, we'll do the sync next time...
+	 */
+	if (down_read_trylock(&sb->s_umount)) {
+		dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
+		up_read(&sb->s_umount);
+	}
 	schedule_delayed_work(&oinfo->dqi_sync_work,
 			      msecs_to_jiffies(oinfo->dqi_syncms));
 }
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8a54fd8a4fa5..32c5a40c1257 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -454,7 +454,7 @@ out:
 /* Sync changes in local quota file into global quota file and
  * reinitialize local quota file.
  * The function expects local quota file to be already locked and
- * dqonoff_mutex locked. */
+ * s_umount locked in shared mode. */
 static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 					  int type,
 					  struct ocfs2_quota_recovery *rec)
@@ -597,7 +597,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 	printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
 	       "slot %u\n", osb->dev_str, slot_num);
 
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+	down_read(&sb->s_umount);
 	for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
 		if (list_empty(&(rec->r_list[type])))
 			continue;
@@ -674,7 +674,7 @@ out_put:
 			break;
 	}
 out:
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+	up_read(&sb->s_umount);
 	kfree(rec);
 	return status;
 }
@@ -840,7 +840,10 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
 	}
 	ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
 
-	/* dqonoff_mutex protects us against racing with recovery thread... */
+	/*
+	 * s_umount held in exclusive mode protects us against racing with
+	 * recovery thread...
+	 */
 	if (oinfo->dqi_rec) {
 		ocfs2_free_quota_recovery(oinfo->dqi_rec);
 		mark_clean = 0;
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index d171d2c53f7f..f8933cb53d68 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4834,7 +4834,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 
 	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
 			&len, is_dedupe);
-	if (ret || len == 0)
+	if (ret <= 0)
 		goto out_unlock;
 
 	/* Lock out changes to the allocation maps and remap. */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c894d945b084..a24e42f95341 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -985,7 +985,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
 	for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
 		if (!sb_has_quota_loaded(sb, type))
 			continue;
-		/* Cancel periodic syncing before we grab dqonoff_mutex */
 		oinfo = sb_dqinfo(sb, type)->dqi_priv;
 		cancel_delayed_work_sync(&oinfo->dqi_sync_work);
 		inode = igrab(sb->s_dquot.files[type]);
diff --git a/fs/pnode.c b/fs/pnode.c
index 234a9ac49958..06a793f4ae38 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -67,49 +67,47 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
 
 static int do_make_slave(struct mount *mnt)
 {
-	struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
-	struct mount *slave_mnt;
+	struct mount *master, *slave_mnt;
 
-	/*
-	 * slave 'mnt' to a peer mount that has the
-	 * same root dentry. If none is available then
-	 * slave it to anything that is available.
-	 */
-	while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
-	       peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;
-
-	if (peer_mnt == mnt) {
-		peer_mnt = next_peer(mnt);
-		if (peer_mnt == mnt)
-			peer_mnt = NULL;
-	}
-	if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
-	    list_empty(&mnt->mnt_share))
-		mnt_release_group_id(mnt);
-
-	list_del_init(&mnt->mnt_share);
-	mnt->mnt_group_id = 0;
-
-	if (peer_mnt)
-		master = peer_mnt;
-
-	if (master) {
-		list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
-			slave_mnt->mnt_master = master;
-		list_move(&mnt->mnt_slave, &master->mnt_slave_list);
-		list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
-		INIT_LIST_HEAD(&mnt->mnt_slave_list);
+	if (list_empty(&mnt->mnt_share)) {
+		if (IS_MNT_SHARED(mnt)) {
+			mnt_release_group_id(mnt);
+			CLEAR_MNT_SHARED(mnt);
+		}
+		master = mnt->mnt_master;
+		if (!master) {
+			struct list_head *p = &mnt->mnt_slave_list;
+			while (!list_empty(p)) {
+				slave_mnt = list_first_entry(p,
+						struct mount, mnt_slave);
+				list_del_init(&slave_mnt->mnt_slave);
+				slave_mnt->mnt_master = NULL;
+			}
+			return 0;
+		}
 	} else {
-		struct list_head *p = &mnt->mnt_slave_list;
-		while (!list_empty(p)) {
-                        slave_mnt = list_first_entry(p,
-					struct mount, mnt_slave);
-			list_del_init(&slave_mnt->mnt_slave);
-			slave_mnt->mnt_master = NULL;
+		struct mount *m;
+		/*
+		 * slave 'mnt' to a peer mount that has the
+		 * same root dentry. If none is available then
+		 * slave it to anything that is available.
+		 */
+		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
+			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
+				master = m;
+				break;
+			}
 		}
+		list_del_init(&mnt->mnt_share);
+		mnt->mnt_group_id = 0;
+		CLEAR_MNT_SHARED(mnt);
 	}
+	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
+		slave_mnt->mnt_master = master;
+	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
+	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
+	INIT_LIST_HEAD(&mnt->mnt_slave_list);
 	mnt->mnt_master = master;
-	CLEAR_MNT_SHARED(mnt);
 	return 0;
 }
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8738a0d62c09..406fed92362a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -119,8 +119,7 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *   dqonoff_mutex > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
- * dqonoff_mutex > i_mutex comes from dquot_quota_sync, dquot_enable, etc.
+ *   s_umount > i_mutex > journal_lock > dquot->dq_lock > dqio_mutex
  */
 
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_list_lock);
@@ -572,7 +571,8 @@ int dquot_scan_active(struct super_block *sb,
 	struct dquot *dquot, *old_dquot = NULL;
 	int ret = 0;
 
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+	WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount));
+
 	spin_lock(&dq_list_lock);
 	list_for_each_entry(dquot, &inuse_list, dq_inuse) {
 		if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
@@ -603,7 +603,6 @@ int dquot_scan_active(struct super_block *sb,
 	spin_unlock(&dq_list_lock);
 out:
 	dqput(old_dquot);
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return ret;
 }
 EXPORT_SYMBOL(dquot_scan_active);
@@ -617,7 +616,8 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
 	int cnt;
 	int err, ret = 0;
 
-	mutex_lock(&dqopt->dqonoff_mutex);
+	WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount));
+
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
@@ -653,7 +653,6 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
 		    && info_dirty(&dqopt->info[cnt]))
 			sb->dq_op->write_info(sb, cnt);
 	dqstats_inc(DQST_SYNCS);
-	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	return ret;
 }
@@ -683,7 +682,6 @@ int dquot_quota_sync(struct super_block *sb, int type)
 	 * Now when everything is written we can discard the pagecache so
 	 * that userspace sees the changes.
 	 */
-	mutex_lock(&dqopt->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
@@ -693,7 +691,6 @@ int dquot_quota_sync(struct super_block *sb, int type)
 		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
 		inode_unlock(dqopt->files[cnt]);
 	}
-	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	return 0;
 }
@@ -935,7 +932,7 @@ static int dqinit_needed(struct inode *inode, int type)
 	return 0;
 }
 
-/* This routine is guarded by dqonoff_mutex mutex */
+/* This routine is guarded by s_umount semaphore */
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct inode *inode, *old_inode = NULL;
@@ -2050,21 +2047,13 @@ int dquot_get_next_id(struct super_block *sb, struct kqid *qid)
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int err;
 
-	mutex_lock(&dqopt->dqonoff_mutex);
-	if (!sb_has_quota_active(sb, qid->type)) {
-		err = -ESRCH;
-		goto out;
-	}
-	if (!dqopt->ops[qid->type]->get_next_id) {
-		err = -ENOSYS;
-		goto out;
-	}
+	if (!sb_has_quota_active(sb, qid->type))
+		return -ESRCH;
+	if (!dqopt->ops[qid->type]->get_next_id)
+		return -ENOSYS;
 	mutex_lock(&dqopt->dqio_mutex);
 	err = dqopt->ops[qid->type]->get_next_id(sb, qid);
 	mutex_unlock(&dqopt->dqio_mutex);
-out:
-	mutex_unlock(&dqopt->dqonoff_mutex);
-
 	return err;
 }
 EXPORT_SYMBOL(dquot_get_next_id);
@@ -2107,6 +2096,10 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 	struct quota_info *dqopt = sb_dqopt(sb);
 	struct inode *toputinode[MAXQUOTAS];
 
+	/* s_umount should be held in exclusive mode */
+	if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
+		up_read(&sb->s_umount);
+
 	/* Cannot turn off usage accounting without turning off limits, or
 	 * suspend quotas and simultaneously turn quotas off. */
 	if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED))
@@ -2114,18 +2107,14 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 	    DQUOT_USAGE_ENABLED)))
 		return -EINVAL;
 
-	/* We need to serialize quota_off() for device */
-	mutex_lock(&dqopt->dqonoff_mutex);
-
 	/*
 	 * Skip everything if there's nothing to do. We have to do this because
 	 * sometimes we are called when fill_super() failed and calling
 	 * sync_fs() in such cases does no good.
 	 */
-	if (!sb_any_quota_loaded(sb)) {
-		mutex_unlock(&dqopt->dqonoff_mutex);
+	if (!sb_any_quota_loaded(sb))
 		return 0;
-	}
+
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		toputinode[cnt] = NULL;
 		if (type != -1 && cnt != type)
@@ -2179,7 +2168,6 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 		dqopt->info[cnt].dqi_bgrace = 0;
 		dqopt->ops[cnt] = NULL;
 	}
-	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	/* Skip syncing and setting flags if quota files are hidden */
 	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
@@ -2196,20 +2184,14 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
 	 * must also discard the blockdev buffers so that we see the
 	 * changes done by userspace on the next quotaon() */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (toputinode[cnt]) {
-			mutex_lock(&dqopt->dqonoff_mutex);
-			/* If quota was reenabled in the meantime, we have
-			 * nothing to do */
-			if (!sb_has_quota_loaded(sb, cnt)) {
-				inode_lock(toputinode[cnt]);
-				toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
+		/* This can happen when suspending quotas on remount-ro... */
+		if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) {
+			inode_lock(toputinode[cnt]);
+			toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
 				  S_NOATIME | S_NOQUOTA);
-				truncate_inode_pages(&toputinode[cnt]->i_data,
-						     0);
-				inode_unlock(toputinode[cnt]);
-				mark_inode_dirty_sync(toputinode[cnt]);
-			}
-			mutex_unlock(&dqopt->dqonoff_mutex);
+			truncate_inode_pages(&toputinode[cnt]->i_data, 0);
+			inode_unlock(toputinode[cnt]);
+			mark_inode_dirty_sync(toputinode[cnt]);
 		}
 	if (sb->s_bdev)
 		invalidate_bdev(sb->s_bdev);
@@ -2281,6 +2263,10 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		error = -EINVAL;
 		goto out_fmt;
 	}
+	if (sb_has_quota_loaded(sb, type)) {
+		error = -EBUSY;
+		goto out_fmt;
+	}
 
 	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
 		/* As we bypass the pagecache we must now flush all the
@@ -2292,11 +2278,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		sync_filesystem(sb);
 		invalidate_bdev(sb->s_bdev);
 	}
-	mutex_lock(&dqopt->dqonoff_mutex);
-	if (sb_has_quota_loaded(sb, type)) {
-		error = -EBUSY;
-		goto out_lock;
-	}
 
 	if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
 		/* We don't want quota and atime on quota files (deadlocks
@@ -2317,7 +2298,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 	error = -EIO;
 	dqopt->files[type] = igrab(inode);
 	if (!dqopt->files[type])
-		goto out_lock;
+		goto out_file_flags;
 	error = -EINVAL;
 	if (!fmt->qf_ops->check_quota_file(sb, type))
 		goto out_file_init;
@@ -2340,14 +2321,13 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 	spin_unlock(&dq_state_lock);
 
 	add_dquot_ref(sb, type);
-	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	return 0;
 
 out_file_init:
 	dqopt->files[type] = NULL;
 	iput(inode);
-out_lock:
+out_file_flags:
 	if (oldflags != -1) {
 		inode_lock(inode);
 		/* Set the flags back (in the case of accidental quotaon()
@@ -2356,7 +2336,6 @@ out_lock:
 		inode->i_flags |= oldflags;
 		inode_unlock(inode);
 	}
-	mutex_unlock(&dqopt->dqonoff_mutex);
 out_fmt:
 	put_quota_format(fmt);
 
@@ -2371,15 +2350,16 @@ int dquot_resume(struct super_block *sb, int type)
 	int ret = 0, cnt;
 	unsigned int flags;
 
+	/* s_umount should be held in exclusive mode */
+	if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
+		up_read(&sb->s_umount);
+
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
-
-		mutex_lock(&dqopt->dqonoff_mutex);
-		if (!sb_has_quota_suspended(sb, cnt)) {
-			mutex_unlock(&dqopt->dqonoff_mutex);
+		if (!sb_has_quota_suspended(sb, cnt))
 			continue;
-		}
+
 		inode = dqopt->files[cnt];
 		dqopt->files[cnt] = NULL;
 		spin_lock(&dq_state_lock);
@@ -2388,7 +2368,6 @@ int dquot_resume(struct super_block *sb, int type)
 							cnt);
 		dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt);
 		spin_unlock(&dq_state_lock);
-		mutex_unlock(&dqopt->dqonoff_mutex);
 
 		flags = dquot_generic_flag(flags, cnt);
 		ret = vfs_load_quota_inode(inode, cnt,
@@ -2424,42 +2403,30 @@ EXPORT_SYMBOL(dquot_quota_on);
 int dquot_enable(struct inode *inode, int type, int format_id,
 		 unsigned int flags)
 {
-	int ret = 0;
 	struct super_block *sb = inode->i_sb;
-	struct quota_info *dqopt = sb_dqopt(sb);
 
 	/* Just unsuspend quotas? */
 	BUG_ON(flags & DQUOT_SUSPENDED);
+	/* s_umount should be held in exclusive mode */
+	if (WARN_ON_ONCE(down_read_trylock(&sb->s_umount)))
+		up_read(&sb->s_umount);
 
 	if (!flags)
 		return 0;
 	/* Just updating flags needed? */
 	if (sb_has_quota_loaded(sb, type)) {
-		mutex_lock(&dqopt->dqonoff_mutex);
-		/* Now do a reliable test... */
-		if (!sb_has_quota_loaded(sb, type)) {
-			mutex_unlock(&dqopt->dqonoff_mutex);
-			goto load_quota;
-		}
 		if (flags & DQUOT_USAGE_ENABLED &&
-		    sb_has_quota_usage_enabled(sb, type)) {
-			ret = -EBUSY;
-			goto out_lock;
-		}
+		    sb_has_quota_usage_enabled(sb, type))
+			return -EBUSY;
 		if (flags & DQUOT_LIMITS_ENABLED &&
-		    sb_has_quota_limits_enabled(sb, type)) {
-			ret = -EBUSY;
-			goto out_lock;
-		}
+		    sb_has_quota_limits_enabled(sb, type))
+			return -EBUSY;
 		spin_lock(&dq_state_lock);
 		sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
 		spin_unlock(&dq_state_lock);
-out_lock:
-		mutex_unlock(&dqopt->dqonoff_mutex);
-		return ret;
+		return 0;
 	}
 
-load_quota:
 	return vfs_load_quota_inode(inode, type, format_id, flags);
 }
 EXPORT_SYMBOL(dquot_enable);
@@ -2751,7 +2718,6 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state)
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int type;
   
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
 	memset(state, 0, sizeof(*state));
 	for (type = 0; type < MAXQUOTAS; type++) {
 		if (!sb_has_quota_active(sb, type))
@@ -2773,7 +2739,6 @@ int dquot_get_state(struct super_block *sb, struct qc_state *state)
 		tstate->nextents = 1;	/* We don't know... */
 		spin_unlock(&dq_data_lock);
 	}
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return 0;
 }
 EXPORT_SYMBOL(dquot_get_state);
@@ -2787,18 +2752,13 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
 	if ((ii->i_fieldmask & QC_WARNS_MASK) ||
 	    (ii->i_fieldmask & QC_RT_SPC_TIMER))
 		return -EINVAL;
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_active(sb, type)) {
-		err = -ESRCH;
-		goto out;
-	}
+	if (!sb_has_quota_active(sb, type))
+		return -ESRCH;
 	mi = sb_dqopt(sb)->info + type;
 	if (ii->i_fieldmask & QC_FLAGS) {
 		if ((ii->i_flags & QCI_ROOT_SQUASH &&
-		     mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) {
-			err = -EINVAL;
-			goto out;
-		}
+		     mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD))
+			return -EINVAL;
 	}
 	spin_lock(&dq_data_lock);
 	if (ii->i_fieldmask & QC_SPC_TIMER)
@@ -2815,8 +2775,6 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
 	mark_info_dirty(sb, type);
 	/* Force write to disk */
 	sb->dq_op->write_info(sb, type);
-out:
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	return err;
 }
 EXPORT_SYMBOL(dquot_set_dqinfo);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 5acd0c4769af..07e08c7d05ca 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -104,13 +104,9 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
 {
 	__u32 fmt;
 
-	mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
-	if (!sb_has_quota_active(sb, type)) {
-		mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+	if (!sb_has_quota_active(sb, type))
 		return -ESRCH;
-	}
 	fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
-	mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
 	if (copy_to_user(addr, &fmt, sizeof(fmt)))
 		return -EFAULT;
 	return 0;
@@ -789,9 +785,14 @@ static int quotactl_cmd_write(int cmd)
 	}
 	return 1;
 }
-
 #endif /* CONFIG_BLOCK */
 
+/* Return true if quotactl command is manipulating quota on/off state */
+static bool quotactl_cmd_onoff(int cmd)
+{
+	return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF);
+}
+
 /*
  * look up a superblock on which quota ops will be performed
  * - use the name of a block device to find the superblock thereon
@@ -809,7 +810,9 @@ static struct super_block *quotactl_block(const char __user *special, int cmd)
 	putname(tmp);
 	if (IS_ERR(bdev))
 		return ERR_CAST(bdev);
-	if (quotactl_cmd_write(cmd))
+	if (quotactl_cmd_onoff(cmd))
+		sb = get_super_exclusive_thawed(bdev);
+	else if (quotactl_cmd_write(cmd))
 		sb = get_super_thawed(bdev);
 	else
 		sb = get_super(bdev);
@@ -872,7 +875,10 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
 
 	ret = do_quotactl(sb, type, cmds, id, addr, pathp);
 
-	drop_super(sb);
+	if (!quotactl_cmd_onoff(cmds))
+		drop_super(sb);
+	else
+		drop_super_exclusive(sb);
 out:
 	if (pathp && !IS_ERR(pathp))
 		path_put(pathp);
diff --git a/fs/read_write.c b/fs/read_write.c
index da6de12b5c46..7537b6b6b5a2 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1669,6 +1669,9 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  * Check that the two inodes are eligible for cloning, the ranges make
  * sense, and then flush all dirty data.  Caller must ensure that the
  * inodes have been locked against any other modifications.
+ *
+ * Returns: 0 for "nothing to clone", 1 for "something to clone", or
+ * the usual negative error code.
  */
 int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 			       struct inode *inode_out, loff_t pos_out,
@@ -1695,17 +1698,15 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 
 	/* Are we going all the way to the end? */
 	isize = i_size_read(inode_in);
-	if (isize == 0) {
-		*len = 0;
+	if (isize == 0)
 		return 0;
-	}
 
 	/* Zero length dedupe exits immediately; reflink goes to EOF. */
 	if (*len == 0) {
-		if (is_dedupe) {
-			*len = 0;
+		if (is_dedupe || pos_in == isize)
 			return 0;
-		}
+		if (pos_in > isize)
+			return -EINVAL;
 		*len = isize - pos_in;
 	}
 
@@ -1769,7 +1770,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 			return -EBADE;
 	}
 
-	return 0;
+	return 1;
 }
 EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
 
@@ -1955,6 +1956,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 		goto out;
 	ret = 0;
 
+	if (off + len > i_size_read(src))
+		return -EINVAL;
+
 	/* pre-format output fields to sane values */
 	for (i = 0; i < count; i++) {
 		same->info[i].bytes_deduped = 0ULL;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 368bfb92b115..a11f271800ef 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -190,6 +190,13 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 	 */
 	m->version = file->f_version;
 
+	/*
+	 * if request is to read from zero offset, reset iterator to first
+	 * record as it might have been already advanced by previous requests
+	 */
+	if (*ppos == 0)
+		m->index = 0;
+
 	/* Don't assume *ppos is where we left it */
 	if (unlikely(*ppos != m->read_pos)) {
 		while ((err = traverse(m, *ppos)) == -EAGAIN)
diff --git a/fs/splice.c b/fs/splice.c
index 8ed7c9d8c0fb..873d83104e79 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1087,7 +1087,13 @@ EXPORT_SYMBOL(do_splice_direct);
 
 static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
 {
-	while (pipe->nrbufs == pipe->buffers) {
+	for (;;) {
+		if (unlikely(!pipe->readers)) {
+			send_sig(SIGPIPE, current, 0);
+			return -EPIPE;
+		}
+		if (pipe->nrbufs != pipe->buffers)
+			return 0;
 		if (flags & SPLICE_F_NONBLOCK)
 			return -EAGAIN;
 		if (signal_pending(current))
@@ -1096,7 +1102,6 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
 		pipe_wait(pipe);
 		pipe->waiting_writers--;
 	}
-	return 0;
 }
 
 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
diff --git a/fs/super.c b/fs/super.c
index c183835566c1..1709ed029a2c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -244,7 +244,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	mutex_init(&s->s_vfs_rename_mutex);
 	lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
 	mutex_init(&s->s_dquot.dqio_mutex);
-	mutex_init(&s->s_dquot.dqonoff_mutex);
 	s->s_maxbytes = MAX_NON_LFS;
 	s->s_op = &default_op;
 	s->s_time_gran = 1000000000;
@@ -558,6 +557,13 @@ void drop_super(struct super_block *sb)
 
 EXPORT_SYMBOL(drop_super);
 
+void drop_super_exclusive(struct super_block *sb)
+{
+	up_write(&sb->s_umount);
+	put_super(sb);
+}
+EXPORT_SYMBOL(drop_super_exclusive);
+
 /**
  *	iterate_supers - call function for all active superblocks
  *	@f: function to call
@@ -628,15 +634,7 @@ void iterate_supers_type(struct file_system_type *type,
 
 EXPORT_SYMBOL(iterate_supers_type);
 
-/**
- *	get_super - get the superblock of a device
- *	@bdev: device to get the superblock for
- *	
- *	Scans the superblock list and finds the superblock of the file system
- *	mounted on the device given. %NULL is returned if no match is found.
- */
-
-struct super_block *get_super(struct block_device *bdev)
+static struct super_block *__get_super(struct block_device *bdev, bool excl)
 {
 	struct super_block *sb;
 
@@ -651,11 +649,17 @@ rescan:
 		if (sb->s_bdev == bdev) {
 			sb->s_count++;
 			spin_unlock(&sb_lock);
-			down_read(&sb->s_umount);
+			if (!excl)
+				down_read(&sb->s_umount);
+			else
+				down_write(&sb->s_umount);
 			/* still alive? */
 			if (sb->s_root && (sb->s_flags & MS_BORN))
 				return sb;
-			up_read(&sb->s_umount);
+			if (!excl)
+				up_read(&sb->s_umount);
+			else
+				up_write(&sb->s_umount);
 			/* nope, got unmounted */
 			spin_lock(&sb_lock);
 			__put_super(sb);
@@ -666,32 +670,67 @@ rescan:
 	return NULL;
 }
 
-EXPORT_SYMBOL(get_super);
-
 /**
- *	get_super_thawed - get thawed superblock of a device
+ *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
  *
  *	Scans the superblock list and finds the superblock of the file system
- *	mounted on the device. The superblock is returned once it is thawed
- *	(or immediately if it was not frozen). %NULL is returned if no match
- *	is found.
+ *	mounted on the device given. %NULL is returned if no match is found.
  */
-struct super_block *get_super_thawed(struct block_device *bdev)
+struct super_block *get_super(struct block_device *bdev)
+{
+	return __get_super(bdev, false);
+}
+EXPORT_SYMBOL(get_super);
+
+static struct super_block *__get_super_thawed(struct block_device *bdev,
+					      bool excl)
 {
 	while (1) {
-		struct super_block *s = get_super(bdev);
+		struct super_block *s = __get_super(bdev, excl);
 		if (!s || s->s_writers.frozen == SB_UNFROZEN)
 			return s;
-		up_read(&s->s_umount);
+		if (!excl)
+			up_read(&s->s_umount);
+		else
+			up_write(&s->s_umount);
 		wait_event(s->s_writers.wait_unfrozen,
 			   s->s_writers.frozen == SB_UNFROZEN);
 		put_super(s);
 	}
 }
+
+/**
+ *	get_super_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen). %NULL is returned if no match
+ *	is found.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+	return __get_super_thawed(bdev, false);
+}
 EXPORT_SYMBOL(get_super_thawed);
 
 /**
+ *	get_super_exclusive_thawed - get thawed superblock of a device
+ *	@bdev: device to get the superblock for
+ *
+ *	Scans the superblock list and finds the superblock of the file system
+ *	mounted on the device. The superblock is returned once it is thawed
+ *	(or immediately if it was not frozen) and s_umount semaphore is held
+ *	in exclusive mode. %NULL is returned if no match is found.
+ */
+struct super_block *get_super_exclusive_thawed(struct block_device *bdev)
+{
+	return __get_super_thawed(bdev, true);
+}
+EXPORT_SYMBOL(get_super_exclusive_thawed);
+
+/**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
  *
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 45ceb94e89e4..1bc0bd6a9848 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -1191,7 +1191,7 @@ out:
 	return err;
 }
 
-void ufs_truncate_blocks(struct inode *inode)
+static void ufs_truncate_blocks(struct inode *inode)
 {
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 	      S_ISLNK(inode->i_mode)))
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index aca2d4bd4303..07593a362cd0 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1161,7 +1161,7 @@ xfs_reflink_remap_range(
 
 	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
 			&len, is_dedupe);
-	if (ret || len == 0)
+	if (ret <= 0)
 		goto out_unlock;
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);