From 79d7e39ee1d4b423206f6cfd2cd2eed6253d95fe Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 21 Mar 2010 19:24:23 -0400
Subject: sb_entry() has been killed a couple of years ago and resurrected on
 mismerge

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4079ef99900f..d13a6b83ad0c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1314,7 +1314,6 @@ extern int send_sigurg(struct fown_struct *fown);
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
-#define sb_entry(list)  list_entry((list), struct super_block, s_list)
 #define S_BIAS (1<<30)
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
-- 
cgit v1.2.3-70-g09d2


From b20bd1a5e78af267dc4b6e1ffed48d5d776302c5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 22 Mar 2010 08:53:19 -0400
Subject: get rid of S_BIAS

use atomic_inc_not_zero(&sb->s_active) instead of playing games with
checking ->s_count > S_BIAS

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/notify/inotify/inotify.c | 32 +++++++++++++-------------------
 fs/super.c                  | 28 +++++++++++-----------------
 include/linux/fs.h          |  1 -
 3 files changed, 24 insertions(+), 37 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 40b1cf914ccb..441ef136af22 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -110,14 +110,10 @@ EXPORT_SYMBOL_GPL(get_inotify_watch);
 int pin_inotify_watch(struct inotify_watch *watch)
 {
 	struct super_block *sb = watch->inode->i_sb;
-	spin_lock(&sb_lock);
-	if (sb->s_count >= S_BIAS) {
-		atomic_inc(&sb->s_active);
-		spin_unlock(&sb_lock);
+	if (atomic_inc_not_zero(&sb->s_active)) {
 		atomic_inc(&watch->count);
 		return 1;
 	}
-	spin_unlock(&sb_lock);
 	return 0;
 }
 
@@ -518,16 +514,16 @@ EXPORT_SYMBOL_GPL(inotify_init_watch);
  * ->s_umount, which will almost certainly wait until the superblock is shut
  * down and the watch in question is pining for fjords.  That's fine, but
  * there is a problem - we might have hit the window between ->s_active
- * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
- * is past the point of no return and is heading for shutdown) and the
- * moment when deactivate_super() acquires ->s_umount.  We could just do
- * drop_super() yield() and retry, but that's rather antisocial and this
- * stuff is luser-triggerable.  OTOH, having grabbed ->s_umount and having
- * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
- * that we won't race with inotify_umount_inodes().  So we could grab a
- * reference to watch and do the rest as above, just with drop_super() instead
- * of deactivate_super(), right?  Wrong.  We had to drop ih->mutex before we
- * could grab ->s_umount.  So the watch could've been gone already.
+ * getting to 0 (i.e. the moment when superblock is past the point of no return
+ * and is heading for shutdown) and the moment when deactivate_super() acquires
+ * ->s_umount.  We could just do drop_super() yield() and retry, but that's
+ * rather antisocial and this stuff is luser-triggerable.  OTOH, having grabbed
+ * ->s_umount and having found that we'd got there first (i.e. that ->s_root is
+ * non-NULL) we know that we won't race with inotify_umount_inodes().  So we
+ * could grab a reference to watch and do the rest as above, just with
+ * drop_super() instead of deactivate_super(), right?  Wrong.  We had to drop
+ * ih->mutex before we could grab ->s_umount.  So the watch could've been gone
+ * already.
  *
  * That still can be dealt with - we need to save watch->wd, do idr_find()
  * and compare its result with our pointer.  If they match, we either have
@@ -565,14 +561,12 @@ static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
 	struct super_block *sb = watch->inode->i_sb;
 	s32 wd = watch->wd;
 
-	spin_lock(&sb_lock);
-	if (sb->s_count >= S_BIAS) {
-		atomic_inc(&sb->s_active);
-		spin_unlock(&sb_lock);
+	if (atomic_inc_not_zero(&sb->s_active)) {
 		get_inotify_watch(watch);
 		mutex_unlock(&ih->mutex);
 		return 1;	/* the best outcome */
 	}
+	spin_lock(&sb_lock);
 	sb->s_count++;
 	spin_unlock(&sb_lock);
 	mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
diff --git a/fs/super.c b/fs/super.c
index d8c8b1d2d010..bc734f8b3e18 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -93,7 +93,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		 * subclass.
 		 */
 		down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
-		s->s_count = S_BIAS;
+		s->s_count = 1;
 		atomic_set(&s->s_active, 1);
 		mutex_init(&s->s_vfs_rename_mutex);
 		mutex_init(&s->s_dquot.dqio_mutex);
@@ -189,9 +189,7 @@ void put_super(struct super_block *sb)
 void deactivate_super(struct super_block *s)
 {
 	struct file_system_type *fs = s->s_type;
-	if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
-		s->s_count -= S_BIAS-1;
-		spin_unlock(&sb_lock);
+	if (atomic_dec_and_test(&s->s_active)) {
 		vfs_dq_off(s, 0);
 		down_write(&s->s_umount);
 		fs->kill_sb(s);
@@ -216,9 +214,7 @@ EXPORT_SYMBOL(deactivate_super);
 void deactivate_locked_super(struct super_block *s)
 {
 	struct file_system_type *fs = s->s_type;
-	if (atomic_dec_and_lock(&s->s_active, &sb_lock)) {
-		s->s_count -= S_BIAS-1;
-		spin_unlock(&sb_lock);
+	if (atomic_dec_and_test(&s->s_active)) {
 		vfs_dq_off(s, 0);
 		fs->kill_sb(s);
 		put_filesystem(fs);
@@ -243,21 +239,19 @@ EXPORT_SYMBOL(deactivate_locked_super);
  */
 static int grab_super(struct super_block *s) __releases(sb_lock)
 {
+	if (atomic_inc_not_zero(&s->s_active)) {
+		spin_unlock(&sb_lock);
+		down_write(&s->s_umount);
+		return 1;
+	}
+	/* it's going away */
 	s->s_count++;
 	spin_unlock(&sb_lock);
+	/* usually that'll be enough for it to die... */
 	down_write(&s->s_umount);
-	if (s->s_root) {
-		spin_lock(&sb_lock);
-		if (s->s_count > S_BIAS) {
-			atomic_inc(&s->s_active);
-			s->s_count--;
-			spin_unlock(&sb_lock);
-			return 1;
-		}
-		spin_unlock(&sb_lock);
-	}
 	up_write(&s->s_umount);
 	put_super(s);
+	/* ... but in case it wasn't, let's at least yield() */
 	yield();
 	return 0;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d13a6b83ad0c..62f84d955b83 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1314,7 +1314,6 @@ extern int send_sigurg(struct fown_struct *fown);
 extern struct list_head super_blocks;
 extern spinlock_t sb_lock;
 
-#define S_BIAS (1<<30)
 struct super_block {
 	struct list_head	s_list;		/* Keep this first */
 	dev_t			s_dev;		/* search index; _not_ kdev_t */
-- 
cgit v1.2.3-70-g09d2


From 8edd64bd6089e21f47dcdebb14b598b713213ddc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 22 Mar 2010 19:56:42 -0400
Subject: get rid of restarts in sync_filesystems()

At the same time we can kill s_need_restart and local mutex in there.
__put_super() made public for a while; will be gone later.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         |  2 +-
 fs/sync.c          | 28 +++-------------------------
 include/linux/fs.h |  2 +-
 3 files changed, 5 insertions(+), 27 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/fs/super.c b/fs/super.c
index 0390461dfca0..ba99524998f7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -130,7 +130,7 @@ static inline void destroy_super(struct super_block *s)
  * Drop a superblock's refcount.  Returns non-zero if the superblock was
  * destroyed.  The caller must hold sb_lock.
  */
-static int __put_super(struct super_block *sb)
+int __put_super(struct super_block *sb)
 {
 	int ret = 0;
 
diff --git a/fs/sync.c b/fs/sync.c
index ad6691bae370..f3f0a0e1948f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -80,35 +80,15 @@ EXPORT_SYMBOL_GPL(sync_filesystem);
 /*
  * Sync all the data for all the filesystems (called by sys_sync() and
  * emergency sync)
- *
- * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied.  s_need_sync
- * is used only here.  We set it against all filesystems and then clear it as
- * we sync them.  So redirtied filesystems are skipped.
- *
- * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync
- * flags again, which will cause process A to resync everything.  Fix that with
- * a local mutex.
  */
 static void sync_filesystems(int wait)
 {
-	struct super_block *sb;
-	static DEFINE_MUTEX(mutex);
+	struct super_block *sb, *n;
 
-	mutex_lock(&mutex);		/* Could be down_interruptible */
 	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list)
-		if (!list_empty(&sb->s_instances))
-			sb->s_need_sync = 1;
-
-restart:
-	list_for_each_entry(sb, &super_blocks, s_list) {
+	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
-		if (!sb->s_need_sync)
-			continue;
-		sb->s_need_sync = 0;
 		sb->s_count++;
 		spin_unlock(&sb_lock);
 
@@ -119,11 +99,9 @@ restart:
 
 		/* restart only when sb is no longer on the list */
 		spin_lock(&sb_lock);
-		if (__put_super_and_need_restart(sb))
-			goto restart;
+		__put_super(sb);
 	}
 	spin_unlock(&sb_lock);
-	mutex_unlock(&mutex);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 62f84d955b83..e1c7427802b8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1332,7 +1332,6 @@ struct super_block {
 	struct rw_semaphore	s_umount;
 	struct mutex		s_lock;
 	int			s_count;
-	int			s_need_sync;
 	atomic_t		s_active;
 #ifdef CONFIG_SECURITY
 	void                    *s_security;
@@ -1780,6 +1779,7 @@ extern int get_sb_pseudo(struct file_system_type *, char *,
 	struct vfsmount *mnt);
 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
 int __put_super_and_need_restart(struct super_block *sb);
+int __put_super(struct super_block *sb);
 void put_super(struct super_block *sb);
 
 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
-- 
cgit v1.2.3-70-g09d2


From 35cf7ba0b46dc3582a01c3860b14bff122662aa3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 22 Mar 2010 21:13:53 -0400
Subject: Bury __put_super_and_need_restart()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/drop_caches.c   |  1 +
 fs/internal.h      |  2 ++
 fs/quota/quota.c   |  1 +
 fs/super.c         | 30 ++----------------------------
 include/linux/fs.h |  3 ---
 5 files changed, 6 insertions(+), 31 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 42728a1f795f..52047cf4177f 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,6 +8,7 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
diff --git a/fs/internal.h b/fs/internal.h
index 8a03a5447bdf..6b706bc60a66 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -87,6 +87,8 @@ extern struct file *get_empty_filp(void);
  * super.c
  */
 extern int do_remount_sb(struct super_block *, int, void *, int);
+extern void __put_super(struct super_block *sb);
+extern void put_super(struct super_block *sb);
 
 /*
  * open.c
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2196f8b07c1f..3ce1553ea7eb 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,7 @@
 #include <linux/quotaops.h>
 #include <linux/types.h>
 #include <linux/writeback.h>
+#include "../internal.h"
 
 static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 				     qid_t id)
diff --git a/fs/super.c b/fs/super.c
index 1f72e0d42d8f..95adbb3d8e58 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -127,40 +127,14 @@ static inline void destroy_super(struct super_block *s)
 /* Superblock refcounting  */
 
 /*
- * Drop a superblock's refcount.  Returns non-zero if the superblock was
- * destroyed.  The caller must hold sb_lock.
+ * Drop a superblock's refcount.  The caller must hold sb_lock.
  */
-int __put_super(struct super_block *sb)
+void __put_super(struct super_block *sb)
 {
-	int ret = 0;
-
 	if (!--sb->s_count) {
 		list_del_init(&sb->s_list);
 		destroy_super(sb);
-		ret = 1;
-	}
-	return ret;
-}
-
-/*
- * Drop a superblock's refcount.
- * Returns non-zero if the superblock is about to be destroyed and
- * at least is already removed from super_blocks list, so if we are
- * making a loop through super blocks then we need to restart.
- * The caller must hold sb_lock.
- */
-int __put_super_and_need_restart(struct super_block *sb)
-{
-	/* check for race with generic_shutdown_super() */
-	if (list_empty(&sb->s_instances)) {
-		/* super block is removed, need to restart... */
-		__put_super(sb);
-		return 1;
 	}
-	/* can't be the last, since s_list is still in use */
-	sb->s_count--;
-	BUG_ON(sb->s_count == 0);
-	return 0;
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e1c7427802b8..523086714c74 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1778,9 +1778,6 @@ extern int get_sb_pseudo(struct file_system_type *, char *,
 	const struct super_operations *ops, unsigned long,
 	struct vfsmount *mnt);
 extern void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
-int __put_super_and_need_restart(struct super_block *sb);
-int __put_super(struct super_block *sb);
-void put_super(struct super_block *sb);
 
 /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
 #define fops_get(fops) \
-- 
cgit v1.2.3-70-g09d2


From 01a05b337a5b647909e1d6670f57e7202318a5fb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 23 Mar 2010 06:06:58 -0400
Subject: new helper: iterate_supers()

... and switch the simple "loop over superblocks and do something"
loops to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/buffer.c        | 24 ++++++++----------------
 fs/drop_caches.c   | 25 ++-----------------------
 fs/quota/quota.c   | 33 +++++++++------------------------
 fs/super.c         | 30 ++++++++++++++++++++++++++++++
 fs/sync.c          | 25 ++++++-------------------
 include/linux/fs.h |  1 +
 6 files changed, 56 insertions(+), 82 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/fs/buffer.c b/fs/buffer.c
index ded29b0fdac3..2914d9adfb50 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -560,25 +560,17 @@ repeat:
 	return err;
 }
 
-static void do_thaw_all(struct work_struct *work)
+static void do_thaw_one(struct super_block *sb, void *unused)
 {
-	struct super_block *sb, *n;
 	char b[BDEVNAME_SIZE];
+	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+		printk(KERN_WARNING "Emergency Thaw on %s\n",
+		       bdevname(sb->s_bdev, b));
+}
 
-	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
-			printk(KERN_WARNING "Emergency Thaw on %s\n",
-			       bdevname(sb->s_bdev, b));
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-	}
-	spin_unlock(&sb_lock);
+static void do_thaw_all(struct work_struct *work)
+{
+	iterate_supers(do_thaw_one, NULL);
 	kfree(work);
 	printk(KERN_WARNING "Emergency Thaw complete\n");
 }
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 52047cf4177f..83c4f600786a 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,12 +8,11 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
-#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
 
-static void drop_pagecache_sb(struct super_block *sb)
+static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
 	struct inode *inode, *toput_inode = NULL;
 
@@ -34,26 +33,6 @@ static void drop_pagecache_sb(struct super_block *sb)
 	iput(toput_inode);
 }
 
-static void drop_pagecache(void)
-{
-	struct super_block *sb, *n;
-
-	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root)
-			drop_pagecache_sb(sb);
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		__put_super(sb);
-	}
-	spin_unlock(&sb_lock);
-}
-
 static void drop_slab(void)
 {
 	int nr_objects;
@@ -69,7 +48,7 @@ int drop_caches_sysctl_handler(ctl_table *table, int write,
 	proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (write) {
 		if (sysctl_drop_caches & 1)
-			drop_pagecache();
+			iterate_supers(drop_pagecache_sb, NULL);
 		if (sysctl_drop_caches & 2)
 			drop_slab();
 	}
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 3ce1553ea7eb..ce3dfd066f59 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,7 +18,6 @@
 #include <linux/quotaops.h>
 #include <linux/types.h>
 #include <linux/writeback.h>
-#include "../internal.h"
 
 static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 				     qid_t id)
@@ -46,36 +45,22 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 	return security_quotactl(cmd, type, id, sb);
 }
 
+static void quota_sync_one(struct super_block *sb, void *arg)
+{
+	if (sb->s_qcop && sb->s_qcop->quota_sync)
+		sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
+}
+
 static int quota_sync_all(int type)
 {
-	struct super_block *sb, *n;
 	int ret;
 
 	if (type >= MAXQUOTAS)
 		return -EINVAL;
 	ret = security_quotactl(Q_SYNC, type, 0, NULL);
-	if (ret)
-		return ret;
-
-	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		if (!sb->s_qcop || !sb->s_qcop->quota_sync)
-			continue;
-
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_read(&sb->s_umount);
-		if (sb->s_root)
-			sb->s_qcop->quota_sync(sb, type, 1);
-		up_read(&sb->s_umount);
-		spin_lock(&sb_lock);
-		__put_super(sb);
-	}
-	spin_unlock(&sb_lock);
-
-	return 0;
+	if (!ret)
+		iterate_supers(quota_sync_one, &type);
+	return ret;
 }
 
 static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
diff --git a/fs/super.c b/fs/super.c
index 95adbb3d8e58..2c3e370c60d9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -391,6 +391,36 @@ void sync_supers(void)
 	spin_unlock(&sb_lock);
 }
 
+/**
+ *	iterate_supers - call function for all active superblocks
+ *	@f: function to call
+ *	@arg: argument to pass to it
+ *
+ *	Scans the superblock list and calls given function, passing it
+ *	locked superblock and given argument.
+ */
+void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
+{
+	struct super_block *sb, *n;
+
+	spin_lock(&sb_lock);
+	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+		if (list_empty(&sb->s_instances))
+			continue;
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+
+		down_read(&sb->s_umount);
+		if (sb->s_root)
+			f(sb, arg);
+		up_read(&sb->s_umount);
+
+		spin_lock(&sb_lock);
+		__put_super(sb);
+	}
+	spin_unlock(&sb_lock);
+}
+
 /**
  *	get_super - get the superblock of a device
  *	@bdev: device to get the superblock for
diff --git a/fs/sync.c b/fs/sync.c
index f3f0a0e1948f..d5369203f8e4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -77,31 +77,18 @@ int sync_filesystem(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(sync_filesystem);
 
+static void sync_one_sb(struct super_block *sb, void *arg)
+{
+	if (!(sb->s_flags & MS_RDONLY) && sb->s_bdi)
+		__sync_filesystem(sb, *(int *)arg);
+}
 /*
  * Sync all the data for all the filesystems (called by sys_sync() and
  * emergency sync)
  */
 static void sync_filesystems(int wait)
 {
-	struct super_block *sb, *n;
-
-	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-
-		down_read(&sb->s_umount);
-		if (!(sb->s_flags & MS_RDONLY) && sb->s_root && sb->s_bdi)
-			__sync_filesystem(sb, wait);
-		up_read(&sb->s_umount);
-
-		/* restart only when sb is no longer on the list */
-		spin_lock(&sb_lock);
-		__put_super(sb);
-	}
-	spin_unlock(&sb_lock);
+	iterate_supers(sync_one_sb, &wait);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 523086714c74..eeb04ba17b63 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2324,6 +2324,7 @@ extern struct super_block *get_super(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern struct super_block *user_get_super(dev_t);
 extern void drop_super(struct super_block *sb);
+extern void iterate_supers(void (*)(struct super_block *, void *), void *);
 
 extern int dcache_dir_open(struct inode *, struct file *);
 extern int dcache_dir_close(struct inode *, struct file *);
-- 
cgit v1.2.3-70-g09d2


From 18e9e5104fcd9a973ffe3eed3816c87f2a1b6cd2 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 23 Mar 2010 10:34:56 -0400
Subject: Introduce freeze_super and thaw_super for the fsfreeze ioctl

Currently the way we do freezing is by passing sb>s_bdev to freeze_bdev and then
letting it do all the work.  But freezing is more of an fs thing, and doesn't
really have much to do with the bdev at all, all the work gets done with the
super.  In btrfs we do not populate s_bdev, since we can have multiple bdev's
for one fs and setting s_bdev makes removing devices from a pool kind of tricky.
This means that freezing a btrfs filesystem fails, which causes us to corrupt
with things like tux-on-ice which use the fsfreeze mechanism.  So instead of
populating sb->s_bdev with a random bdev in our pool, I've broken the actual fs
freezing stuff into freeze_super and thaw_super.  These just take the
super_block that we're freezing and does the appropriate work.  It's basically
just copy and pasted from freeze_bdev.  I've then converted freeze_bdev over to
use the new super helpers.  I've tested this with ext4 and btrfs and verified
everything continues to work the same as before.

The only new gotcha is multiple calls to the fsfreeze ioctl will return EBUSY if
the fs is already frozen.  I thought this was a better solution than adding a
freeze counter to the super_block, but if everybody hates this idea I'm open to
suggestions.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c     | 74 ++++++++++----------------------------------
 fs/ioctl.c         | 15 ++-------
 fs/super.c         | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  2 ++
 4 files changed, 110 insertions(+), 71 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 39cb6591d37d..1aba036dcabf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -245,38 +245,14 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	sb = get_active_super(bdev);
 	if (!sb)
 		goto out;
-	down_write(&sb->s_umount);
-	if (sb->s_flags & MS_RDONLY) {
-		sb->s_frozen = SB_FREEZE_TRANS;
-		up_write(&sb->s_umount);
+	error = freeze_super(sb);
+	if (error) {
+		deactivate_super(sb);
+		bdev->bd_fsfreeze_count--;
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return sb;
-	}
-
-	sb->s_frozen = SB_FREEZE_WRITE;
-	smp_wmb();
-
-	sync_filesystem(sb);
-
-	sb->s_frozen = SB_FREEZE_TRANS;
-	smp_wmb();
-
-	sync_blockdev(sb->s_bdev);
-
-	if (sb->s_op->freeze_fs) {
-		error = sb->s_op->freeze_fs(sb);
-		if (error) {
-			printk(KERN_ERR
-				"VFS:Filesystem freeze failed\n");
-			sb->s_frozen = SB_UNFROZEN;
-			deactivate_locked_super(sb);
-			bdev->bd_fsfreeze_count--;
-			mutex_unlock(&bdev->bd_fsfreeze_mutex);
-			return ERR_PTR(error);
-		}
+		return ERR_PTR(error);
 	}
-	up_write(&sb->s_umount);
-
+	deactivate_super(sb);
  out:
 	sync_blockdev(bdev);
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
@@ -297,40 +273,22 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 
 	mutex_lock(&bdev->bd_fsfreeze_mutex);
 	if (!bdev->bd_fsfreeze_count)
-		goto out_unlock;
+		goto out;
 
 	error = 0;
 	if (--bdev->bd_fsfreeze_count > 0)
-		goto out_unlock;
+		goto out;
 
 	if (!sb)
-		goto out_unlock;
-
-	BUG_ON(sb->s_bdev != bdev);
-	down_write(&sb->s_umount);
-	if (sb->s_flags & MS_RDONLY)
-		goto out_unfrozen;
-
-	if (sb->s_op->unfreeze_fs) {
-		error = sb->s_op->unfreeze_fs(sb);
-		if (error) {
-			printk(KERN_ERR
-				"VFS:Filesystem thaw failed\n");
-			sb->s_frozen = SB_FREEZE_TRANS;
-			bdev->bd_fsfreeze_count++;
-			mutex_unlock(&bdev->bd_fsfreeze_mutex);
-			return error;
-		}
-	}
-
-out_unfrozen:
-	sb->s_frozen = SB_UNFROZEN;
-	smp_wmb();
-	wake_up(&sb->s_wait_unfrozen);
+		goto out;
 
-	if (sb)
-		deactivate_locked_super(sb);
-out_unlock:
+	error = thaw_super(sb);
+	if (error) {
+		bdev->bd_fsfreeze_count++;
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		return error;
+	}
+out:
 	mutex_unlock(&bdev->bd_fsfreeze_mutex);
 	return 0;
 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 7faefb4da939..2d140a713861 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -525,15 +525,8 @@ static int ioctl_fsfreeze(struct file *filp)
 	if (sb->s_op->freeze_fs == NULL)
 		return -EOPNOTSUPP;
 
-	/* If a blockdevice-backed filesystem isn't specified, return. */
-	if (sb->s_bdev == NULL)
-		return -EINVAL;
-
 	/* Freeze */
-	sb = freeze_bdev(sb->s_bdev);
-	if (IS_ERR(sb))
-		return PTR_ERR(sb);
-	return 0;
+	return freeze_super(sb);
 }
 
 static int ioctl_fsthaw(struct file *filp)
@@ -543,12 +536,8 @@ static int ioctl_fsthaw(struct file *filp)
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	/* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
-	if (sb->s_bdev == NULL)
-		return -EINVAL;
-
 	/* Thaw */
-	return thaw_bdev(sb->s_bdev, sb);
+	return thaw_super(sb);
 }
 
 /*
diff --git a/fs/super.c b/fs/super.c
index c248ac6a1a21..89afca5055ab 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -944,6 +944,96 @@ out:
 
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
 
+/**
+ * freeze_super -- lock the filesystem and force it into a consistent state
+ * @super: the super to lock
+ *
+ * Syncs the super to make sure the filesystem is consistent and calls the fs's
+ * freeze_fs.  Subsequent calls to this without first thawing the fs will return
+ * -EBUSY.
+ */
+int freeze_super(struct super_block *sb)
+{
+	int ret;
+
+	atomic_inc(&sb->s_active);
+	down_write(&sb->s_umount);
+	if (sb->s_frozen) {
+		deactivate_locked_super(sb);
+		return -EBUSY;
+	}
+
+	if (sb->s_flags & MS_RDONLY) {
+		sb->s_frozen = SB_FREEZE_TRANS;
+		smp_wmb();
+		up_write(&sb->s_umount);
+		return 0;
+	}
+
+	sb->s_frozen = SB_FREEZE_WRITE;
+	smp_wmb();
+
+	sync_filesystem(sb);
+
+	sb->s_frozen = SB_FREEZE_TRANS;
+	smp_wmb();
+
+	sync_blockdev(sb->s_bdev);
+	if (sb->s_op->freeze_fs) {
+		ret = sb->s_op->freeze_fs(sb);
+		if (ret) {
+			printk(KERN_ERR
+				"VFS:Filesystem freeze failed\n");
+			sb->s_frozen = SB_UNFROZEN;
+			deactivate_locked_super(sb);
+			return ret;
+		}
+	}
+	up_write(&sb->s_umount);
+	return 0;
+}
+EXPORT_SYMBOL(freeze_super);
+
+/**
+ * thaw_super -- unlock filesystem
+ * @sb: the super to thaw
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super().
+ */
+int thaw_super(struct super_block *sb)
+{
+	int error;
+
+	down_write(&sb->s_umount);
+	if (sb->s_frozen == SB_UNFROZEN) {
+		up_write(&sb->s_umount);
+		return -EINVAL;
+	}
+
+	if (sb->s_flags & MS_RDONLY)
+		goto out;
+
+	if (sb->s_op->unfreeze_fs) {
+		error = sb->s_op->unfreeze_fs(sb);
+		if (error) {
+			printk(KERN_ERR
+				"VFS:Filesystem thaw failed\n");
+			sb->s_frozen = SB_FREEZE_TRANS;
+			up_write(&sb->s_umount);
+			return error;
+		}
+	}
+
+out:
+	sb->s_frozen = SB_UNFROZEN;
+	smp_wmb();
+	wake_up(&sb->s_wait_unfrozen);
+	deactivate_locked_super(sb);
+
+	return 0;
+}
+EXPORT_SYMBOL(thaw_super);
+
 static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
 {
 	int err;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eeb04ba17b63..d51256bc6328 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1797,6 +1797,8 @@ extern void drop_collected_mounts(struct vfsmount *);
 extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
 			  struct vfsmount *);
 extern int vfs_statfs(struct dentry *, struct kstatfs *);
+extern int freeze_super(struct super_block *super);
+extern int thaw_super(struct super_block *super);
 
 extern int current_umask(void);
 
-- 
cgit v1.2.3-70-g09d2


From bb4354538eb7b92f32cfedbad68c7be266c0b467 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 13 May 2010 17:53:14 -0700
Subject: fs: xattr_handler table should be const

The entries in xattr handler table should be immutable (ie const)
like other operation tables.

Later patches convert common filesystems. Uncoverted filesystems
will still work, but will generate a compiler warning.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/generic_acl.c            |  4 ++--
 fs/xattr.c                  | 14 +++++++-------
 include/linux/fs.h          |  2 +-
 include/linux/generic_acl.h |  4 ++--
 include/linux/xattr.h       |  2 +-
 mm/shmem.c                  |  4 ++--
 6 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index fe5df5457656..99800e564157 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -201,7 +201,7 @@ generic_check_acl(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
-struct xattr_handler generic_acl_access_handler = {
+const struct xattr_handler generic_acl_access_handler = {
 	.prefix = POSIX_ACL_XATTR_ACCESS,
 	.flags	= ACL_TYPE_ACCESS,
 	.list	= generic_acl_list,
@@ -209,7 +209,7 @@ struct xattr_handler generic_acl_access_handler = {
 	.set	= generic_acl_set,
 };
 
-struct xattr_handler generic_acl_default_handler = {
+const struct xattr_handler generic_acl_default_handler = {
 	.prefix = POSIX_ACL_XATTR_DEFAULT,
 	.flags	= ACL_TYPE_DEFAULT,
 	.list	= generic_acl_list,
diff --git a/fs/xattr.c b/fs/xattr.c
index 46f87e828b48..01bb8135e14a 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -590,10 +590,10 @@ strcmp_prefix(const char *a, const char *a_prefix)
 /*
  * Find the xattr_handler with the matching prefix.
  */
-static struct xattr_handler *
-xattr_resolve_name(struct xattr_handler **handlers, const char **name)
+static const struct xattr_handler *
+xattr_resolve_name(const struct xattr_handler **handlers, const char **name)
 {
-	struct xattr_handler *handler;
+	const struct xattr_handler *handler;
 
 	if (!*name)
 		return NULL;
@@ -614,7 +614,7 @@ xattr_resolve_name(struct xattr_handler **handlers, const char **name)
 ssize_t
 generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
 {
-	struct xattr_handler *handler;
+	const struct xattr_handler *handler;
 
 	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
@@ -629,7 +629,7 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
 ssize_t
 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-	struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
+	const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
 	unsigned int size = 0;
 
 	if (!buffer) {
@@ -659,7 +659,7 @@ generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 int
 generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
 {
-	struct xattr_handler *handler;
+	const struct xattr_handler *handler;
 
 	if (size == 0)
 		value = "";  /* empty EA, do not remove */
@@ -676,7 +676,7 @@ generic_setxattr(struct dentry *dentry, const char *name, const void *value, siz
 int
 generic_removexattr(struct dentry *dentry, const char *name)
 {
-	struct xattr_handler *handler;
+	const struct xattr_handler *handler;
 
 	handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
 	if (!handler)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d51256bc6328..6660db898c41 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1336,7 +1336,7 @@ struct super_block {
 #ifdef CONFIG_SECURITY
 	void                    *s_security;
 #endif
-	struct xattr_handler	**s_xattr;
+	const struct xattr_handler **s_xattr;
 
 	struct list_head	s_inodes;	/* all inodes */
 	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
index ca666d18ed67..574bea4013b6 100644
--- a/include/linux/generic_acl.h
+++ b/include/linux/generic_acl.h
@@ -5,8 +5,8 @@
 
 struct inode;
 
-extern struct xattr_handler generic_acl_access_handler;
-extern struct xattr_handler generic_acl_default_handler;
+extern const struct xattr_handler generic_acl_access_handler;
+extern const struct xattr_handler generic_acl_default_handler;
 
 int generic_acl_init(struct inode *, struct inode *);
 int generic_acl_chmod(struct inode *);
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index fb9b7e6e1e2d..0cfa1e9c4cc1 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -37,7 +37,7 @@ struct inode;
 struct dentry;
 
 struct xattr_handler {
-	char *prefix;
+	const char *prefix;
 	int flags;	/* fs private flags passed back to the handlers */
 	size_t (*list)(struct dentry *dentry, char *list, size_t list_size,
 		       const char *name, size_t name_len, int handler_flags);
diff --git a/mm/shmem.c b/mm/shmem.c
index eef4ebea5158..717aa62ff127 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2071,14 +2071,14 @@ static int shmem_xattr_security_set(struct dentry *dentry, const char *name,
 					  size, flags);
 }
 
-static struct xattr_handler shmem_xattr_security_handler = {
+static const struct xattr_handler shmem_xattr_security_handler = {
 	.prefix = XATTR_SECURITY_PREFIX,
 	.list   = shmem_xattr_security_list,
 	.get    = shmem_xattr_security_get,
 	.set    = shmem_xattr_security_set,
 };
 
-static struct xattr_handler *shmem_xattr_handlers[] = {
+static const struct xattr_handler *shmem_xattr_handlers[] = {
 	&generic_acl_access_handler,
 	&generic_acl_default_handler,
 	&shmem_xattr_security_handler,
-- 
cgit v1.2.3-70-g09d2


From 8018ab057480974e7f26a387bf4ce040e9a5f6f1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 22 Mar 2010 17:32:25 +0100
Subject: sanitize vfs_fsync calling conventions

Now that the last user passing a NULL file pointer is gone we can remove
the redundant dentry argument and associated hacks inside vfs_fsynmc_range.

The next step will be removig the dentry argument from ->fsync, but given
the luck with the last round of method prototype changes I'd rather
defer this until after the main merge window.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/block/loop.c                |  4 ++--
 drivers/md/bitmap.c                 |  2 +-
 drivers/usb/gadget/storage_common.c |  2 +-
 fs/ceph/file.c                      |  3 +--
 fs/coda/file.c                      |  2 +-
 fs/ecryptfs/file.c                  |  4 +---
 fs/nfsd/nfs4recover.c               |  6 +++---
 fs/nfsd/vfs.c                       |  5 ++---
 fs/sync.c                           | 42 +++++++------------------------------
 include/linux/fs.h                  |  6 +++---
 mm/msync.c                          |  2 +-
 11 files changed, 24 insertions(+), 54 deletions(-)

(limited to 'include/linux/fs.h')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a90e83c9be96..6120922f459f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -485,7 +485,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 				goto out;
 			}
 
-			ret = vfs_fsync(file, file->f_path.dentry, 0);
+			ret = vfs_fsync(file, 0);
 			if (unlikely(ret)) {
 				ret = -EIO;
 				goto out;
@@ -495,7 +495,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 		ret = lo_send(lo, bio, pos);
 
 		if (barrier && !ret) {
-			ret = vfs_fsync(file, file->f_path.dentry, 0);
+			ret = vfs_fsync(file, 0);
 			if (unlikely(ret))
 				ret = -EIO;
 		}
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index f084249295d9..53e8bea295e2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1692,7 +1692,7 @@ int bitmap_create(mddev_t *mddev)
 		 * and bypass the page cache, we must sync the file
 		 * first.
 		 */
-		vfs_fsync(file, file->f_dentry, 1);
+		vfs_fsync(file, 1);
 	}
 	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
 	if (!mddev->bitmap_info.external)
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index 868d8ee86756..04c462ff0ea6 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -654,7 +654,7 @@ static int fsg_lun_fsync_sub(struct fsg_lun *curlun)
 
 	if (curlun->ro || !filp)
 		return 0;
-	return vfs_fsync(filp, filp->f_path.dentry, 1);
+	return vfs_fsync(filp, 1);
 }
 
 static void store_cdrom_address(u8 *dest, int msf, u32 addr)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ed6f19721d6e..7d634938edc9 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -844,8 +844,7 @@ retry_snap:
 		if ((ret >= 0 || ret == -EIOCBQUEUED) &&
 		    ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
 		     || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
-			err = vfs_fsync_range(file, file->f_path.dentry,
-					      pos, pos + ret - 1, 1);
+			err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
 			if (err < 0)
 				ret = err;
 		}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 4c813f2cdc52..7196077b1688 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -217,7 +217,7 @@ int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
 	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
 	host_file = cfi->cfi_container;
 
-	err = vfs_fsync(host_file, host_file->f_path.dentry, datasync);
+	err = vfs_fsync(host_file, datasync);
 	if ( !err && !datasync ) {
 		lock_kernel();
 		err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index e7440a6f5ebf..3bdddbcc785f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -276,9 +276,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
 static int
 ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 {
-	return vfs_fsync(ecryptfs_file_to_lower(file),
-			 ecryptfs_dentry_to_lower(dentry),
-			 datasync);
+	return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
 }
 
 static int ecryptfs_fasync(int fd, struct file *file, int flag)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index dada03f2c13b..7e26caab2a26 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -158,7 +158,7 @@ out_unlock:
 	mutex_unlock(&dir->d_inode->i_mutex);
 	if (status == 0) {
 		clp->cl_firststate = 1;
-		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+		vfs_fsync(rec_file, 0);
 	}
 	nfs4_reset_creds(original_cred);
 	dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
@@ -288,7 +288,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
 	status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
 	nfs4_reset_creds(original_cred);
 	if (status == 0)
-		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+		vfs_fsync(rec_file, 0);
 	mnt_drop_write(rec_file->f_path.mnt);
 out:
 	if (status)
@@ -325,7 +325,7 @@ nfsd4_recdir_purge_old(void) {
 		goto out;
 	status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old);
 	if (status == 0)
-		vfs_fsync(rec_file, rec_file->f_path.dentry, 0);
+		vfs_fsync(rec_file, 0);
 	mnt_drop_write(rec_file->f_path.mnt);
 out:
 	if (status)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23c06f77f4ca..ebbf3b6b2457 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -999,7 +999,7 @@ static int wait_for_concurrent_writes(struct file *file)
 
 	if (inode->i_state & I_DIRTY) {
 		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-		err = vfs_fsync(file, file->f_path.dentry, 0);
+		err = vfs_fsync(file, 0);
 	}
 	last_ino = inode->i_ino;
 	last_dev = inode->i_sb->s_dev;
@@ -1175,8 +1175,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (err)
 		goto out;
 	if (EX_ISSYNC(fhp->fh_export)) {
-		int err2 = vfs_fsync_range(file, file->f_path.dentry,
-				offset, end, 0);
+		int err2 = vfs_fsync_range(file, offset, end, 0);
 
 		if (err2 != -EINVAL)
 			err = nfserrno(err2);
diff --git a/fs/sync.c b/fs/sync.c
index d5369203f8e4..5a537ccd2e85 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(file_fsync);
 /**
  * vfs_fsync_range - helper to sync a range of data & metadata to disk
  * @file:		file to sync
- * @dentry:		dentry of @file
  * @start:		offset in bytes of the beginning of data range to sync
  * @end:		offset in bytes of the end of data range (inclusive)
  * @datasync:		perform only datasync
@@ -166,32 +165,13 @@ EXPORT_SYMBOL(file_fsync);
  * Write back data in range @start..@end and metadata for @file to disk.  If
  * @datasync is set only metadata needed to access modified file data is
  * written.
- *
- * In case this function is called from nfsd @file may be %NULL and
- * only @dentry is set.  This can only happen when the filesystem
- * implements the export_operations API.
  */
-int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
-		    loff_t end, int datasync)
+int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	const struct file_operations *fop;
-	struct address_space *mapping;
+	struct address_space *mapping = file->f_mapping;
 	int err, ret;
 
-	/*
-	 * Get mapping and operations from the file in case we have
-	 * as file, or get the default values for them in case we
-	 * don't have a struct file available.  Damn nfsd..
-	 */
-	if (file) {
-		mapping = file->f_mapping;
-		fop = file->f_op;
-	} else {
-		mapping = dentry->d_inode->i_mapping;
-		fop = dentry->d_inode->i_fop;
-	}
-
-	if (!fop || !fop->fsync) {
+	if (!file->f_op || !file->f_op->fsync) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -203,7 +183,7 @@ int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
 	 * livelocks in fsync_buffers_list().
 	 */
 	mutex_lock(&mapping->host->i_mutex);
-	err = fop->fsync(file, dentry, datasync);
+	err = file->f_op->fsync(file, file->f_path.dentry, datasync);
 	if (!ret)
 		ret = err;
 	mutex_unlock(&mapping->host->i_mutex);
@@ -216,19 +196,14 @@ EXPORT_SYMBOL(vfs_fsync_range);
 /**
  * vfs_fsync - perform a fsync or fdatasync on a file
  * @file:		file to sync
- * @dentry:		dentry of @file
  * @datasync:		only perform a fdatasync operation
  *
  * Write back data and metadata for @file to disk.  If @datasync is
  * set only metadata needed to access modified file data is written.
- *
- * In case this function is called from nfsd @file may be %NULL and
- * only @dentry is set.  This can only happen when the filesystem
- * implements the export_operations API.
  */
-int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int vfs_fsync(struct file *file, int datasync)
 {
-	return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
+	return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
 }
 EXPORT_SYMBOL(vfs_fsync);
 
@@ -239,7 +214,7 @@ static int do_fsync(unsigned int fd, int datasync)
 
 	file = fget(fd);
 	if (file) {
-		ret = vfs_fsync(file, file->f_path.dentry, datasync);
+		ret = vfs_fsync(file, datasync);
 		fput(file);
 	}
 	return ret;
@@ -267,8 +242,7 @@ int generic_write_sync(struct file *file, loff_t pos, loff_t count)
 {
 	if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
 		return 0;
-	return vfs_fsync_range(file, file->f_path.dentry, pos,
-			       pos + count - 1,
+	return vfs_fsync_range(file, pos, pos + count - 1,
 			       (file->f_flags & __O_SYNC) ? 0 : 1);
 }
 EXPORT_SYMBOL(generic_write_sync);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6660db898c41..f3e108314c93 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2084,9 +2084,9 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
 extern int filemap_fdatawrite_range(struct address_space *mapping,
 				loff_t start, loff_t end);
 
-extern int vfs_fsync_range(struct file *file, struct dentry *dentry,
-			   loff_t start, loff_t end, int datasync);
-extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
+extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
+			   int datasync);
+extern int vfs_fsync(struct file *file, int datasync);
 extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
 extern void sync_supers(void);
 extern void emergency_sync(void);
diff --git a/mm/msync.c b/mm/msync.c
index 4083209b7f02..632df4527c01 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -82,7 +82,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 				(vma->vm_flags & VM_SHARED)) {
 			get_file(file);
 			up_read(&mm->mmap_sem);
-			error = vfs_fsync(file, file->f_path.dentry, 0);
+			error = vfs_fsync(file, 0);
 			fput(file);
 			if (error || start >= end)
 				goto out;
-- 
cgit v1.2.3-70-g09d2


From 51ee049e771c14a29aaee8ecd6cbbe14db088f3a Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Tue, 27 Apr 2010 14:23:57 -0700
Subject: vfs: add lockdep annotation to s_vfs_rename_key for ecryptfs

 >  =============================================
 >  [ INFO: possible recursive locking detected ]
 >  2.6.31-2-generic #14~rbd3
 >  ---------------------------------------------
 >  firefox-3.5/4162 is trying to acquire lock:
 >   (&s->s_vfs_rename_mutex){+.+.+.}, at: [<ffffffff81139d31>] lock_rename+0x41/0xf0
 >
 >  but task is already holding lock:
 >   (&s->s_vfs_rename_mutex){+.+.+.}, at: [<ffffffff81139d31>] lock_rename+0x41/0xf0
 >
 >  other info that might help us debug this:
 >  3 locks held by firefox-3.5/4162:
 >   #0:  (&s->s_vfs_rename_mutex){+.+.+.}, at: [<ffffffff81139d31>] lock_rename+0x41/0xf0
 >   #1:  (&sb->s_type->i_mutex_key#11/1){+.+.+.}, at: [<ffffffff81139d5a>] lock_rename+0x6a/0xf0
 >   #2:  (&sb->s_type->i_mutex_key#11/2){+.+.+.}, at: [<ffffffff81139d6f>] lock_rename+0x7f/0xf0
 >
 >  stack backtrace:
 >  Pid: 4162, comm: firefox-3.5 Tainted: G         C 2.6.31-2-generic #14~rbd3
 >  Call Trace:
 >   [<ffffffff8108ae74>] print_deadlock_bug+0xf4/0x100
 >   [<ffffffff8108ce26>] validate_chain+0x4c6/0x750
 >   [<ffffffff8108d2e7>] __lock_acquire+0x237/0x430
 >   [<ffffffff8108d585>] lock_acquire+0xa5/0x150
 >   [<ffffffff81139d31>] ? lock_rename+0x41/0xf0
 >   [<ffffffff815526ad>] __mutex_lock_common+0x4d/0x3d0
 >   [<ffffffff81139d31>] ? lock_rename+0x41/0xf0
 >   [<ffffffff81139d31>] ? lock_rename+0x41/0xf0
 >   [<ffffffff8120eaf9>] ? ecryptfs_rename+0x99/0x170
 >   [<ffffffff81552b36>] mutex_lock_nested+0x46/0x60
 >   [<ffffffff81139d31>] lock_rename+0x41/0xf0
 >   [<ffffffff8120eb2a>] ecryptfs_rename+0xca/0x170
 >   [<ffffffff81139a9e>] vfs_rename_dir+0x13e/0x160
 >   [<ffffffff8113ac7e>] vfs_rename+0xee/0x290
 >   [<ffffffff8113c212>] ? __lookup_hash+0x102/0x160
 >   [<ffffffff8113d512>] sys_renameat+0x252/0x280
 >   [<ffffffff81133eb4>] ? cp_new_stat+0xe4/0x100
 >   [<ffffffff8101316a>] ? sysret_check+0x2e/0x69
 >   [<ffffffff8108c34d>] ? trace_hardirqs_on_caller+0x14d/0x190
 >   [<ffffffff8113d55b>] sys_rename+0x1b/0x20
 >   [<ffffffff81013132>] system_call_fastpath+0x16/0x1b

The trace above is totally reproducible by doing a cross-directory
rename on an ecryptfs directory.

The issue seems to be that sys_renameat() does lock_rename() then calls
into the filesystem; if the filesystem is ecryptfs, then
ecryptfs_rename() again does lock_rename() on the lower filesystem, and
lockdep can't tell that the two s_vfs_rename_mutexes are different.  It
seems an annotation like the following is sufficient to fix this (it
does get rid of the lockdep trace in my simple tests); however I would
like to make sure I'm not misunderstanding the locking, hence the CC
list...

Signed-off-by: Roland Dreier <rdreier@cisco.com>
Cc: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Cc: Dustin Kirkland <kirkland@canonical.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c         | 1 +
 include/linux/fs.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux/fs.h')

diff --git a/fs/super.c b/fs/super.c
index 89afca5055ab..69688b15f1fa 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -88,6 +88,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		s->s_count = 1;
 		atomic_set(&s->s_active, 1);
 		mutex_init(&s->s_vfs_rename_mutex);
+		lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
 		mutex_init(&s->s_dquot.dqio_mutex);
 		mutex_init(&s->s_dquot.dqonoff_mutex);
 		init_rwsem(&s->s_dquot.dqptr_sem);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f3e108314c93..9626c5fbb0e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1741,6 +1741,7 @@ struct file_system_type {
 
 	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
+	struct lock_class_key s_vfs_rename_key;
 
 	struct lock_class_key i_lock_key;
 	struct lock_class_key i_mutex_key;
-- 
cgit v1.2.3-70-g09d2


From a1bd120d13e586ea1c424048fd2c8420a442852a Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 4 Mar 2010 17:29:14 +0300
Subject: vfs: Add inode uid,gid,mode init helper

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c         | 20 ++++++++++++++++++++
 include/linux/fs.h |  3 ++-
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux/fs.h')

diff --git a/fs/inode.c b/fs/inode.c
index 498b10f04c3c..2bee20ae3d65 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1606,3 +1606,23 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
 				  inode->i_ino);
 }
 EXPORT_SYMBOL(init_special_inode);
+
+/**
+ * Init uid,gid,mode for new inode according to posix standards
+ * @inode: New inode
+ * @dir: Directory inode
+ * @mode: mode of the new inode
+ */
+void inode_init_owner(struct inode *inode, const struct inode *dir,
+			mode_t mode)
+{
+	inode->i_uid = current_fsuid();
+	if (dir && dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else
+		inode->i_gid = current_fsgid();
+	inode->i_mode = mode;
+}
+EXPORT_SYMBOL(inode_init_owner);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9626c5fbb0e1..8a733862a411 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1428,7 +1428,8 @@ extern void dentry_unhash(struct dentry *dentry);
  * VFS file helper functions.
  */
 extern int file_permission(struct file *, int);
-
+extern void inode_init_owner(struct inode *inode, const struct inode *dir,
+			mode_t mode);
 /*
  * VFS FS_IOC_FIEMAP helper definitions.
  */
-- 
cgit v1.2.3-70-g09d2