summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c6
-rw-r--r--fs/9p/vfs_inode.c2
-rw-r--r--fs/9p/vfs_inode_dotl.c2
-rw-r--r--fs/autofs4/autofs_i.h8
-rw-r--r--fs/autofs4/expire.c27
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/autofs4/waitq.c7
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/btrfs/check-integrity.c63
-rw-r--r--fs/btrfs/check-integrity.h6
-rw-r--r--fs/btrfs/compression.c17
-rw-r--r--fs/btrfs/ctree.c19
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/delayed-inode.c27
-rw-r--r--fs/btrfs/delayed-inode.h10
-rw-r--r--fs/btrfs/disk-io.c97
-rw-r--r--fs/btrfs/disk-io.h4
-rw-r--r--fs/btrfs/extent-tree.c35
-rw-r--r--fs/btrfs/extent_io.c133
-rw-r--r--fs/btrfs/extent_io.h12
-rw-r--r--fs/btrfs/file.c44
-rw-r--r--fs/btrfs/free-space-cache.c18
-rw-r--r--fs/btrfs/hash.c5
-rw-r--r--fs/btrfs/hash.h1
-rw-r--r--fs/btrfs/inode.c105
-rw-r--r--fs/btrfs/ordered-data.c9
-rw-r--r--fs/btrfs/ordered-data.h2
-rw-r--r--fs/btrfs/raid56.c17
-rw-r--r--fs/btrfs/reada.c2
-rw-r--r--fs/btrfs/scrub.c65
-rw-r--r--fs/btrfs/super.c61
-rw-r--r--fs/btrfs/tests/btrfs-tests.c8
-rw-r--r--fs/btrfs/tests/btrfs-tests.h27
-rw-r--r--fs/btrfs/tests/extent-buffer-tests.c13
-rw-r--r--fs/btrfs/tests/extent-io-tests.c86
-rw-r--r--fs/btrfs/tests/free-space-tests.c76
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c30
-rw-r--r--fs/btrfs/tests/inode-tests.c344
-rw-r--r--fs/btrfs/tests/qgroup-tests.c111
-rw-r--r--fs/btrfs/transaction.c10
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-log.c4
-rw-r--r--fs/btrfs/volumes.c240
-rw-r--r--fs/btrfs/volumes.h6
-rw-r--r--fs/buffer.c80
-rw-r--r--fs/cachefiles/interface.c2
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/ceph/cache.c141
-rw-r--r--fs/ceph/cache.h44
-rw-r--r--fs/ceph/caps.c23
-rw-r--r--fs/ceph/export.c10
-rw-r--r--fs/ceph/file.c29
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/cifs/cifs_unicode.c33
-rw-r--r--fs/cifs/cifs_unicode.h2
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c14
-rw-r--r--fs/cifs/ntlmssp.h2
-rw-r--r--fs/cifs/sess.c80
-rw-r--r--fs/cifs/smb2pdu.c37
-rw-r--r--fs/configfs/file.c2
-rw-r--r--fs/coredump.c4
-rw-r--r--fs/crypto/crypto.c3
-rw-r--r--fs/dax.c7
-rw-r--r--fs/dcache.c79
-rw-r--r--fs/debugfs/file.c7
-rw-r--r--fs/devpts/inode.c191
-rw-r--r--fs/direct-io.c35
-rw-r--r--fs/ecryptfs/crypto.c8
-rw-r--r--fs/ecryptfs/file.c19
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/exofs/ore.c2
-rw-r--r--fs/ext4/balloc.c2
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c8
-rw-r--r--fs/ext4/mmp.c4
-rw-r--r--fs/ext4/namei.c3
-rw-r--r--fs/ext4/page-io.c7
-rw-r--r--fs/ext4/readpage.c9
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/f2fs/checkpoint.c10
-rw-r--r--fs/f2fs/data.c47
-rw-r--r--fs/f2fs/f2fs.h5
-rw-r--r--fs/f2fs/gc.c12
-rw-r--r--fs/f2fs/inline.c3
-rw-r--r--fs/f2fs/node.c10
-rw-r--r--fs/f2fs/segment.c14
-rw-r--r--fs/f2fs/trace.c7
-rw-r--r--fs/fat/misc.c2
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fscache/page.c2
-rw-r--r--fs/fuse/dir.c6
-rw-r--r--fs/fuse/fuse_i.h9
-rw-r--r--fs/fuse/inode.c19
-rw-r--r--fs/gfs2/aops.c49
-rw-r--r--fs/gfs2/bmap.c5
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/dir.c5
-rw-r--r--fs/gfs2/export.c11
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/gfs2/glock.c11
-rw-r--r--fs/gfs2/glock.h10
-rw-r--r--fs/gfs2/inode.c130
-rw-r--r--fs/gfs2/inode.h4
-rw-r--r--fs/gfs2/log.c8
-rw-r--r--fs/gfs2/lops.c23
-rw-r--r--fs/gfs2/lops.h2
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c18
-rw-r--r--fs/gfs2/ops_fstype.c6
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/gfs2/recovery.c6
-rw-r--r--fs/gfs2/recovery.h4
-rw-r--r--fs/gfs2/rgrp.c21
-rw-r--r--fs/gfs2/super.c24
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hfsplus/part_tbl.c5
-rw-r--r--fs/hfsplus/super.c6
-rw-r--r--fs/hfsplus/wrapper.c15
-rw-r--r--fs/internal.h1
-rw-r--r--fs/isofs/compress.c2
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c42
-rw-r--r--fs/jbd2/recovery.c4
-rw-r--r--fs/jfs/jfs_logmgr.c6
-rw-r--r--fs/jfs/jfs_metapage.c10
-rw-r--r--fs/libfs.c113
-rw-r--r--fs/lockd/svc.c13
-rw-r--r--fs/locks.c2
-rw-r--r--fs/logfs/dev_bdev.c17
-rw-r--r--fs/mpage.c41
-rw-r--r--fs/namei.c110
-rw-r--r--fs/namespace.c11
-rw-r--r--fs/nfs/blocklayout/blocklayout.c22
-rw-r--r--fs/nfs/dir.c41
-rw-r--r--fs/nfs/direct.c10
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/nfs4proc.c18
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/pnfs.c10
-rw-r--r--fs/nfs/pnfs_nfs.c12
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfsd/blocklayout.c2
-rw-r--r--fs/nfsd/nfs2acl.c20
-rw-r--r--fs/nfsd/nfs3acl.c16
-rw-r--r--fs/nfsd/nfs4acl.c16
-rw-r--r--fs/nfsd/nfs4callback.c18
-rw-r--r--fs/nfsd/nfs4state.c67
-rw-r--r--fs/nfsd/state.h2
-rw-r--r--fs/nilfs2/btnode.c6
-rw-r--r--fs/nilfs2/btnode.h2
-rw-r--r--fs/nilfs2/btree.c6
-rw-r--r--fs/nilfs2/gcinode.c5
-rw-r--r--fs/nilfs2/mdt.c11
-rw-r--r--fs/nilfs2/segbuf.c18
-rw-r--r--fs/nilfs2/the_nilfs.c2
-rw-r--r--fs/ntfs/aops.c6
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/logfile.c2
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ocfs2/Makefile2
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/buffer_head_io.c13
-rw-r--r--fs/ocfs2/cluster/heartbeat.c14
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/overlayfs/dir.c67
-rw-r--r--fs/overlayfs/inode.c61
-rw-r--r--fs/overlayfs/overlayfs.h1
-rw-r--r--fs/overlayfs/super.c12
-rw-r--r--fs/posix_acl.c42
-rw-r--r--fs/proc/root.c7
-rw-r--r--fs/read_write.c18
-rw-r--r--fs/reiserfs/inode.c4
-rw-r--r--fs/reiserfs/journal.c14
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c11
-rw-r--r--fs/squashfs/block.c4
-rw-r--r--fs/timerfd.c10
-rw-r--r--fs/ubifs/file.c24
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/directory.c2
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/udf/partition.c13
-rw-r--r--fs/udf/super.c22
-rw-r--r--fs/udf/udf_sb.h5
-rw-r--r--fs/ufs/balloc.c2
-rw-r--r--fs/ufs/util.c2
-rw-r--r--fs/xfs/xfs_aops.c11
-rw-r--r--fs/xfs/xfs_buf.c32
-rw-r--r--fs/xfs/xfs_ioctl.c6
197 files changed, 2525 insertions, 1805 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b84c291ba1eb..d7b78d531e63 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9fs_proto_dotu(v9ses));
fid = file->private_data;
if (!fid) {
- fid = v9fs_fid_clone(file->f_path.dentry);
+ fid = v9fs_fid_clone(file_dentry(file));
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
* because we want write after unlink usecase
* to work.
*/
- fid = v9fs_writeback_fid(file->f_path.dentry);
+ fid = v9fs_writeback_fid(file_dentry(file));
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
mutex_unlock(&v9inode->v_mutex);
@@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
* because we want write after unlink usecase
* to work.
*/
- fid = v9fs_writeback_fid(filp->f_path.dentry);
+ fid = v9fs_writeback_fid(file_dentry(filp));
if (IS_ERR(fid)) {
retval = PTR_ERR(fid);
mutex_unlock(&v9inode->v_mutex);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index f4645c515262..e2e7c749925a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -853,7 +853,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct p9_fid *fid, *inode_fid;
struct dentry *res = NULL;
- if (d_unhashed(dentry)) {
+ if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
if (IS_ERR(res))
return PTR_ERR(res);
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index a34702c998f5..1b51eaa5e2dd 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -254,7 +254,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
struct posix_acl *pacl = NULL, *dacl = NULL;
struct dentry *res = NULL;
- if (d_unhashed(dentry)) {
+ if (d_in_lookup(dentry)) {
res = v9fs_vfs_lookup(dir, dentry, 0);
if (IS_ERR(res))
return PTR_ERR(res);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f0d268b97d19..a439548de785 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -70,9 +70,13 @@ struct autofs_info {
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */
-#define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered
+#define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered
* for expiry, so RCU_walk is
- * not permitted
+ * not permitted. If it progresses to
+ * actual expiry attempt, the flag is
+ * not cleared when EXPIRING is set -
+ * in that case it gets cleared only
+ * when it comes to clearing EXPIRING.
*/
#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 9510d8d2e9cd..b493909e7492 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -316,19 +316,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
if (ino->flags & AUTOFS_INF_PENDING)
goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
- ino->flags |= AUTOFS_INF_NO_RCU;
+ ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
spin_lock(&sbi->fs_lock);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
ino->flags |= AUTOFS_INF_EXPIRING;
- smp_mb();
- ino->flags &= ~AUTOFS_INF_NO_RCU;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
return root;
}
- ino->flags &= ~AUTOFS_INF_NO_RCU;
+ ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
}
out:
spin_unlock(&sbi->fs_lock);
@@ -446,7 +444,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
while ((dentry = get_next_positive_subdir(dentry, root))) {
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
- if (ino->flags & AUTOFS_INF_NO_RCU)
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
expired = NULL;
else
expired = should_expire(dentry, mnt, timeout, how);
@@ -455,7 +453,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
continue;
}
ino = autofs4_dentry_ino(expired);
- ino->flags |= AUTOFS_INF_NO_RCU;
+ ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
spin_lock(&sbi->fs_lock);
@@ -465,7 +463,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
goto found;
}
- ino->flags &= ~AUTOFS_INF_NO_RCU;
+ ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
if (expired != dentry)
dput(expired);
spin_unlock(&sbi->fs_lock);
@@ -475,17 +473,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
found:
pr_debug("returning %p %pd\n", expired, expired);
ino->flags |= AUTOFS_INF_EXPIRING;
- smp_mb();
- ino->flags &= ~AUTOFS_INF_NO_RCU;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
- spin_lock(&sbi->lookup_lock);
- spin_lock(&expired->d_parent->d_lock);
- spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&expired->d_parent->d_subdirs, &expired->d_child);
- spin_unlock(&expired->d_lock);
- spin_unlock(&expired->d_parent->d_lock);
- spin_unlock(&sbi->lookup_lock);
return expired;
}
@@ -496,7 +485,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
int status;
/* Block on any pending expire */
- if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)))
+ if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
return 0;
if (rcu_walk)
return -ECHILD;
@@ -554,7 +543,7 @@ int autofs4_expire_run(struct super_block *sb,
ino = autofs4_dentry_ino(dentry);
/* avoid rapid-fire expire attempts if expiry fails */
ino->last_used = now;
- ino->flags &= ~AUTOFS_INF_EXPIRING;
+ ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -583,7 +572,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
/* avoid rapid-fire expire attempts if expiry fails */
ino->last_used = now;
- ino->flags &= ~AUTOFS_INF_EXPIRING;
+ ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
dput(dentry);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 78bd80298528..3767f6641af1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -458,7 +458,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
*/
struct inode *inode;
- if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
return 0;
if (d_mountpoint(dentry))
return 0;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 0146d911f468..631f1554c87b 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi,
set_fs(KERNEL_DS);
mutex_lock(&sbi->pipe_mutex);
- wr = __vfs_write(file, data, bytes, &file->f_pos);
- while (bytes && wr) {
+ while (bytes) {
+ wr = __vfs_write(file, data, bytes, &file->f_pos);
+ if (wr <= 0)
+ break;
data += wr;
bytes -= wr;
- wr = __vfs_write(file, data, bytes, &file->f_pos);
}
mutex_unlock(&sbi->pipe_mutex);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e158b22ef32f..a7a28110dc80 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Align to page */
- if (!dump_skip(cprm, dataoff - cprm->file->f_pos))
+ if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 71ade0e556b7..203589311bf8 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto end_coredump;
}
- if (!dump_skip(cprm, dataoff - cprm->file->f_pos))
+ if (!dump_skip(cprm, dataoff - cprm->pos))
goto end_coredump;
if (!elf_fdpic_dump_segments(cprm))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 71ccab1d22c6..d012be4ab977 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -493,7 +493,7 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax)
if (size < 0)
return size;
- if (!ops->direct_access)
+ if (!blk_queue_dax(bdev_get_queue(bdev)) || !ops->direct_access)
return -EOPNOTSUPP;
if ((sector + DIV_ROUND_UP(size, 512)) >
part_nr_sects_read(bdev->bd_part))
@@ -1287,7 +1287,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
- if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+ if (IS_ENABLED(CONFIG_BLK_DEV_DAX) &&
+ blk_queue_dax(disk->queue))
bdev->bd_inode->i_flags = S_DAX;
else
bdev->bd_inode->i_flags = 0;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b677a6ea6001..5d5cae05818d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1673,6 +1673,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
}
bio->bi_bdev = block_ctx->dev->bdev;
bio->bi_iter.bi_sector = dev_bytenr >> 9;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1685,7 +1686,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
"btrfsic: error, failed to add a single page!\n");
return -1;
}
- if (submit_bio_wait(READ, bio)) {
+ if (submit_bio_wait(bio)) {
printk(KERN_INFO
"btrfsic: read error at logical %llu dev %s!\n",
block_ctx->start, block_ctx->dev->name);
@@ -2206,7 +2207,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
block->dev_bytenr, block->mirror_num);
next_block = block->next_in_same_bio;
block->iodone_w_error = iodone_w_error;
- if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
dev_state->last_flush_gen++;
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
@@ -2242,7 +2243,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
block->dev_bytenr, block->mirror_num);
block->iodone_w_error = iodone_w_error;
- if (block->submit_bio_bh_rw & REQ_FLUSH) {
+ if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
dev_state->last_flush_gen++;
if ((dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
@@ -2645,7 +2646,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
* This algorithm is recursive because the amount of used stack space
* is very small and the max recursion depth is limited.
*/
- indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
+ indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)",
btrfsic_get_block_type(state, block),
block->logical_bytenr, block->dev_state->name,
block->dev_bytenr, block->mirror_num);
@@ -2855,12 +2856,12 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
return ds;
}
-int btrfsic_submit_bh(int rw, struct buffer_head *bh)
+int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh)
{
struct btrfsic_dev_state *dev_state;
if (!btrfsic_is_initialized)
- return submit_bh(rw, bh);
+ return submit_bh(op, op_flags, bh);
mutex_lock(&btrfsic_mutex);
/* since btrfsic_submit_bh() might also be called before
@@ -2869,26 +2870,26 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
/* Only called to write the superblock (incl. FLUSH/FUA) */
if (NULL != dev_state &&
- (rw & WRITE) && bh->b_size > 0) {
+ (op == REQ_OP_WRITE) && bh->b_size > 0) {
u64 dev_bytenr;
dev_bytenr = 4096 * bh->b_blocknr;
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
- " size=%zu, data=%p, bdev=%p)\n",
- rw, (unsigned long long)bh->b_blocknr,
+ "submit_bh(op=0x%x,0x%x, blocknr=%llu "
+ "(bytenr %llu), size=%zu, data=%p, bdev=%p)\n",
+ op, op_flags, (unsigned long long)bh->b_blocknr,
dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
btrfsic_process_written_block(dev_state, dev_bytenr,
&bh->b_data, 1, NULL,
- NULL, bh, rw);
- } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ NULL, bh, op_flags);
+ } else if (NULL != dev_state && (op_flags & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
- rw, bh->b_bdev);
+ "submit_bh(op=0x%x,0x%x FLUSH, bdev=%p)\n",
+ op, op_flags, bh->b_bdev);
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
if ((dev_state->state->print_mask &
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
@@ -2906,7 +2907,7 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
block->never_written = 0;
block->iodone_w_error = 0;
block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = rw;
+ block->submit_bio_bh_rw = op_flags;
block->orig_bio_bh_private = bh->b_private;
block->orig_bio_bh_end_io.bh = bh->b_end_io;
block->next_in_same_bio = NULL;
@@ -2915,10 +2916,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
}
}
mutex_unlock(&btrfsic_mutex);
- return submit_bh(rw, bh);
+ return submit_bh(op, op_flags, bh);
}
-static void __btrfsic_submit_bio(int rw, struct bio *bio)
+static void __btrfsic_submit_bio(struct bio *bio)
{
struct btrfsic_dev_state *dev_state;
@@ -2930,7 +2931,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
* btrfsic_mount(), this might return NULL */
dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
if (NULL != dev_state &&
- (rw & WRITE) && NULL != bio->bi_io_vec) {
+ (bio_op(bio) == REQ_OP_WRITE) && NULL != bio->bi_io_vec) {
unsigned int i;
u64 dev_bytenr;
u64 cur_bytenr;
@@ -2942,9 +2943,9 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bio(rw=0x%x, bi_vcnt=%u,"
+ "submit_bio(rw=%d,0x%x, bi_vcnt=%u,"
" bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
- rw, bio->bi_vcnt,
+ bio_op(bio), bio->bi_rw, bio->bi_vcnt,
(unsigned long long)bio->bi_iter.bi_sector,
dev_bytenr, bio->bi_bdev);
@@ -2975,18 +2976,18 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
btrfsic_process_written_block(dev_state, dev_bytenr,
mapped_datav, bio->bi_vcnt,
bio, &bio_is_patched,
- NULL, rw);
+ NULL, bio->bi_rw);
while (i > 0) {
i--;
kunmap(bio->bi_io_vec[i].bv_page);
}
kfree(mapped_datav);
- } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
+ } else if (NULL != dev_state && (bio->bi_rw & REQ_PREFLUSH)) {
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
- rw, bio->bi_bdev);
+ "submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
+ bio_op(bio), bio->bi_rw, bio->bi_bdev);
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
if ((dev_state->state->print_mask &
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
@@ -3004,7 +3005,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
block->never_written = 0;
block->iodone_w_error = 0;
block->flush_gen = dev_state->last_flush_gen + 1;
- block->submit_bio_bh_rw = rw;
+ block->submit_bio_bh_rw = bio->bi_rw;
block->orig_bio_bh_private = bio->bi_private;
block->orig_bio_bh_end_io.bio = bio->bi_end_io;
block->next_in_same_bio = NULL;
@@ -3016,16 +3017,16 @@ leave:
mutex_unlock(&btrfsic_mutex);
}
-void btrfsic_submit_bio(int rw, struct bio *bio)
+void btrfsic_submit_bio(struct bio *bio)
{
- __btrfsic_submit_bio(rw, bio);
- submit_bio(rw, bio);
+ __btrfsic_submit_bio(bio);
+ submit_bio(bio);
}
-int btrfsic_submit_bio_wait(int rw, struct bio *bio)
+int btrfsic_submit_bio_wait(struct bio *bio)
{
- __btrfsic_submit_bio(rw, bio);
- return submit_bio_wait(rw, bio);
+ __btrfsic_submit_bio(bio);
+ return submit_bio_wait(bio);
}
int btrfsic_mount(struct btrfs_root *root,
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h
index 13b8566c97ab..f78dff1c7e86 100644
--- a/fs/btrfs/check-integrity.h
+++ b/fs/btrfs/check-integrity.h
@@ -20,9 +20,9 @@
#define __BTRFS_CHECK_INTEGRITY__
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-int btrfsic_submit_bh(int rw, struct buffer_head *bh);
-void btrfsic_submit_bio(int rw, struct bio *bio);
-int btrfsic_submit_bio_wait(int rw, struct bio *bio);
+int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh);
+void btrfsic_submit_bio(struct bio *bio);
+int btrfsic_submit_bio_wait(struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 658c39b70fba..cefedabf0a92 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -363,6 +363,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
kfree(cb);
return -ENOMEM;
}
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
atomic_inc(&cb->pending_bios);
@@ -373,7 +374,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
page = compressed_pages[pg_index];
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
- ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
+ ret = io_tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
bio, 0);
else
@@ -401,13 +402,14 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
+ ret = btrfs_map_bio(root, bio, 0, 1);
BUG_ON(ret); /* -ENOMEM */
bio_put(bio);
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
BUG_ON(!bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -431,7 +433,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
+ ret = btrfs_map_bio(root, bio, 0, 1);
BUG_ON(ret); /* -ENOMEM */
bio_put(bio);
@@ -646,6 +648,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
if (!comp_bio)
goto fail2;
+ bio_set_op_attrs (comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
atomic_inc(&cb->pending_bios);
@@ -656,7 +659,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- ret = tree->ops->merge_bio_hook(READ, page, 0,
+ ret = tree->ops->merge_bio_hook(page, 0,
PAGE_SIZE,
comp_bio, 0);
else
@@ -687,8 +690,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
root->sectorsize);
- ret = btrfs_map_bio(root, READ, comp_bio,
- mirror_num, 0);
+ ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
if (ret) {
bio->bi_error = ret;
bio_endio(comp_bio);
@@ -699,6 +701,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
GFP_NOFS);
BUG_ON(!comp_bio);
+ bio_set_op_attrs(comp_bio, REQ_OP_READ, 0);
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -717,7 +720,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
if (ret) {
bio->bi_error = ret;
bio_endio(comp_bio);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 427c36b430a6..a85cf7d23309 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1373,7 +1373,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
BUG_ON(tm->slot != 0);
- eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
+ eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start,
+ eb->len);
if (!eb_rewin) {
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
@@ -1454,7 +1455,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
} else if (old_root) {
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
- eb = alloc_dummy_extent_buffer(root->fs_info, logical);
+ eb = alloc_dummy_extent_buffer(root->fs_info, logical,
+ root->nodesize);
} else {
btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
eb = btrfs_clone_extent_buffer(eb_root);
@@ -1552,6 +1554,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, root->fs_info->generation);
if (!should_cow_block(trans, root, buf)) {
+ trans->dirty = true;
*cow_ret = buf;
return 0;
}
@@ -1783,10 +1786,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
if (!err) {
tmp = (struct btrfs_disk_key *)(kaddr + offset -
map_start);
- } else {
+ } else if (err == 1) {
read_extent_buffer(eb, &unaligned,
offset, sizeof(unaligned));
tmp = &unaligned;
+ } else {
+ return err;
}
} else {
@@ -2510,6 +2515,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
if (!btrfs_buffer_uptodate(tmp, 0, 0))
ret = -EIO;
free_extent_buffer(tmp);
+ } else {
+ ret = PTR_ERR(tmp);
}
return ret;
}
@@ -2773,8 +2780,10 @@ again:
* then we don't want to set the path blocking,
* so we test it here
*/
- if (!should_cow_block(trans, root, b))
+ if (!should_cow_block(trans, root, b)) {
+ trans->dirty = true;
goto cow_done;
+ }
/*
* must have write locks on this node and the
@@ -2823,6 +2832,8 @@ cow_done:
}
ret = key_search(b, key, level, &prev_cmp, &slot);
+ if (ret < 0)
+ goto done;
if (level != 0) {
int dec = 0;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 101c3cfd3f7c..b2620d1f883f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_root *root,
- unsigned long count, int wait);
+ unsigned long count, u64 transid, int wait);
int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
@@ -3091,7 +3091,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root,
struct btrfs_root *parent_root,
u64 new_dirid);
-int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 61561c2a3f96..d3aaabbfada0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
return 0;
}
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list)
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list)
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item;
delayed_node = btrfs_get_delayed_node(inode);
if (!delayed_node)
- return;
+ return false;
+
+ /*
+ * We can only do one readdir with delayed items at a time because of
+ * item->readdir_list.
+ */
+ inode_unlock_shared(inode);
+ inode_lock(inode);
mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node);
@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
* requeue or dequeue this delayed node.
*/
atomic_dec(&delayed_node->refs);
+
+ return true;
}
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list)
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list)
{
struct btrfs_delayed_item *curr, *next;
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
if (atomic_dec_and_test(&curr->refs))
kfree(curr);
}
+
+ /*
+ * The VFS is going to do up_read(), so we need to downgrade back to a
+ * read lock.
+ */
+ downgrade_write(&inode->i_rwsem);
}
int btrfs_should_delete_dir_index(struct list_head *del_list,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 0167853c84ae..2495b3d4075f 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
/* Used for readdir() */
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
- struct list_head *del_list);
-void btrfs_put_delayed_items(struct list_head *ins_list,
- struct list_head *del_list);
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list);
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+ struct list_head *ins_list,
+ struct list_head *del_list);
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6628fca9f4ed..9a726ded2c6d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -124,7 +124,6 @@ struct async_submit_bio {
struct list_head list;
extent_submit_bio_hook_t *submit_bio_start;
extent_submit_bio_hook_t *submit_bio_done;
- int rw;
int mirror_num;
unsigned long bio_flags;
/*
@@ -727,7 +726,7 @@ static void end_workqueue_bio(struct bio *bio)
fs_info = end_io_wq->info;
end_io_wq->error = bio->bi_error;
- if (bio->bi_rw & REQ_WRITE) {
+ if (bio_op(bio) == REQ_OP_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
wq = fs_info->endio_meta_write_workers;
func = btrfs_endio_meta_write_helper;
@@ -797,7 +796,7 @@ static void run_one_async_start(struct btrfs_work *work)
int ret;
async = container_of(work, struct async_submit_bio, work);
- ret = async->submit_bio_start(async->inode, async->rw, async->bio,
+ ret = async->submit_bio_start(async->inode, async->bio,
async->mirror_num, async->bio_flags,
async->bio_offset);
if (ret)
@@ -830,9 +829,8 @@ static void run_one_async_done(struct btrfs_work *work)
return;
}
- async->submit_bio_done(async->inode, async->rw, async->bio,
- async->mirror_num, async->bio_flags,
- async->bio_offset);
+ async->submit_bio_done(async->inode, async->bio, async->mirror_num,
+ async->bio_flags, async->bio_offset);
}
static void run_one_async_free(struct btrfs_work *work)
@@ -844,7 +842,7 @@ static void run_one_async_free(struct btrfs_work *work)
}
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- int rw, struct bio *bio, int mirror_num,
+ struct bio *bio, int mirror_num,
unsigned long bio_flags,
u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
@@ -857,7 +855,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
return -ENOMEM;
async->inode = inode;
- async->rw = rw;
async->bio = bio;
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
@@ -873,7 +870,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
atomic_inc(&fs_info->nr_async_submits);
- if (rw & REQ_SYNC)
+ if (bio->bi_rw & REQ_SYNC)
btrfs_set_work_high_priority(&async->work);
btrfs_queue_work(fs_info->workers, &async->work);
@@ -903,9 +900,8 @@ static int btree_csum_one_bio(struct bio *bio)
return ret;
}
-static int __btree_submit_bio_start(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
+static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
/*
@@ -915,7 +911,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw,
return btree_csum_one_bio(bio);
}
-static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
+static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
@@ -925,7 +921,7 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
* when we're called for a write, we're already in the async
* submission context. Just jump into btrfs_map_bio
*/
- ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 1);
if (ret) {
bio->bi_error = ret;
bio_endio(bio);
@@ -944,14 +940,14 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
return 1;
}
-static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
int async = check_async_write(inode, bio_flags);
int ret;
- if (!(rw & REQ_WRITE)) {
+ if (bio_op(bio) != REQ_OP_WRITE) {
/*
* called for a read, do the setup so that checksum validation
* can happen in the async kernel threads
@@ -960,21 +956,19 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
bio, BTRFS_WQ_ENDIO_METADATA);
if (ret)
goto out_w_error;
- ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
- mirror_num, 0);
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0);
} else if (!async) {
ret = btree_csum_one_bio(bio);
if (ret)
goto out_w_error;
- ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
- mirror_num, 0);
+ ret = btrfs_map_bio(BTRFS_I(inode)->root, bio, mirror_num, 0);
} else {
/*
* kthread helpers are used to submit writes so that
* checksumming can happen in parallel across all CPUs
*/
ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, mirror_num, 0,
+ inode, bio, mirror_num, 0,
bio_offset,
__btree_submit_bio_start,
__btree_submit_bio_done);
@@ -1098,7 +1092,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
struct inode *btree_inode = root->fs_info->btree_inode;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
+ if (IS_ERR(buf))
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, WAIT_NONE, btree_get_extent, 0);
@@ -1114,7 +1108,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
int ret;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
+ if (IS_ERR(buf))
return 0;
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
@@ -1147,7 +1141,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr)
{
if (btrfs_test_is_dummy_root(root))
- return alloc_test_extent_buffer(root->fs_info, bytenr);
+ return alloc_test_extent_buffer(root->fs_info, bytenr,
+ root->nodesize);
return alloc_extent_buffer(root->fs_info, bytenr);
}
@@ -1171,8 +1166,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
int ret;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(buf))
+ return buf;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
if (ret) {
@@ -1314,14 +1309,16 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Should only be used by the testing infrastructure */
-struct btrfs_root *btrfs_alloc_dummy_root(void)
+struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
root = btrfs_alloc_root(NULL, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
- __setup_root(4096, 4096, 4096, root, NULL, 1);
+ /* We don't use the stripesize in selftest, set it as sectorsize */
+ __setup_root(nodesize, sectorsize, sectorsize, root, NULL,
+ BTRFS_ROOT_TREE_OBJECTID);
set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
@@ -1803,6 +1800,13 @@ static int cleaner_kthread(void *arg)
if (btrfs_need_cleaner_sleep(root))
goto sleep;
+ /*
+ * Do not do anything if we might cause open_ctree() to block
+ * before we have finished mounting the filesystem.
+ */
+ if (!root->fs_info->open)
+ goto sleep;
+
if (!mutex_trylock(&root->fs_info->cleaner_mutex))
goto sleep;
@@ -2517,7 +2521,6 @@ int open_ctree(struct super_block *sb,
int num_backups_tried = 0;
int backup_index = 0;
int max_active;
- bool cleaner_mutex_locked = false;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -2797,7 +2800,7 @@ int open_ctree(struct super_block *sb,
nodesize = btrfs_super_nodesize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
- stripesize = btrfs_super_stripesize(disk_super);
+ stripesize = sectorsize;
fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
@@ -2996,13 +2999,6 @@ retry_root_backup:
goto fail_sysfs;
}
- /*
- * Hold the cleaner_mutex thread here so that we don't block
- * for a long time on btrfs_recover_relocation. cleaner_kthread
- * will wait for us to finish mounting the filesystem.
- */
- mutex_lock(&fs_info->cleaner_mutex);
- cleaner_mutex_locked = true;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
@@ -3062,8 +3058,10 @@ retry_root_backup:
ret = btrfs_cleanup_fs_roots(fs_info);
if (ret)
goto fail_qgroup;
- /* We locked cleaner_mutex before creating cleaner_kthread. */
+
+ mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(tree_root);
+ mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
btrfs_warn(fs_info, "failed to recover relocation: %d",
ret);
@@ -3071,8 +3069,6 @@ retry_root_backup:
goto fail_qgroup;
}
}
- mutex_unlock(&fs_info->cleaner_mutex);
- cleaner_mutex_locked = false;
location.objectid = BTRFS_FS_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3186,10 +3182,6 @@ fail_cleaner:
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
fail_sysfs:
- if (cleaner_mutex_locked) {
- mutex_unlock(&fs_info->cleaner_mutex);
- cleaner_mutex_locked = false;
- }
btrfs_sysfs_remove_mounted(fs_info);
fail_fsdev_sysfs:
@@ -3420,9 +3412,9 @@ static int write_dev_supers(struct btrfs_device *device,
* to go down lazy.
*/
if (i == 0)
- ret = btrfsic_submit_bh(WRITE_FUA, bh);
+ ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_FUA, bh);
else
- ret = btrfsic_submit_bh(WRITE_SYNC, bh);
+ ret = btrfsic_submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
if (ret)
errors++;
}
@@ -3486,12 +3478,13 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
bio->bi_end_io = btrfs_end_empty_barrier;
bio->bi_bdev = device->bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
init_completion(&device->flush_wait);
bio->bi_private = &device->flush_wait;
device->flush_bio = bio;
bio_get(bio);
- btrfsic_submit_bio(WRITE_FLUSH, bio);
+ btrfsic_submit_bio(bio);
return 0;
}
@@ -4130,6 +4123,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
* Hint to catch really bogus numbers, bitflips or so, more exact checks are
* done later
*/
+ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+ btrfs_err(fs_info, "bytes_used is too small %llu",
+ btrfs_super_bytes_used(sb));
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(btrfs_super_stripesize(sb))) {
+ btrfs_err(fs_info, "invalid stripesize %u",
+ btrfs_super_stripesize(sb));
+ ret = -EINVAL;
+ }
if (btrfs_super_num_devices(sb) > (1UL << 31))
printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
btrfs_super_num_devices(sb));
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 8e79d0070bcf..dbf3e1aab69e 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -90,7 +90,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
void btrfs_free_fs_root(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-struct btrfs_root *btrfs_alloc_dummy_root(void);
+struct btrfs_root *btrfs_alloc_dummy_root(u32 sectorsize, u32 nodesize);
#endif
/*
@@ -122,7 +122,7 @@ void btrfs_csum_final(u32 crc, char *result);
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
- int rw, struct bio *bio, int mirror_num,
+ struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 bio_offset,
extent_submit_bio_hook_t *submit_bio_start,
extent_submit_bio_hook_t *submit_bio_done);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a400951e8678..b480fd555774 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2042,8 +2042,13 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
struct btrfs_bio *bbio = NULL;
+ /*
+ * Avoid races with device replace and make sure our bbio has devices
+ * associated to its stripes that don't go away while we are discarding.
+ */
+ btrfs_bio_counter_inc_blocked(root->fs_info);
/* Tell the block device(s) that the sectors can be discarded */
- ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
+ ret = btrfs_map_block(root->fs_info, REQ_OP_DISCARD,
bytenr, &num_bytes, &bbio, 0);
/* Error condition is -ENOMEM */
if (!ret) {
@@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
}
btrfs_put_bbio(bbio);
}
+ btrfs_bio_counter_dec(root->fs_info);
if (actual_bytes)
*actual_bytes = discarded_bytes;
@@ -2829,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
struct async_delayed_refs {
struct btrfs_root *root;
+ u64 transid;
int count;
int error;
int sync;
@@ -2844,6 +2851,10 @@ static void delayed_ref_async_start(struct btrfs_work *work)
async = container_of(work, struct async_delayed_refs, work);
+ /* if the commit is already started, we don't need to wait here */
+ if (btrfs_transaction_blocked(async->root->fs_info))
+ goto done;
+
trans = btrfs_join_transaction(async->root);
if (IS_ERR(trans)) {
async->error = PTR_ERR(trans);
@@ -2855,10 +2866,15 @@ static void delayed_ref_async_start(struct btrfs_work *work)
* wait on delayed refs
*/
trans->sync = true;
+
+ /* Don't bother flushing if we got into a different transaction */
+ if (trans->transid > async->transid)
+ goto end;
+
ret = btrfs_run_delayed_refs(trans, async->root, async->count);
if (ret)
async->error = ret;
-
+end:
ret = btrfs_end_transaction(trans, async->root);
if (ret && !async->error)
async->error = ret;
@@ -2870,7 +2886,7 @@ done:
}
int btrfs_async_run_delayed_refs(struct btrfs_root *root,
- unsigned long count, int wait)
+ unsigned long count, u64 transid, int wait)
{
struct async_delayed_refs *async;
int ret;
@@ -2882,6 +2898,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
async->root = root->fs_info->tree_root;
async->count = count;
async->error = 0;
+ async->transid = transid;
if (wait)
async->sync = 1;
else
@@ -8010,8 +8027,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf;
buf = btrfs_find_create_tree_block(root, bytenr);
- if (!buf)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(buf))
+ return buf;
+
btrfs_set_header_generation(buf, trans->transid);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
btrfs_tree_lock(buf);
@@ -8038,7 +8056,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
}
- trans->blocks_used++;
+ trans->dirty = true;
/* this returns a buffer locked for blocking */
return buf;
}
@@ -8653,8 +8671,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
next = btrfs_find_tree_block(root->fs_info, bytenr);
if (!next) {
next = btrfs_find_create_tree_block(root, bytenr);
- if (!next)
- return -ENOMEM;
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+
btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
level - 1);
reada = 1;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3cd57825c75f..27c214941004 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
bio->bi_iter.bi_size = 0;
map_length = length;
+ /*
+ * Avoid races with device replace and make sure our bbio has devices
+ * associated to its stripes that don't go away while we are doing the
+ * read repair operation.
+ */
+ btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_block(fs_info, WRITE, logical,
&map_length, &bbio, mirror_num);
if (ret) {
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
@@ -2037,14 +2044,17 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
dev = bbio->stripes[mirror_num-1].dev;
btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) {
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return -EIO;
}
bio->bi_bdev = dev->bdev;
+ bio->bi_rw = WRITE_SYNC;
bio_add_page(bio, page, length, pg_offset);
- if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
+ if (btrfsic_submit_bio_wait(bio)) {
/* try to remap that extent elsewhere? */
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
@@ -2054,6 +2064,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
btrfs_ino(inode), start,
rcu_str_deref(dev->name), sector);
+ btrfs_bio_counter_dec(fs_info);
bio_put(bio);
return 0;
}
@@ -2376,7 +2387,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
int read_mode;
int ret;
- BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+ BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
@@ -2402,12 +2413,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
free_io_failure(inode, failrec);
return -EIO;
}
+ bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
- failrec->this_mirror,
+ ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
failrec->bio_flags, 0);
if (ret) {
free_io_failure(inode, failrec);
@@ -2713,8 +2724,8 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
}
-static int __must_check submit_one_bio(int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags)
+static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
+ unsigned long bio_flags)
{
int ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
@@ -2725,33 +2736,32 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
start = page_offset(page) + bvec->bv_offset;
bio->bi_private = NULL;
-
bio_get(bio);
if (tree->ops && tree->ops->submit_bio_hook)
- ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
+ ret = tree->ops->submit_bio_hook(page->mapping->host, bio,
mirror_num, bio_flags, start);
else
- btrfsic_submit_bio(rw, bio);
+ btrfsic_submit_bio(bio);
bio_put(bio);
return ret;
}
-static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page,
+static int merge_bio(struct extent_io_tree *tree, struct page *page,
unsigned long offset, size_t size, struct bio *bio,
unsigned long bio_flags)
{
int ret = 0;
if (tree->ops && tree->ops->merge_bio_hook)
- ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio,
+ ret = tree->ops->merge_bio_hook(page, offset, size, bio,
bio_flags);
BUG_ON(ret < 0);
return ret;
}
-static int submit_extent_page(int rw, struct extent_io_tree *tree,
+static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
struct writeback_control *wbc,
struct page *page, sector_t sector,
size_t size, unsigned long offset,
@@ -2779,10 +2789,9 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (prev_bio_flags != bio_flags || !contig ||
force_bio_submit ||
- merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
+ merge_bio(tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
- ret = submit_one_bio(rw, bio, mirror_num,
- prev_bio_flags);
+ ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
if (ret < 0) {
*bio_ret = NULL;
return ret;
@@ -2803,6 +2812,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
+ bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
wbc_account_io(wbc, page, page_size);
@@ -2811,7 +2821,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (bio_ret)
*bio_ret = bio;
else
- ret = submit_one_bio(rw, bio, mirror_num, bio_flags);
+ ret = submit_one_bio(bio, mirror_num, bio_flags);
return ret;
}
@@ -2875,7 +2885,7 @@ static int __do_readpage(struct extent_io_tree *tree,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw,
+ unsigned long *bio_flags, int read_flags,
u64 *prev_em_start)
{
struct inode *inode = page->mapping->host;
@@ -3058,8 +3068,8 @@ static int __do_readpage(struct extent_io_tree *tree,
}
pnr -= page->index;
- ret = submit_extent_page(rw, tree, NULL, page,
- sector, disk_io_size, pg_offset,
+ ret = submit_extent_page(REQ_OP_READ, read_flags, tree, NULL,
+ page, sector, disk_io_size, pg_offset,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
@@ -3090,7 +3100,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw,
+ unsigned long *bio_flags,
u64 *prev_em_start)
{
struct inode *inode;
@@ -3111,7 +3121,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], get_extent, em_cached, bio,
- mirror_num, bio_flags, rw, prev_em_start);
+ mirror_num, bio_flags, 0, prev_em_start);
put_page(pages[index]);
}
}
@@ -3121,7 +3131,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
int nr_pages, get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw,
+ unsigned long *bio_flags,
u64 *prev_em_start)
{
u64 start = 0;
@@ -3143,7 +3153,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
index - first_index, start,
end, get_extent, em_cached,
bio, mirror_num, bio_flags,
- rw, prev_em_start);
+ prev_em_start);
start = page_start;
end = start + PAGE_SIZE - 1;
first_index = index;
@@ -3154,7 +3164,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
end, get_extent, em_cached, bio,
- mirror_num, bio_flags, rw,
+ mirror_num, bio_flags,
prev_em_start);
}
@@ -3162,7 +3172,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int read_flags)
{
struct inode *inode = page->mapping->host;
struct btrfs_ordered_extent *ordered;
@@ -3182,7 +3192,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
- bio_flags, rw, NULL);
+ bio_flags, read_flags, NULL);
return ret;
}
@@ -3194,9 +3204,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
int ret;
ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
- &bio_flags, READ);
+ &bio_flags, 0);
if (bio)
- ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
+ ret = submit_one_bio(bio, mirror_num, bio_flags);
return ret;
}
@@ -3430,8 +3440,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page->index, cur, end);
}
- ret = submit_extent_page(write_flags, tree, wbc, page,
- sector, iosize, pg_offset,
+ ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+ page, sector, iosize, pg_offset,
bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
0, 0, 0, false);
@@ -3470,13 +3480,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
size_t pg_offset = 0;
loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_SHIFT;
- int write_flags;
+ int write_flags = 0;
unsigned long nr_written = 0;
if (wbc->sync_mode == WB_SYNC_ALL)
write_flags = WRITE_SYNC;
- else
- write_flags = WRITE;
trace___extent_writepage(page, inode, wbc);
@@ -3720,7 +3728,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
- int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
+ int write_flags = (epd->sync_io ? WRITE_SYNC : 0) | REQ_META;
int ret = 0;
clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -3734,9 +3742,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
- ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
- PAGE_SIZE, 0, bdev, &epd->bio,
- -1, end_bio_extent_buffer_writepage,
+ ret = submit_extent_page(REQ_OP_WRITE, write_flags, tree, wbc,
+ p, offset >> 9, PAGE_SIZE, 0, bdev,
+ &epd->bio, -1,
+ end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
if (ret) {
@@ -4046,13 +4055,12 @@ retry:
static void flush_epd_write_bio(struct extent_page_data *epd)
{
if (epd->bio) {
- int rw = WRITE;
int ret;
- if (epd->sync_io)
- rw = WRITE_SYNC;
+ bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
+ epd->sync_io ? WRITE_SYNC : 0);
- ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
+ ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
BUG_ON(ret < 0); /* -ENOMEM */
epd->bio = NULL;
}
@@ -4179,19 +4187,19 @@ int extent_readpages(struct extent_io_tree *tree,
if (nr < ARRAY_SIZE(pagepool))
continue;
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ, &prev_em_start);
+ &bio, 0, &bio_flags, &prev_em_start);
nr = 0;
}
if (nr)
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ, &prev_em_start);
+ &bio, 0, &bio_flags, &prev_em_start);
if (em_cached)
free_extent_map(em_cached);
BUG_ON(!list_empty(pages));
if (bio)
- return submit_one_bio(READ, bio, 0, bio_flags);
+ return submit_one_bio(bio, 0, bio_flags);
return 0;
}
@@ -4718,16 +4726,16 @@ err:
}
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start)
+ u64 start, u32 nodesize)
{
unsigned long len;
if (!fs_info) {
/*
* Called only from tests that don't always have a fs_info
- * available, but we know that nodesize is 4096
+ * available
*/
- len = 4096;
+ len = nodesize;
} else {
len = fs_info->tree_root->nodesize;
}
@@ -4823,7 +4831,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start)
+ u64 start, u32 nodesize)
{
struct extent_buffer *eb, *exists = NULL;
int ret;
@@ -4831,7 +4839,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
eb = find_extent_buffer(fs_info, start);
if (eb)
return eb;
- eb = alloc_dummy_extent_buffer(fs_info, start);
+ eb = alloc_dummy_extent_buffer(fs_info, start, nodesize);
if (!eb)
return NULL;
eb->fs_info = fs_info;
@@ -4882,18 +4890,25 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
int uptodate = 1;
int ret;
+ if (!IS_ALIGNED(start, fs_info->tree_root->sectorsize)) {
+ btrfs_err(fs_info, "bad tree block start %llu", start);
+ return ERR_PTR(-EINVAL);
+ }
+
eb = find_extent_buffer(fs_info, start);
if (eb)
return eb;
eb = __alloc_extent_buffer(fs_info, start, len);
if (!eb)
- return NULL;
+ return ERR_PTR(-ENOMEM);
for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
- if (!p)
+ if (!p) {
+ exists = ERR_PTR(-ENOMEM);
goto free_eb;
+ }
spin_lock(&mapping->private_lock);
if (PagePrivate(p)) {
@@ -4938,8 +4953,10 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
again:
ret = radix_tree_preload(GFP_NOFS);
- if (ret)
+ if (ret) {
+ exists = ERR_PTR(ret);
goto free_eb;
+ }
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
@@ -5217,7 +5234,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
err = __extent_read_full_page(tree, page,
get_extent, &bio,
mirror_num, &bio_flags,
- READ | REQ_META);
+ REQ_META);
if (err)
ret = err;
} else {
@@ -5226,8 +5243,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
}
if (bio) {
- err = submit_one_bio(READ | REQ_META, bio, mirror_num,
- bio_flags);
+ err = submit_one_bio(bio, mirror_num, bio_flags);
if (err)
return err;
}
@@ -5323,6 +5339,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
return ret;
}
+/*
+ * return 0 if the item is found within a page.
+ * return 1 if the item spans two pages.
+ * return -EINVAL otherwise.
+ */
int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
unsigned long min_len, char **map,
unsigned long *map_start,
@@ -5337,7 +5358,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
PAGE_SHIFT;
if (i != end_i)
- return -EINVAL;
+ return 1;
if (i == 0) {
offset = start_offset;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1baf19c9b79d..bc2729a7612d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -63,16 +63,16 @@ struct btrfs_root;
struct btrfs_io_bio;
struct io_failure_record;
-typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags, u64 bio_offset);
+typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset);
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
extent_submit_bio_hook_t *submit_bio_hook;
- int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset,
+ int (*merge_bio_hook)(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
@@ -348,7 +348,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start);
+ u64 start, u32 nodesize);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
@@ -468,5 +468,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
u64 *end, u64 max_bytes);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
- u64 start);
+ u64 start, u32 nodesize);
#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e0c9bd3fb02d..2234e88cf674 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1534,30 +1534,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
reserve_bytes = round_up(write_bytes + sector_offset,
root->sectorsize);
- if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
- BTRFS_INODE_PREALLOC)) &&
- check_can_nocow(inode, pos, &write_bytes) > 0) {
- /*
- * For nodata cow case, no need to reserve
- * data space.
- */
- only_release_metadata = true;
- /*
- * our prealloc extent may be smaller than
- * write_bytes, so scale down.
- */
- num_pages = DIV_ROUND_UP(write_bytes + offset,
- PAGE_SIZE);
- reserve_bytes = round_up(write_bytes + sector_offset,
- root->sectorsize);
- goto reserve_metadata;
- }
-
ret = btrfs_check_data_free_space(inode, pos, write_bytes);
- if (ret < 0)
- break;
+ if (ret < 0) {
+ if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) &&
+ check_can_nocow(inode, pos, &write_bytes) > 0) {
+ /*
+ * For nodata cow case, no need to reserve
+ * data space.
+ */
+ only_release_metadata = true;
+ /*
+ * our prealloc extent may be smaller than
+ * write_bytes, so scale down.
+ */
+ num_pages = DIV_ROUND_UP(write_bytes + offset,
+ PAGE_SIZE);
+ reserve_bytes = round_up(write_bytes +
+ sector_offset,
+ root->sectorsize);
+ } else {
+ break;
+ }
+ }
-reserve_metadata:
ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
if (ret) {
if (!only_release_metadata)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index c6dc1183f542..69d270f6602c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -29,7 +29,7 @@
#include "inode-map.h"
#include "volumes.h"
-#define BITS_PER_BITMAP (PAGE_SIZE * 8)
+#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG SZ_32K
struct btrfs_trim_range {
@@ -1415,11 +1415,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
u64 offset)
{
u64 bitmap_start;
- u32 bytes_per_bitmap;
+ u64 bytes_per_bitmap;
bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
bitmap_start = offset - ctl->start;
- bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
+ bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
bitmap_start *= bytes_per_bitmap;
bitmap_start += ctl->start;
@@ -1638,10 +1638,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
u64 bitmap_bytes;
u64 extent_bytes;
u64 size = block_group->key.offset;
- u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
- u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
+ u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
+ u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
- max_bitmaps = max_t(u32, max_bitmaps, 1);
+ max_bitmaps = max_t(u64, max_bitmaps, 1);
ASSERT(ctl->total_bitmaps <= max_bitmaps);
@@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
* sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
* we add more bitmaps.
*/
- bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE;
+ bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit;
if (bitmap_bytes >= max_bytes) {
ctl->extents_thresh = 0;
@@ -3662,7 +3662,7 @@ have_info:
if (tmp->offset + tmp->bytes < offset)
break;
if (offset + bytes < tmp->offset) {
- n = rb_prev(&info->offset_index);
+ n = rb_prev(&tmp->offset_index);
continue;
}
info = tmp;
@@ -3676,7 +3676,7 @@ have_info:
if (offset + bytes < tmp->offset)
break;
if (tmp->offset + tmp->bytes < offset) {
- n = rb_next(&info->offset_index);
+ n = rb_next(&tmp->offset_index);
continue;
}
info = tmp;
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
index aae520b2aee5..a97fdc156a03 100644
--- a/fs/btrfs/hash.c
+++ b/fs/btrfs/hash.c
@@ -24,6 +24,11 @@ int __init btrfs_hash_init(void)
return PTR_ERR_OR_ZERO(tfm);
}
+const char* btrfs_crc32c_impl(void)
+{
+ return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm));
+}
+
void btrfs_hash_exit(void)
{
crypto_free_shash(tfm);
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 118a2316e5d3..c3a2ec554361 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -22,6 +22,7 @@
int __init btrfs_hash_init(void);
void btrfs_hash_exit(void);
+const char* btrfs_crc32c_impl(void);
u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 270499598ed4..df731c0ebec7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1823,7 +1823,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
* extent_io.c merge_bio_hook, this must check the chunk tree to make sure
* we don't create bios that span stripes or chunks
*/
-int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
+int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags)
{
@@ -1838,7 +1838,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
length = bio->bi_iter.bi_size;
map_length = length;
- ret = btrfs_map_block(root->fs_info, rw, logical,
+ ret = btrfs_map_block(root->fs_info, bio_op(bio), logical,
&map_length, NULL, 0);
/* Will always return 0 with map_multi == NULL */
BUG_ON(ret < 0);
@@ -1855,9 +1855,8 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_start(struct inode *inode, int rw,
- struct bio *bio, int mirror_num,
- unsigned long bio_flags,
+static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
+ int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1876,14 +1875,14 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
* At IO completion time the cums attached on the ordered extent record
* are inserted into the btree
*/
-static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
+static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
- ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
+ ret = btrfs_map_bio(root, bio, mirror_num, 1);
if (ret) {
bio->bi_error = ret;
bio_endio(bio);
@@ -1895,7 +1894,7 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
* extent_io.c submission hook. This does the right thing for csum calculation
* on write, or reading the csums from the tree before a read
*/
-static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
+static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
@@ -1910,7 +1909,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
if (btrfs_is_free_space_inode(inode))
metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
- if (!(rw & REQ_WRITE)) {
+ if (bio_op(bio) != REQ_OP_WRITE) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
if (ret)
goto out;
@@ -1932,7 +1931,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
goto mapit;
/* we're doing a write, do the async checksumming */
ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
- inode, rw, bio, mirror_num,
+ inode, bio, mirror_num,
bio_flags, bio_offset,
__btrfs_submit_bio_start,
__btrfs_submit_bio_done);
@@ -1944,7 +1943,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
}
mapit:
- ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, bio, mirror_num, 0);
out:
if (ret < 0) {
@@ -3271,7 +3270,16 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* grab metadata reservation from transaction handle */
if (reserve) {
ret = btrfs_orphan_reserve_metadata(trans, inode);
- BUG_ON(ret); /* -ENOSPC in reservation; Logic error? JDM */
+ ASSERT(!ret);
+ if (ret) {
+ atomic_dec(&root->orphan_inodes);
+ clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+ &BTRFS_I(inode)->runtime_flags);
+ if (insert)
+ clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+ &BTRFS_I(inode)->runtime_flags);
+ return ret;
+ }
}
/* insert an orphan item to track this unlinked/truncated file */
@@ -4549,6 +4557,7 @@ delete:
BUG_ON(ret);
if (btrfs_should_throttle_delayed_refs(trans, root))
btrfs_async_run_delayed_refs(root,
+ trans->transid,
trans->delayed_ref_updates * 2, 0);
if (be_nice) {
if (truncate_space_check(trans, root,
@@ -5748,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
int name_len;
int is_curr = 0; /* ctx->pos points to the current index? */
bool emitted;
+ bool put = false;
/* FIXME, use a real flag for deciding about the key type */
if (root->fs_info->tree_root == root)
@@ -5765,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (key_type == BTRFS_DIR_INDEX_KEY) {
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
- btrfs_get_delayed_items(inode, &ins_list, &del_list);
+ put = btrfs_readdir_get_delayed_items(inode, &ins_list,
+ &del_list);
}
key.type = key_type;
@@ -5912,8 +5923,8 @@ next:
nopos:
ret = 0;
err:
- if (key_type == BTRFS_DIR_INDEX_KEY)
- btrfs_put_delayed_items(&ins_list, &del_list);
+ if (put)
+ btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
btrfs_free_path(path);
return ret;
}
@@ -6979,7 +6990,18 @@ insert:
* existing will always be non-NULL, since there must be
* extent causing the -EEXIST.
*/
- if (start >= extent_map_end(existing) ||
+ if (existing->start == em->start &&
+ extent_map_end(existing) == extent_map_end(em) &&
+ em->block_start == existing->block_start) {
+ /*
+ * these two extents are the same, it happens
+ * with inlines especially
+ */
+ free_extent_map(em);
+ em = existing;
+ err = 0;
+
+ } else if (start >= extent_map_end(existing) ||
start <= existing->start) {
/*
* The existing extent map is the one nearest to
@@ -7767,12 +7789,12 @@ err:
}
static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
- int rw, int mirror_num)
+ int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
- BUG_ON(rw & REQ_WRITE);
+ BUG_ON(bio_op(bio) == REQ_OP_WRITE);
bio_get(bio);
@@ -7781,7 +7803,7 @@ static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
if (ret)
goto err;
- ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, bio, mirror_num, 0);
err:
bio_put(bio);
return ret;
@@ -7832,7 +7854,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
int read_mode;
int ret;
- BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+ BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
if (ret)
@@ -7860,13 +7882,13 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
free_io_failure(inode, failrec);
return -EIO;
}
+ bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
btrfs_debug(BTRFS_I(inode)->root->fs_info,
"Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
read_mode, failrec->this_mirror, failrec->in_validation);
- ret = submit_dio_repair_bio(inode, bio, read_mode,
- failrec->this_mirror);
+ ret = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
if (ret) {
free_io_failure(inode, failrec);
bio_put(bio);
@@ -8156,7 +8178,7 @@ static void btrfs_endio_direct_write(struct bio *bio)
bio_put(bio);
}
-static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
+static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
struct bio *bio, int mirror_num,
unsigned long bio_flags, u64 offset)
{
@@ -8174,8 +8196,8 @@ static void btrfs_end_dio_bio(struct bio *bio)
if (err)
btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
- "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
- btrfs_ino(dip->inode), bio->bi_rw,
+ "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
+ btrfs_ino(dip->inode), bio_op(bio), bio->bi_rw,
(unsigned long long)bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, err);
@@ -8249,11 +8271,11 @@ static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
}
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
- int rw, u64 file_offset, int skip_sum,
+ u64 file_offset, int skip_sum,
int async_submit)
{
struct btrfs_dio_private *dip = bio->bi_private;
- int write = rw & REQ_WRITE;
+ bool write = bio_op(bio) == REQ_OP_WRITE;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
@@ -8274,8 +8296,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
if (write && async_submit) {
ret = btrfs_wq_submit_bio(root->fs_info,
- inode, rw, bio, 0, 0,
- file_offset,
+ inode, bio, 0, 0, file_offset,
__btrfs_submit_bio_start_direct_io,
__btrfs_submit_bio_done);
goto err;
@@ -8294,13 +8315,13 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
goto err;
}
map:
- ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
+ ret = btrfs_map_bio(root, bio, 0, async_submit);
err:
bio_put(bio);
return ret;
}
-static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
+static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
int skip_sum)
{
struct inode *inode = dip->inode;
@@ -8319,8 +8340,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int i;
map_length = orig_bio->bi_iter.bi_size;
- ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
- &map_length, NULL, 0);
+ ret = btrfs_map_block(root->fs_info, bio_op(orig_bio),
+ start_sector << 9, &map_length, NULL, 0);
if (ret)
return -EIO;
@@ -8340,6 +8361,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
if (!bio)
return -ENOMEM;
+ bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_rw);
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
@@ -8359,7 +8381,7 @@ next_block:
* before we're done setting it up
*/
atomic_inc(&dip->pending_bios);
- ret = __btrfs_submit_dio_bio(bio, inode, rw,
+ ret = __btrfs_submit_dio_bio(bio, inode,
file_offset, skip_sum,
async_submit);
if (ret) {
@@ -8377,12 +8399,13 @@ next_block:
start_sector, GFP_NOFS);
if (!bio)
goto out_err;
+ bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_rw);
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
btrfs_io_bio(bio)->logical = file_offset;
map_length = orig_bio->bi_iter.bi_size;
- ret = btrfs_map_block(root->fs_info, rw,
+ ret = btrfs_map_block(root->fs_info, bio_op(orig_bio),
start_sector << 9,
&map_length, NULL, 0);
if (ret) {
@@ -8402,7 +8425,7 @@ next_block:
}
submit:
- ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
+ ret = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
async_submit);
if (!ret)
return 0;
@@ -8422,14 +8445,14 @@ out_err:
return 0;
}
-static void btrfs_submit_direct(int rw, struct bio *dio_bio,
- struct inode *inode, loff_t file_offset)
+static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
+ loff_t file_offset)
{
struct btrfs_dio_private *dip = NULL;
struct bio *io_bio = NULL;
struct btrfs_io_bio *btrfs_bio;
int skip_sum;
- int write = rw & REQ_WRITE;
+ bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -8480,7 +8503,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dio_data->unsubmitted_oe_range_end;
}
- ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
+ ret = btrfs_submit_direct_hook(dip, skip_sum);
if (!ret)
return;
@@ -10514,7 +10537,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
static const struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = btrfs_real_readdir,
+ .iterate_shared = btrfs_real_readdir,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 559170464d7c..aca8264f4a49 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
return count;
}
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len)
{
struct btrfs_root *root;
struct list_head splice;
int done;
+ int total_done = 0;
INIT_LIST_HEAD(&splice);
@@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
done = btrfs_wait_ordered_extents(root, nr,
range_start, range_len);
btrfs_put_fs_root(root);
+ total_done += done;
spin_lock(&fs_info->ordered_root_lock);
if (nr != -1) {
@@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
mutex_unlock(&fs_info->ordered_operations_mutex);
+
+ return total_done;
}
/*
@@ -964,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct rb_node *prev = NULL;
struct btrfs_ordered_extent *test;
int ret = 1;
+ u64 orig_offset = offset;
spin_lock_irq(&tree->lock);
if (ordered) {
@@ -979,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
/* truncate file */
if (disk_i_size > i_size) {
- BTRFS_I(inode)->disk_i_size = i_size;
+ BTRFS_I(inode)->disk_i_size = orig_offset;
ret = 0;
goto out;
}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 2049c9be85ee..451507776ff5 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
const u64 range_start, const u64 range_len);
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len);
void btrfs_get_logged_extents(struct inode *inode,
struct list_head *logged_list,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f8b6d411a034..cd8d302a1f61 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1320,7 +1320,9 @@ write_data:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- submit_bio(WRITE, bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+ submit_bio(bio);
}
return;
@@ -1573,11 +1575,12 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_rmw_end_io;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- submit_bio(READ, bio);
+ submit_bio(bio);
}
/* the actual write will happen once the reads are done */
return 0;
@@ -2097,11 +2100,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid_recover_end_io;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- submit_bio(READ, bio);
+ submit_bio(bio);
}
out:
return 0;
@@ -2433,7 +2437,9 @@ submit_write:
bio->bi_private = rbio;
bio->bi_end_io = raid_write_end_io;
- submit_bio(WRITE, bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
+
+ submit_bio(bio);
}
return;
@@ -2610,11 +2616,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
bio->bi_private = rbio;
bio->bi_end_io = raid56_parity_scrub_end_io;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
btrfs_bio_wq_end_io(rbio->fs_info, bio,
BTRFS_WQ_ENDIO_RAID56);
- submit_bio(READ, bio);
+ submit_bio(bio);
}
/* the actual write will happen once the reads are done */
return;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 298631eaee78..8428db7cd88f 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
do {
enqueued = 0;
+ mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (atomic_read(&device->reada_in_flight) <
MAX_IN_FLIGHT)
enqueued += reada_start_machine_dev(fs_info,
device);
}
+ mutex_unlock(&fs_devices->device_list_mutex);
total += enqueued;
} while (enqueued && total < 10000);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 46d847f66e4b..e08b6bc676e3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1504,8 +1504,9 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
sblock->no_io_error_seen = 0;
} else {
bio->bi_iter.bi_sector = page->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
- if (btrfsic_submit_bio_wait(READ, bio))
+ if (btrfsic_submit_bio_wait(bio))
sblock->no_io_error_seen = 0;
}
@@ -1583,6 +1584,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_iter.bi_sector = page_bad->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
@@ -1590,7 +1592,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}
- if (btrfsic_submit_bio_wait(WRITE, bio)) {
+ if (btrfsic_submit_bio_wait(bio)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
btrfs_dev_replace_stats_inc(
@@ -1684,6 +1686,7 @@ again:
bio->bi_end_io = scrub_wr_bio_end_io;
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
sbio->err = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical_for_dev_replace ||
@@ -1731,7 +1734,7 @@ static void scrub_wr_submit(struct scrub_ctx *sctx)
* orders the requests before sending them to the driver which
* doubled the write performance on spinning disks when measured
* with Linux 3.5 */
- btrfsic_submit_bio(WRITE, sbio->bio);
+ btrfsic_submit_bio(sbio->bio);
}
static void scrub_wr_bio_end_io(struct bio *bio)
@@ -2041,7 +2044,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
sbio = sctx->bios[sctx->curr];
sctx->curr = -1;
scrub_pending_bio_inc(sctx);
- btrfsic_submit_bio(READ, sbio->bio);
+ btrfsic_submit_bio(sbio->bio);
}
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
@@ -2088,6 +2091,7 @@ again:
bio->bi_end_io = scrub_bio_end_io;
bio->bi_bdev = sbio->dev->bdev;
bio->bi_iter.bi_sector = sbio->physical >> 9;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
sbio->err = 0;
} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
spage->physical ||
@@ -3582,6 +3586,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/
scrub_pause_on(fs_info);
ret = btrfs_inc_block_group_ro(root, cache);
+ if (!ret && is_dev_replace) {
+ /*
+ * If we are doing a device replace wait for any tasks
+ * that started dellaloc right before we set the block
+ * group to RO mode, as they might have just allocated
+ * an extent from it or decided they could do a nocow
+ * write. And if any such tasks did that, wait for their
+ * ordered extents to complete and then commit the
+ * current transaction, so that we can later see the new
+ * extent items in the extent tree - the ordered extents
+ * create delayed data references (for cow writes) when
+ * they complete, which will be run and insert the
+ * corresponding extent items into the extent tree when
+ * we commit the transaction they used when running
+ * inode.c:btrfs_finish_ordered_io(). We later use
+ * the commit root of the extent tree to find extents
+ * to copy from the srcdev into the tgtdev, and we don't
+ * want to miss any new extents.
+ */
+ btrfs_wait_block_group_reservations(cache);
+ btrfs_wait_nocow_writers(cache);
+ ret = btrfs_wait_ordered_roots(fs_info, -1,
+ cache->key.objectid,
+ cache->key.offset);
+ if (ret > 0) {
+ struct btrfs_trans_handle *trans;
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ ret = PTR_ERR(trans);
+ else
+ ret = btrfs_commit_transaction(trans,
+ root);
+ if (ret) {
+ scrub_pause_off(fs_info);
+ btrfs_put_block_group(cache);
+ break;
+ }
+ }
+ }
scrub_pause_off(fs_info);
if (ret == 0) {
@@ -3602,9 +3646,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
found_key.offset, cache, is_dev_replace);
@@ -3640,6 +3686,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
+ dev_replace->cursor_left = dev_replace->cursor_right;
+ dev_replace->item_needs_writeback = 1;
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
+
if (ro_set)
btrfs_dec_block_group_ro(root, cache);
@@ -3677,9 +3728,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ret = -ENOMEM;
break;
}
-
- dev_replace->cursor_left = dev_replace->cursor_right;
- dev_replace->item_needs_writeback = 1;
skip:
key.offset = found_key.offset + length;
btrfs_release_path(path);
@@ -4392,6 +4440,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
ret = bio_add_page(bio, page, PAGE_SIZE, 0);
if (ret != PAGE_SIZE) {
leave_with_eio:
@@ -4400,7 +4449,7 @@ leave_with_eio:
return -EIO;
}
- if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
+ if (btrfsic_submit_bio_wait(bio))
goto leave_with_eio;
bio_put(bio);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4e59a91a11e0..60e7179ed4b7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -235,7 +235,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
- if (!trans->blocks_used && list_empty(&trans->new_bgs)) {
+ if (!trans->dirty && list_empty(&trans->new_bgs)) {
const char *errstr;
errstr = btrfs_decode_error(errno);
@@ -1807,6 +1807,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
}
sb->s_flags &= ~MS_RDONLY;
+
+ fs_info->open = 1;
}
out:
wake_up_process(fs_info->transaction_kthread);
@@ -2303,7 +2305,7 @@ static void btrfs_interface_exit(void)
static void btrfs_print_mod_info(void)
{
- printk(KERN_INFO "Btrfs loaded"
+ printk(KERN_INFO "Btrfs loaded, crc32c=%s"
#ifdef CONFIG_BTRFS_DEBUG
", debug=on"
#endif
@@ -2313,33 +2315,48 @@ static void btrfs_print_mod_info(void)
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
", integrity-checker=on"
#endif
- "\n");
+ "\n",
+ btrfs_crc32c_impl());
}
static int btrfs_run_sanity_tests(void)
{
- int ret;
-
+ int ret, i;
+ u32 sectorsize, nodesize;
+ u32 test_sectorsize[] = {
+ PAGE_SIZE,
+ };
ret = btrfs_init_test_fs();
if (ret)
return ret;
-
- ret = btrfs_test_free_space_cache();
- if (ret)
- goto out;
- ret = btrfs_test_extent_buffer_operations();
- if (ret)
- goto out;
- ret = btrfs_test_extent_io();
- if (ret)
- goto out;
- ret = btrfs_test_inodes();
- if (ret)
- goto out;
- ret = btrfs_test_qgroups();
- if (ret)
- goto out;
- ret = btrfs_test_free_space_tree();
+ for (i = 0; i < ARRAY_SIZE(test_sectorsize); i++) {
+ sectorsize = test_sectorsize[i];
+ for (nodesize = sectorsize;
+ nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE;
+ nodesize <<= 1) {
+ pr_info("BTRFS: selftest: sectorsize: %u nodesize: %u\n",
+ sectorsize, nodesize);
+ ret = btrfs_test_free_space_cache(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_buffer_operations(sectorsize,
+ nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_io(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_inodes(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_qgroups(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ ret = btrfs_test_free_space_tree(sectorsize, nodesize);
+ if (ret)
+ goto out;
+ }
+ }
out:
btrfs_destroy_test_fs();
return ret;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index f54bf450bad3..02223f3f78f4 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -68,7 +68,7 @@ int btrfs_init_test_fs(void)
if (IS_ERR(test_mnt)) {
printk(KERN_ERR "btrfs: cannot mount test file system\n");
unregister_filesystem(&test_type);
- return ret;
+ return PTR_ERR(test_mnt);
}
return 0;
}
@@ -175,7 +175,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
}
struct btrfs_block_group_cache *
-btrfs_alloc_dummy_block_group(unsigned long length)
+btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize)
{
struct btrfs_block_group_cache *cache;
@@ -192,8 +192,8 @@ btrfs_alloc_dummy_block_group(unsigned long length)
cache->key.objectid = 0;
cache->key.offset = length;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- cache->sectorsize = 4096;
- cache->full_stripe_len = 4096;
+ cache->sectorsize = sectorsize;
+ cache->full_stripe_len = sectorsize;
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index 054b8c73c951..66fb6b701eb7 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -26,27 +26,28 @@
struct btrfs_root;
struct btrfs_trans_handle;
-int btrfs_test_free_space_cache(void);
-int btrfs_test_extent_buffer_operations(void);
-int btrfs_test_extent_io(void);
-int btrfs_test_inodes(void);
-int btrfs_test_qgroups(void);
-int btrfs_test_free_space_tree(void);
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize);
+int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize);
+int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
+int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
+int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
+int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
int btrfs_init_test_fs(void);
void btrfs_destroy_test_fs(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
void btrfs_free_dummy_root(struct btrfs_root *root);
struct btrfs_block_group_cache *
-btrfs_alloc_dummy_block_group(unsigned long length);
+btrfs_alloc_dummy_block_group(unsigned long length, u32 sectorsize);
void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
#else
-static inline int btrfs_test_free_space_cache(void)
+static inline int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_extent_buffer_operations(void)
+static inline int btrfs_test_extent_buffer_operations(u32 sectorsize,
+ u32 nodesize)
{
return 0;
}
@@ -57,19 +58,19 @@ static inline int btrfs_init_test_fs(void)
static inline void btrfs_destroy_test_fs(void)
{
}
-static inline int btrfs_test_extent_io(void)
+static inline int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_inodes(void)
+static inline int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_qgroups(void)
+static inline int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
{
return 0;
}
-static inline int btrfs_test_free_space_tree(void)
+static inline int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
{
return 0;
}
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index f51963a8f929..4f8cbd1ec5ee 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -22,7 +22,7 @@
#include "../extent_io.h"
#include "../disk-io.h"
-static int test_btrfs_split_item(void)
+static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
{
struct btrfs_path *path;
struct btrfs_root *root;
@@ -40,7 +40,7 @@ static int test_btrfs_split_item(void)
test_msg("Running btrfs_split_item tests\n");
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Could not allocate root\n");
return PTR_ERR(root);
@@ -53,7 +53,8 @@ static int test_btrfs_split_item(void)
return -ENOMEM;
}
- path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096);
+ path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, nodesize,
+ nodesize);
if (!eb) {
test_msg("Could not allocate dummy buffer\n");
ret = -ENOMEM;
@@ -222,8 +223,8 @@ out:
return ret;
}
-int btrfs_test_extent_buffer_operations(void)
+int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize)
{
- test_msg("Running extent buffer operation tests");
- return test_btrfs_split_item();
+ test_msg("Running extent buffer operation tests\n");
+ return test_btrfs_split_item(sectorsize, nodesize);
}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 55724607f79b..d19ab0317283 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/sizes.h>
#include "btrfs-tests.h"
+#include "../ctree.h"
#include "../extent_io.h"
#define PROCESS_UNLOCK (1 << 0)
@@ -65,7 +66,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
return count;
}
-static int test_find_delalloc(void)
+static int test_find_delalloc(u32 sectorsize)
{
struct inode *inode;
struct extent_io_tree tmp;
@@ -113,7 +114,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_delalloc(&tmp, 0, 4095, NULL);
+ set_extent_delalloc(&tmp, 0, sectorsize - 1, NULL);
start = 0;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -122,9 +123,9 @@ static int test_find_delalloc(void)
test_msg("Should have found at least one delalloc\n");
goto out_bits;
}
- if (start != 0 || end != 4095) {
- test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
- start, end);
+ if (start != 0 || end != (sectorsize - 1)) {
+ test_msg("Expected start 0 end %u, got start %llu end %llu\n",
+ sectorsize - 1, start, end);
goto out_bits;
}
unlock_extent(&tmp, start, end);
@@ -144,7 +145,7 @@ static int test_find_delalloc(void)
test_msg("Couldn't find the locked page\n");
goto out_bits;
}
- set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL);
+ set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, NULL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -172,7 +173,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
- test_start = max_bytes + 4096;
+ test_start = max_bytes + sectorsize;
locked_page = find_lock_page(inode->i_mapping, test_start >>
PAGE_SHIFT);
if (!locked_page) {
@@ -272,6 +273,16 @@ out:
return ret;
}
+/**
+ * test_bit_in_byte - Determine whether a bit is set in a byte
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit_in_byte(int nr, const u8 *addr)
+{
+ return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1)));
+}
+
static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
unsigned long len)
{
@@ -298,25 +309,29 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
return -EINVAL;
}
- bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
- sizeof(long) * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_msg("Setting straddling pages failed\n");
- return -EINVAL;
- }
+ /* Straddling pages test */
+ if (len > PAGE_SIZE) {
+ bitmap_set(bitmap,
+ (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+ sizeof(long) * BITS_PER_BYTE);
+ extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+ sizeof(long) * BITS_PER_BYTE);
+ if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ test_msg("Setting straddling pages failed\n");
+ return -EINVAL;
+ }
- bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
- bitmap_clear(bitmap,
- (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
- sizeof(long) * BITS_PER_BYTE);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_msg("Clearing straddling pages failed\n");
- return -EINVAL;
+ bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
+ bitmap_clear(bitmap,
+ (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+ sizeof(long) * BITS_PER_BYTE);
+ extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
+ extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+ sizeof(long) * BITS_PER_BYTE);
+ if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+ test_msg("Clearing straddling pages failed\n");
+ return -EINVAL;
+ }
}
/*
@@ -333,7 +348,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
for (i = 0; i < len * BITS_PER_BYTE; i++) {
int bit, bit1;
- bit = !!test_bit(i, bitmap);
+ bit = !!test_bit_in_byte(i, (u8 *)bitmap);
bit1 = !!extent_buffer_test_bit(eb, 0, i);
if (bit1 != bit) {
test_msg("Testing bit pattern failed\n");
@@ -351,15 +366,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
return 0;
}
-static int test_eb_bitmaps(void)
+static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
{
- unsigned long len = PAGE_SIZE * 4;
+ unsigned long len;
unsigned long *bitmap;
struct extent_buffer *eb;
int ret;
test_msg("Running extent buffer bitmap tests\n");
+ /*
+ * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
+ * BTRFS_MAX_METADATA_BLOCKSIZE.
+ */
+ len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE)
+ ? sectorsize * 4 : sectorsize;
+
bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) {
test_msg("Couldn't allocate test bitmap\n");
@@ -379,7 +401,7 @@ static int test_eb_bitmaps(void)
/* Do it over again with an extent buffer which isn't page-aligned. */
free_extent_buffer(eb);
- eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len);
+ eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
if (!eb) {
test_msg("Couldn't allocate test extent buffer\n");
kfree(bitmap);
@@ -393,17 +415,17 @@ out:
return ret;
}
-int btrfs_test_extent_io(void)
+int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
int ret;
test_msg("Running extent I/O tests\n");
- ret = test_find_delalloc();
+ ret = test_find_delalloc(sectorsize);
if (ret)
goto out;
- ret = test_eb_bitmaps();
+ ret = test_eb_bitmaps(sectorsize, nodesize);
out:
test_msg("Extent I/O tests finished\n");
return ret;
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 0eeb8f3d6b67..3956bb2ff84c 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -22,7 +22,7 @@
#include "../disk-io.h"
#include "../free-space-cache.h"
-#define BITS_PER_BITMAP (PAGE_SIZE * 8)
+#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
/*
* This test just does basic sanity checking, making sure we can add an extent
@@ -99,7 +99,8 @@ static int test_extents(struct btrfs_block_group_cache *cache)
return 0;
}
-static int test_bitmaps(struct btrfs_block_group_cache *cache)
+static int test_bitmaps(struct btrfs_block_group_cache *cache,
+ u32 sectorsize)
{
u64 next_bitmap_offset;
int ret;
@@ -139,7 +140,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
* The first bitmap we have starts at offset 0 so the next one is just
* at the end of the first bitmap.
*/
- next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+ next_bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
/* Test a bit straddling two bitmaps */
ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M,
@@ -167,9 +168,10 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache)
}
/* This is the high grade jackassery */
-static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
+static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
+ u32 sectorsize)
{
- u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+ u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
int ret;
test_msg("Running bitmap and extent tests\n");
@@ -401,7 +403,8 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
* requests.
*/
static int
-test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
+test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
+ u32 sectorsize)
{
int ret;
u64 offset;
@@ -539,7 +542,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
- ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096);
+ ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
@@ -597,8 +600,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -ENOENT;
}
- if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) {
- test_msg("Cache free space is not 1Mb + 4Kb\n");
+ if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) {
+ test_msg("Cache free space is not 1Mb + %u\n", sectorsize);
return -EINVAL;
}
@@ -611,22 +614,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -EINVAL;
}
- /* All that remains is a 4Kb free space region in a bitmap. Confirm. */
+ /*
+ * All that remains is a sectorsize free space region in a bitmap.
+ * Confirm.
+ */
ret = check_num_extents_and_bitmaps(cache, 1, 1);
if (ret)
return ret;
- if (cache->free_space_ctl->free_space != 4096) {
- test_msg("Cache free space is not 4Kb\n");
+ if (cache->free_space_ctl->free_space != sectorsize) {
+ test_msg("Cache free space is not %u\n", sectorsize);
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
- 0, 4096, 0,
+ 0, sectorsize, 0,
&max_extent_size);
if (offset != (SZ_128M + SZ_16M)) {
- test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n",
- offset);
+ test_msg("Failed to allocate %u, returned offset : %llu\n",
+ sectorsize, offset);
return -EINVAL;
}
@@ -733,7 +739,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
* The goal is to test that the bitmap entry space stealing doesn't
* steal this space region.
*/
- ret = btrfs_add_free_space(cache, SZ_32M, 8192);
+ ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize);
if (ret) {
test_msg("Error adding free space: %d\n", ret);
return ret;
@@ -757,7 +763,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
/*
* Confirm that our extent entry didn't stole all free space from the
- * bitmap, because of the small 8Kb free space region.
+ * bitmap, because of the small 2 * sectorsize free space region.
*/
ret = check_num_extents_and_bitmaps(cache, 2, 1);
if (ret)
@@ -783,8 +789,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -ENOENT;
}
- if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) {
- test_msg("Cache free space is not 1Mb + 8Kb\n");
+ if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) {
+ test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize);
return -EINVAL;
}
@@ -796,21 +802,25 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return -EINVAL;
}
- /* All that remains is a 8Kb free space region in a bitmap. Confirm. */
+ /*
+ * All that remains is 2 * sectorsize free space region
+ * in a bitmap. Confirm.
+ */
ret = check_num_extents_and_bitmaps(cache, 1, 1);
if (ret)
return ret;
- if (cache->free_space_ctl->free_space != 8192) {
- test_msg("Cache free space is not 8Kb\n");
+ if (cache->free_space_ctl->free_space != 2 * sectorsize) {
+ test_msg("Cache free space is not %u\n", 2 * sectorsize);
return -EINVAL;
}
offset = btrfs_find_space_for_alloc(cache,
- 0, 8192, 0,
+ 0, 2 * sectorsize, 0,
&max_extent_size);
if (offset != SZ_32M) {
- test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n",
+ test_msg("Failed to allocate %u, offset: %llu\n",
+ 2 * sectorsize,
offset);
return -EINVAL;
}
@@ -825,7 +835,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
return 0;
}
-int btrfs_test_free_space_cache(void)
+int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
{
struct btrfs_block_group_cache *cache;
struct btrfs_root *root = NULL;
@@ -833,13 +843,19 @@ int btrfs_test_free_space_cache(void)
test_msg("Running btrfs free space cache tests\n");
- cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024);
+ /*
+ * For ppc64 (with 64k page size), bytes per bitmap might be
+ * larger than 1G. To make bitmap test available in ppc64,
+ * alloc dummy block group whose size cross bitmaps.
+ */
+ cache = btrfs_alloc_dummy_block_group(BITS_PER_BITMAP * sectorsize
+ + PAGE_SIZE, sectorsize);
if (!cache) {
test_msg("Couldn't run the tests\n");
return 0;
}
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out;
@@ -855,14 +871,14 @@ int btrfs_test_free_space_cache(void)
ret = test_extents(cache);
if (ret)
goto out;
- ret = test_bitmaps(cache);
+ ret = test_bitmaps(cache, sectorsize);
if (ret)
goto out;
- ret = test_bitmaps_and_extents(cache);
+ ret = test_bitmaps_and_extents(cache, sectorsize);
if (ret)
goto out;
- ret = test_steal_space_from_bitmap_to_extent(cache);
+ ret = test_steal_space_from_bitmap_to_extent(cache, sectorsize);
out:
btrfs_free_dummy_block_group(cache);
btrfs_free_dummy_root(root);
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 7cea4462acd5..aac507085ab0 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
+#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../disk-io.h"
@@ -30,7 +31,7 @@ struct free_space_extent {
* The test cases align their operations to this in order to hit some of the
* edge cases in the bitmap code.
*/
-#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096)
+#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE)
static int __check_free_space_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
@@ -439,7 +440,8 @@ typedef int (*test_func_t)(struct btrfs_trans_handle *,
struct btrfs_block_group_cache *,
struct btrfs_path *);
-static int run_test(test_func_t test_func, int bitmaps)
+static int run_test(test_func_t test_func, int bitmaps,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root = NULL;
struct btrfs_block_group_cache *cache = NULL;
@@ -447,7 +449,7 @@ static int run_test(test_func_t test_func, int bitmaps)
struct btrfs_path *path = NULL;
int ret;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate dummy root\n");
ret = PTR_ERR(root);
@@ -466,7 +468,8 @@ static int run_test(test_func_t test_func, int bitmaps)
root->fs_info->free_space_root = root;
root->fs_info->tree_root = root;
- root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+ root->node = alloc_test_extent_buffer(root->fs_info,
+ nodesize, nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
ret = -ENOMEM;
@@ -474,9 +477,9 @@ static int run_test(test_func_t test_func, int bitmaps)
}
btrfs_set_header_level(root->node, 0);
btrfs_set_header_nritems(root->node, 0);
- root->alloc_bytenr += 8192;
+ root->alloc_bytenr += 2 * nodesize;
- cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE);
+ cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize);
if (!cache) {
test_msg("Couldn't allocate dummy block group cache\n");
ret = -ENOMEM;
@@ -534,17 +537,18 @@ out:
return ret;
}
-static int run_test_both_formats(test_func_t test_func)
+static int run_test_both_formats(test_func_t test_func,
+ u32 sectorsize, u32 nodesize)
{
int ret;
- ret = run_test(test_func, 0);
+ ret = run_test(test_func, 0, sectorsize, nodesize);
if (ret)
return ret;
- return run_test(test_func, 1);
+ return run_test(test_func, 1, sectorsize, nodesize);
}
-int btrfs_test_free_space_tree(void)
+int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
{
test_func_t tests[] = {
test_empty_block_group,
@@ -561,9 +565,11 @@ int btrfs_test_free_space_tree(void)
test_msg("Running free space tree tests\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- int ret = run_test_both_formats(tests[i]);
+ int ret = run_test_both_formats(tests[i], sectorsize,
+ nodesize);
if (ret) {
- test_msg("%pf failed\n", tests[i]);
+ test_msg("%pf : sectorsize %u failed\n",
+ tests[i], sectorsize);
return ret;
}
}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 8a25fe8b7c45..29648c0a39f1 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
+#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../btrfs_inode.h"
@@ -86,19 +87,19 @@ static void insert_inode_item_key(struct btrfs_root *root)
* diagram of how the extents will look though this may not be possible we still
* want to make sure everything acts normally (the last number is not inclusive)
*
- * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288]
- * [hole ][inline][ hole ][ regular ][regular1 split][ hole ]
+ * [0 - 5][5 - 6][ 6 - 4096 ][ 4096 - 4100][4100 - 8195][8195 - 12291]
+ * [hole ][inline][hole but no extent][ hole ][ regular ][regular1 split]
*
- * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056]
- * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ]
+ * [12291 - 16387][16387 - 24579][24579 - 28675][ 28675 - 32771][32771 - 36867 ]
+ * [ hole ][regular1 split][ prealloc ][ prealloc1 ][prealloc1 written]
*
- * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ]
- * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent]
+ * [36867 - 45059][45059 - 53251][53251 - 57347][57347 - 61443][61443- 69635]
+ * [ prealloc1 ][ compressed ][ compressed1 ][ regular ][ compressed1]
*
- * [81920-86016]
- * [ regular ]
+ * [69635-73731][ 73731 - 86019 ][86019-90115]
+ * [ regular ][ hole but no extent][ regular ]
*/
-static void setup_file_extents(struct btrfs_root *root)
+static void setup_file_extents(struct btrfs_root *root, u32 sectorsize)
{
int slot = 0;
u64 disk_bytenr = SZ_1M;
@@ -119,7 +120,7 @@ static void setup_file_extents(struct btrfs_root *root)
insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
slot);
slot++;
- offset = 4096;
+ offset = sectorsize;
/* Now another hole */
insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
@@ -128,99 +129,106 @@ static void setup_file_extents(struct btrfs_root *root)
offset += 4;
/* Now for a regular extent */
- insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0,
+ disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- disk_bytenr += 4096;
- offset += 4095;
+ disk_bytenr += sectorsize;
+ offset += sectorsize - 1;
/*
* Now for 3 extents that were split from a hole punch so we test
* offsets properly.
*/
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+ 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
- 0, slot);
+ offset += sectorsize;
+ insert_extent(root, offset, sectorsize, sectorsize, 0, 0, 0,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ offset += sectorsize;
+ insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+ 2 * sectorsize, disk_bytenr, 4 * sectorsize,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 8192;
- disk_bytenr += 16384;
+ offset += 2 * sectorsize;
+ disk_bytenr += 4 * sectorsize;
/* Now for a unwritten prealloc extent */
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
- offset += 4096;
+ offset += sectorsize;
/*
* We want to jack up disk_bytenr a little more so the em stuff doesn't
* merge our records.
*/
- disk_bytenr += 8192;
+ disk_bytenr += 2 * sectorsize;
/*
* Now for a partially written prealloc extent, basically the same as
* the hole punch example above. Ram_bytes never changes when you mark
* extents written btw.
*/
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+ 4 * sectorsize, BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ offset += sectorsize;
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, sectorsize,
+ disk_bytenr, 4 * sectorsize, BTRFS_FILE_EXTENT_REG, 0,
+ slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ offset += sectorsize;
+ insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+ 2 * sectorsize, disk_bytenr, 4 * sectorsize,
BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
slot++;
- offset += 8192;
- disk_bytenr += 16384;
+ offset += 2 * sectorsize;
+ disk_bytenr += 4 * sectorsize;
/* Now a normal compressed extent */
- insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ insert_extent(root, offset, 2 * sectorsize, 2 * sectorsize, 0,
+ disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG,
+ BTRFS_COMPRESS_ZLIB, slot);
slot++;
- offset += 8192;
+ offset += 2 * sectorsize;
/* No merges */
- disk_bytenr += 8192;
+ disk_bytenr += 2 * sectorsize;
/* Now a split compressed extent */
- insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ insert_extent(root, offset, sectorsize, 4 * sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_REG,
+ BTRFS_COMPRESS_ZLIB, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
+ offset += sectorsize;
+ insert_extent(root, offset, sectorsize, sectorsize, 0,
+ disk_bytenr + sectorsize, sectorsize,
BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 4096;
- insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
+ offset += sectorsize;
+ insert_extent(root, offset, 2 * sectorsize, 4 * sectorsize,
+ 2 * sectorsize, disk_bytenr, sectorsize,
BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
slot++;
- offset += 8192;
- disk_bytenr += 8192;
+ offset += 2 * sectorsize;
+ disk_bytenr += 2 * sectorsize;
/* Now extents that have a hole but no hole extent */
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
slot++;
- offset += 16384;
- disk_bytenr += 4096;
- insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
- BTRFS_FILE_EXTENT_REG, 0, slot);
+ offset += 4 * sectorsize;
+ disk_bytenr += sectorsize;
+ insert_extent(root, offset, sectorsize, sectorsize, 0, disk_bytenr,
+ sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
}
static unsigned long prealloc_only = 0;
static unsigned long compressed_only = 0;
static unsigned long vacancy_only = 0;
-static noinline int test_btrfs_get_extent(void)
+static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
@@ -240,7 +248,7 @@ static noinline int test_btrfs_get_extent(void)
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
@@ -256,7 +264,7 @@ static noinline int test_btrfs_get_extent(void)
goto out;
}
- root->node = alloc_dummy_extent_buffer(NULL, 4096);
+ root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;
@@ -273,7 +281,7 @@ static noinline int test_btrfs_get_extent(void)
/* First with no extents */
BTRFS_I(inode)->root = root;
- em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, 0, sectorsize, 0);
if (IS_ERR(em)) {
em = NULL;
test_msg("Got an error when we shouldn't have\n");
@@ -295,7 +303,7 @@ static noinline int test_btrfs_get_extent(void)
* setup_file_extents, so if you change anything there you need to
* update the comment and update the expected values below.
*/
- setup_file_extents(root);
+ setup_file_extents(root, sectorsize);
em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
if (IS_ERR(em)) {
@@ -318,7 +326,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -327,7 +335,8 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected an inline, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4091) {
+
+ if (em->start != offset || em->len != (sectorsize - 5)) {
test_msg("Unexpected extent wanted start %llu len 1, got start "
"%llu len %llu\n", offset, em->start, em->len);
goto out;
@@ -344,7 +353,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -366,7 +375,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Regular extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -375,7 +384,7 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4095) {
+ if (em->start != offset || em->len != sectorsize - 1) {
test_msg("Unexpected extent wanted start %llu len 4095, got "
"start %llu len %llu\n", offset, em->start, em->len);
goto out;
@@ -393,7 +402,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* The next 3 are split extents */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -402,9 +411,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -421,7 +431,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -430,9 +440,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -442,7 +453,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -451,9 +462,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -475,7 +487,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -484,9 +496,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
@@ -503,7 +516,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* The next 3 are a half written prealloc extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -512,9 +525,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
@@ -532,7 +546,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -541,9 +555,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -564,7 +579,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -573,9 +588,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != prealloc_only) {
@@ -598,7 +614,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Now for the compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -607,9 +623,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u,"
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
@@ -631,7 +648,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* Split compressed extent */
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -640,9 +657,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u,"
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
@@ -665,7 +683,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -674,9 +692,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -691,7 +710,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -701,9 +720,10 @@ static noinline int test_btrfs_get_extent(void)
disk_bytenr, em->block_start);
goto out;
}
- if (em->start != offset || em->len != 8192) {
- test_msg("Unexpected extent wanted start %llu len 8192, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 2 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 2 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != compressed_only) {
@@ -725,7 +745,7 @@ static noinline int test_btrfs_get_extent(void)
free_extent_map(em);
/* A hole between regular extents but no hole extent */
- em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset + 6, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -734,9 +754,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -765,9 +786,10 @@ static noinline int test_btrfs_get_extent(void)
* length of the actual hole, if this changes we'll have to change this
* test.
*/
- if (em->start != offset || em->len != 12288) {
- test_msg("Unexpected extent wanted start %llu len 12288, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != 3 * sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u, "
+ "got start %llu len %llu\n",
+ offset, 3 * sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
@@ -783,7 +805,7 @@ static noinline int test_btrfs_get_extent(void)
offset = em->start + em->len;
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -792,9 +814,10 @@ static noinline int test_btrfs_get_extent(void)
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != offset || em->len != 4096) {
- test_msg("Unexpected extent wanted start %llu len 4096, got "
- "start %llu len %llu\n", offset, em->start, em->len);
+ if (em->start != offset || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %llu len %u,"
+ "got start %llu len %llu\n",
+ offset, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -815,7 +838,7 @@ out:
return ret;
}
-static int test_hole_first(void)
+static int test_hole_first(u32 sectorsize, u32 nodesize)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
@@ -832,7 +855,7 @@ static int test_hole_first(void)
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
@@ -844,7 +867,7 @@ static int test_hole_first(void)
goto out;
}
- root->node = alloc_dummy_extent_buffer(NULL, 4096);
+ root->node = alloc_dummy_extent_buffer(NULL, nodesize, nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
goto out;
@@ -861,9 +884,9 @@ static int test_hole_first(void)
* btrfs_get_extent.
*/
insert_inode_item_key(root);
- insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
- BTRFS_FILE_EXTENT_REG, 0, 1);
- em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
+ insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
+ sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
+ em = btrfs_get_extent(inode, NULL, 0, 0, 2 * sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
@@ -872,9 +895,10 @@ static int test_hole_first(void)
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
- if (em->start != 0 || em->len != 4096) {
- test_msg("Unexpected extent wanted start 0 len 4096, got start "
- "%llu len %llu\n", em->start, em->len);
+ if (em->start != 0 || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start 0 len %u, "
+ "got start %llu len %llu\n",
+ sectorsize, em->start, em->len);
goto out;
}
if (em->flags != vacancy_only) {
@@ -884,18 +908,19 @@ static int test_hole_first(void)
}
free_extent_map(em);
- em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
+ em = btrfs_get_extent(inode, NULL, 0, sectorsize, 2 * sectorsize, 0);
if (IS_ERR(em)) {
test_msg("Got an error when we shouldn't have\n");
goto out;
}
- if (em->block_start != 4096) {
+ if (em->block_start != sectorsize) {
test_msg("Expected a real extent, got %llu\n", em->block_start);
goto out;
}
- if (em->start != 4096 || em->len != 4096) {
- test_msg("Unexpected extent wanted start 4096 len 4096, got "
- "start %llu len %llu\n", em->start, em->len);
+ if (em->start != sectorsize || em->len != sectorsize) {
+ test_msg("Unexpected extent wanted start %u len %u, "
+ "got start %llu len %llu\n",
+ sectorsize, sectorsize, em->start, em->len);
goto out;
}
if (em->flags != 0) {
@@ -912,7 +937,7 @@ out:
return ret;
}
-static int test_extent_accounting(void)
+static int test_extent_accounting(u32 sectorsize, u32 nodesize)
{
struct inode *inode = NULL;
struct btrfs_root *root = NULL;
@@ -924,7 +949,7 @@ static int test_extent_accounting(void)
return ret;
}
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
goto out;
@@ -954,10 +979,11 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE][4k] */
+ /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
- BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
+ BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
+ NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -969,10 +995,10 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
+ /* [BTRFS_MAX_EXTENT_SIZE/2][sectorsize HOLE][the rest] */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
BTRFS_MAX_EXTENT_SIZE >> 1,
- (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+ (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
NULL, GFP_KERNEL);
@@ -987,10 +1013,11 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE][4K] */
+ /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */
BTRFS_I(inode)->outstanding_extents++;
ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
- (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+ (BTRFS_MAX_EXTENT_SIZE >> 1)
+ + sectorsize - 1,
NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
@@ -1004,16 +1031,17 @@ static int test_extent_accounting(void)
}
/*
- * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
+ * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize]
*
* I'm artificially adding 2 to outstanding_extents because in the
* buffered IO case we'd add things up as we go, but I don't feel like
* doing that here, this isn't the interesting case we want to test.
*/
BTRFS_I(inode)->outstanding_extents += 2;
- ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
- (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
- NULL);
+ ret = btrfs_set_extent_delalloc(inode,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize,
+ (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
+ NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1025,10 +1053,13 @@ static int test_extent_accounting(void)
goto out;
}
- /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
+ /*
+ * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize]
+ */
BTRFS_I(inode)->outstanding_extents++;
- ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
- BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+ ret = btrfs_set_extent_delalloc(inode,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1042,8 +1073,8 @@ static int test_extent_accounting(void)
/* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
- BTRFS_MAX_EXTENT_SIZE+4096,
- BTRFS_MAX_EXTENT_SIZE+8191,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
NULL, GFP_KERNEL);
@@ -1063,8 +1094,9 @@ static int test_extent_accounting(void)
* might fail and I'd rather satisfy my paranoia at this point.
*/
BTRFS_I(inode)->outstanding_extents++;
- ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
- BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+ ret = btrfs_set_extent_delalloc(inode,
+ BTRFS_MAX_EXTENT_SIZE + sectorsize,
+ BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL);
if (ret) {
test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
goto out;
@@ -1103,7 +1135,7 @@ out:
return ret;
}
-int btrfs_test_inodes(void)
+int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
{
int ret;
@@ -1112,13 +1144,13 @@ int btrfs_test_inodes(void)
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
test_msg("Running btrfs_get_extent tests\n");
- ret = test_btrfs_get_extent();
+ ret = test_btrfs_get_extent(sectorsize, nodesize);
if (ret)
return ret;
test_msg("Running hole first btrfs_get_extent test\n");
- ret = test_hole_first();
+ ret = test_hole_first(sectorsize, nodesize);
if (ret)
return ret;
test_msg("Running outstanding_extents tests\n");
- return test_extent_accounting();
+ return test_extent_accounting(sectorsize, nodesize);
}
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 8aa4ded31326..57a12c0d680b 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
+#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
#include "../transaction.h"
@@ -216,7 +217,8 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
return ret;
}
-static int test_no_shared_qgroup(struct btrfs_root *root)
+static int test_no_shared_qgroup(struct btrfs_root *root,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -227,7 +229,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
btrfs_init_dummy_trans(&trans);
test_msg("Qgroup basic add\n");
- ret = btrfs_create_qgroup(NULL, fs_info, 5);
+ ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
@@ -238,18 +240,19 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
* we can only call btrfs_qgroup_account_extent() directly to test
* quota.
*/
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+ ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FS_TREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -257,32 +260,33 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, nodesize)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
old_roots = NULL;
new_roots = NULL;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = remove_extent_item(root, 4096, 4096);
+ ret = remove_extent_item(root, nodesize, nodesize);
if (ret)
return -EINVAL;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -290,14 +294,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return -EINVAL;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 0, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
@@ -310,7 +314,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
* right, also remove one of the roots and make sure the exclusive count is
* adjusted properly.
*/
-static int test_multiple_refs(struct btrfs_root *root)
+static int test_multiple_refs(struct btrfs_root *root,
+ u32 sectorsize, u32 nodesize)
{
struct btrfs_trans_handle trans;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -322,25 +327,29 @@ static int test_multiple_refs(struct btrfs_root *root)
test_msg("Qgroup multiple refs test\n");
- /* We have 5 created already from the previous test */
- ret = btrfs_create_qgroup(NULL, fs_info, 256);
+ /*
+ * We have BTRFS_FS_TREE_OBJECTID created already from the
+ * previous test.
+ */
+ ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
if (ret) {
test_msg("Couldn't create a qgroup %d\n", ret);
return ret;
}
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
+ ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FS_TREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -348,30 +357,32 @@ static int test_multiple_refs(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, nodesize)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = add_tree_ref(root, 4096, 4096, 0, 256);
+ ret = add_tree_ref(root, nodesize, nodesize, 0,
+ BTRFS_FIRST_FREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -379,35 +390,38 @@ static int test_multiple_refs(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- if (btrfs_verify_qgroup_counts(fs_info, 256, 4096, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
+ nodesize, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots);
if (ret) {
ulist_free(old_roots);
test_msg("Couldn't find old roots: %d\n", ret);
return ret;
}
- ret = remove_extent_ref(root, 4096, 4096, 0, 256);
+ ret = remove_extent_ref(root, nodesize, nodesize, 0,
+ BTRFS_FIRST_FREE_OBJECTID);
if (ret)
return ret;
- ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots);
if (ret) {
ulist_free(old_roots);
ulist_free(new_roots);
@@ -415,19 +429,21 @@ static int test_multiple_refs(struct btrfs_root *root)
return ret;
}
- ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
- old_roots, new_roots);
+ ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
+ nodesize, old_roots, new_roots);
if (ret) {
test_msg("Couldn't account space for a qgroup %d\n", ret);
return ret;
}
- if (btrfs_verify_qgroup_counts(fs_info, 256, 0, 0)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
+ 0, 0)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
- if (btrfs_verify_qgroup_counts(fs_info, 5, 4096, 4096)) {
+ if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
+ nodesize, nodesize)) {
test_msg("Qgroup counts didn't match expected values\n");
return -EINVAL;
}
@@ -435,13 +451,13 @@ static int test_multiple_refs(struct btrfs_root *root)
return 0;
}
-int btrfs_test_qgroups(void)
+int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
{
struct btrfs_root *root;
struct btrfs_root *tmp_root;
int ret = 0;
- root = btrfs_alloc_dummy_root();
+ root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(root)) {
test_msg("Couldn't allocate root\n");
return PTR_ERR(root);
@@ -468,7 +484,8 @@ int btrfs_test_qgroups(void)
* Can't use bytenr 0, some things freak out
* *cough*backref walking code*cough*
*/
- root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+ root->node = alloc_test_extent_buffer(root->fs_info, nodesize,
+ nodesize);
if (!root->node) {
test_msg("Couldn't allocate dummy buffer\n");
ret = -ENOMEM;
@@ -476,16 +493,16 @@ int btrfs_test_qgroups(void)
}
btrfs_set_header_level(root->node, 0);
btrfs_set_header_nritems(root->node, 0);
- root->alloc_bytenr += 8192;
+ root->alloc_bytenr += 2 * nodesize;
- tmp_root = btrfs_alloc_dummy_root();
+ tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
goto out;
}
- tmp_root->root_key.objectid = 5;
+ tmp_root->root_key.objectid = BTRFS_FS_TREE_OBJECTID;
root->fs_info->fs_root = tmp_root;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
@@ -493,14 +510,14 @@ int btrfs_test_qgroups(void)
goto out;
}
- tmp_root = btrfs_alloc_dummy_root();
+ tmp_root = btrfs_alloc_dummy_root(sectorsize, nodesize);
if (IS_ERR(tmp_root)) {
test_msg("Couldn't allocate a fs root\n");
ret = PTR_ERR(tmp_root);
goto out;
}
- tmp_root->root_key.objectid = 256;
+ tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID;
ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
if (ret) {
test_msg("Couldn't insert fs root %d\n", ret);
@@ -508,10 +525,10 @@ int btrfs_test_qgroups(void)
}
test_msg("Running qgroup tests\n");
- ret = test_no_shared_qgroup(root);
+ ret = test_no_shared_qgroup(root, sectorsize, nodesize);
if (ret)
goto out;
- ret = test_multiple_refs(root);
+ ret = test_multiple_refs(root, sectorsize, nodesize);
out:
btrfs_free_dummy_root(root);
return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f6e24cb423ae..948aa186b353 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *info = root->fs_info;
+ u64 transid = trans->transid;
unsigned long cur = trans->delayed_ref_updates;
int lock = (trans->type != TRANS_JOIN_NOLOCK);
int err = 0;
@@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (must_run_delayed_refs) {
- btrfs_async_run_delayed_refs(root, cur,
+ btrfs_async_run_delayed_refs(root, cur, transid,
must_run_delayed_refs == 1);
}
return err;
@@ -1311,11 +1312,6 @@ int btrfs_defrag_root(struct btrfs_root *root)
return ret;
}
-/* Bisesctability fixup, remove in 4.8 */
-#ifndef btrfs_std_error
-#define btrfs_std_error btrfs_handle_fs_error
-#endif
-
/*
* Do all special snapshot related qgroup dirty hack.
*
@@ -1385,7 +1381,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
switch_commit_roots(trans->transaction, fs_info);
ret = btrfs_write_and_wait_transaction(trans, src);
if (ret)
- btrfs_std_error(fs_info, ret,
+ btrfs_handle_fs_error(fs_info, ret,
"Error while writing out transaction for qgroup");
out:
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9fe0ec2bf0fe..c5abee4f01ad 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -110,7 +110,6 @@ struct btrfs_trans_handle {
u64 chunk_bytes_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
- unsigned long blocks_used;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
@@ -121,6 +120,7 @@ struct btrfs_trans_handle {
bool can_flush_pending_bgs;
bool reloc_reserved;
bool sync;
+ bool dirty;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b7665af471d8..c05f69a8ec42 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2422,8 +2422,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent);
next = btrfs_find_create_tree_block(root, bytenr);
- if (!next)
- return -ENOMEM;
+ if (IS_ERR(next))
+ return PTR_ERR(next);
if (*level == 1) {
ret = wc->process_func(root, next, wc, ptr_gen);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bdc62561ede8..0fb4a959012e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -462,7 +462,7 @@ loop_lock:
sync_pending = 0;
}
- btrfsic_submit_bio(cur->bi_rw, cur);
+ btrfsic_submit_bio(cur);
num_run++;
batch_run++;
@@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
u64 dev_extent_len = 0;
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
int i, ret = 0;
+ struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
/* Just in case */
root = root->fs_info->chunk_root;
@@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
+ /*
+ * Take the device list mutex to prevent races with the final phase of
+ * a device replace operation that replaces the device object associated
+ * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
+ */
+ mutex_lock(&fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
ret = btrfs_free_dev_extent(trans, device,
map->stripes[i].physical,
&dev_extent_len);
if (ret) {
+ mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret);
goto out;
}
@@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
if (map->stripes[i].dev) {
ret = btrfs_update_device(trans, map->stripes[i].dev);
if (ret) {
+ mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret);
goto out;
}
}
}
+ mutex_unlock(&fs_devices->device_list_mutex);
+
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -4230,6 +4241,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
if (IS_ERR(uuid_root)) {
ret = PTR_ERR(uuid_root);
btrfs_abort_transaction(trans, tree_root, ret);
+ btrfs_end_transaction(trans, tree_root);
return ret;
}
@@ -4682,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_RAID5) {
raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
- btrfs_super_stripesize(info->super_copy));
+ extent_root->stripesize);
data_stripes = num_stripes - 1;
}
if (type & BTRFS_BLOCK_GROUP_RAID6) {
raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
- btrfs_super_stripesize(info->super_copy));
+ extent_root->stripesize);
data_stripes = num_stripes - 2;
}
@@ -5248,7 +5260,7 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
kfree(bbio);
}
-static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret,
int mirror_num, int need_raid_map)
@@ -5334,7 +5346,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
raid56_full_stripe_start *= full_stripe_len;
}
- if (rw & REQ_DISCARD) {
+ if (op == REQ_OP_DISCARD) {
/* we don't discard raid56 yet */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
ret = -EOPNOTSUPP;
@@ -5347,7 +5359,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
For other RAID types and for RAID[56] reads, just allow a single
stripe (on a single disk). */
if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
- (rw & REQ_WRITE)) {
+ (op == REQ_OP_WRITE)) {
max_len = stripe_len * nr_data_stripes(map) -
(offset - raid56_full_stripe_start);
} else {
@@ -5372,8 +5384,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
- !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
- dev_replace->tgtdev != NULL) {
+ op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
+ op != REQ_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) {
/*
* in dev-replace case, for repair case (that's the only
* case where the mirror is selected explicitly when
@@ -5460,15 +5472,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
(offset + *length);
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
- if (rw & REQ_DISCARD)
+ if (op == REQ_OP_DISCARD)
num_stripes = min_t(u64, map->num_stripes,
stripe_nr_end - stripe_nr_orig);
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
&stripe_index);
- if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
+ if (op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
+ op != REQ_GET_READ_MIRRORS)
mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
- if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))
+ if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
+ op == REQ_GET_READ_MIRRORS)
num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
@@ -5481,7 +5495,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
- if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) {
+ if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
+ op == REQ_GET_READ_MIRRORS) {
num_stripes = map->num_stripes;
} else if (mirror_num) {
stripe_index = mirror_num - 1;
@@ -5495,9 +5510,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
stripe_index *= map->sub_stripes;
- if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
+ if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS)
num_stripes = map->sub_stripes;
- else if (rw & REQ_DISCARD)
+ else if (op == REQ_OP_DISCARD)
num_stripes = min_t(u64, map->sub_stripes *
(stripe_nr_end - stripe_nr_orig),
map->num_stripes);
@@ -5515,7 +5530,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
if (need_raid_map &&
- ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
+ (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS ||
mirror_num > 1)) {
/* push stripe_nr back to the start of the full stripe */
stripe_nr = div_u64(raid56_full_stripe_start,
@@ -5543,8 +5558,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
/* We distribute the parity blocks across stripes */
div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
&stripe_index);
- if (!(rw & (REQ_WRITE | REQ_DISCARD |
- REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
+ if ((op != REQ_OP_WRITE && op != REQ_OP_DISCARD &&
+ op != REQ_GET_READ_MIRRORS) && mirror_num <= 1)
mirror_num = 1;
}
} else {
@@ -5567,9 +5582,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
num_alloc_stripes = num_stripes;
if (dev_replace_is_ongoing) {
- if (rw & (REQ_WRITE | REQ_DISCARD))
+ if (op == REQ_OP_WRITE || op == REQ_OP_DISCARD)
num_alloc_stripes <<= 1;
- if (rw & REQ_GET_READ_MIRRORS)
+ if (op == REQ_GET_READ_MIRRORS)
num_alloc_stripes++;
tgtdev_indexes = num_stripes;
}
@@ -5584,7 +5599,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
/* build raid_map */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK &&
- need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
+ need_raid_map &&
+ ((op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS) ||
mirror_num > 1)) {
u64 tmp;
unsigned rot;
@@ -5609,7 +5625,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
RAID6_Q_STRIPE;
}
- if (rw & REQ_DISCARD) {
+ if (op == REQ_OP_DISCARD) {
u32 factor = 0;
u32 sub_stripes = 0;
u64 stripes_per_dev = 0;
@@ -5689,14 +5705,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
- if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
+ if (op == REQ_OP_WRITE || op == REQ_GET_READ_MIRRORS)
max_errors = btrfs_chunk_max_errors(map);
if (bbio->raid_map)
sort_parity_stripes(bbio, num_stripes);
tgtdev_indexes = 0;
- if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
+ if (dev_replace_is_ongoing &&
+ (op == REQ_OP_WRITE || op == REQ_OP_DISCARD) &&
dev_replace->tgtdev != NULL) {
int index_where_to_add;
u64 srcdev_devid = dev_replace->srcdev->devid;
@@ -5731,7 +5748,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
num_stripes = index_where_to_add;
- } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
+ } else if (dev_replace_is_ongoing && (op == REQ_GET_READ_MIRRORS) &&
dev_replace->tgtdev != NULL) {
u64 srcdev_devid = dev_replace->srcdev->devid;
int index_srcdev = 0;
@@ -5762,20 +5779,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
}
if (found) {
- if (physical_of_found + map->stripe_len <=
- dev_replace->cursor_left) {
- struct btrfs_bio_stripe *tgtdev_stripe =
- bbio->stripes + num_stripes;
+ struct btrfs_bio_stripe *tgtdev_stripe =
+ bbio->stripes + num_stripes;
- tgtdev_stripe->physical = physical_of_found;
- tgtdev_stripe->length =
- bbio->stripes[index_srcdev].length;
- tgtdev_stripe->dev = dev_replace->tgtdev;
- bbio->tgtdev_map[index_srcdev] = num_stripes;
+ tgtdev_stripe->physical = physical_of_found;
+ tgtdev_stripe->length =
+ bbio->stripes[index_srcdev].length;
+ tgtdev_stripe->dev = dev_replace->tgtdev;
+ bbio->tgtdev_map[index_srcdev] = num_stripes;
- tgtdev_indexes++;
- num_stripes++;
- }
+ tgtdev_indexes++;
+ num_stripes++;
}
}
@@ -5806,21 +5820,21 @@ out:
return ret;
}
-int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num)
{
- return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
+ return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
mirror_num, 0);
}
/* For Scrub/replace */
-int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
int need_raid_map)
{
- return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
+ return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
mirror_num, need_raid_map);
}
@@ -5934,7 +5948,7 @@ static void btrfs_end_bio(struct bio *bio)
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
if (dev->bdev) {
- if (bio->bi_rw & WRITE)
+ if (bio_op(bio) == REQ_OP_WRITE)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else
@@ -5988,7 +6002,7 @@ static void btrfs_end_bio(struct bio *bio)
*/
static noinline void btrfs_schedule_bio(struct btrfs_root *root,
struct btrfs_device *device,
- int rw, struct bio *bio)
+ struct bio *bio)
{
int should_queue = 1;
struct btrfs_pending_bios *pending_bios;
@@ -5999,9 +6013,9 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
}
/* don't bother with additional async steps for reads, right now */
- if (!(rw & REQ_WRITE)) {
+ if (bio_op(bio) == REQ_OP_READ) {
bio_get(bio);
- btrfsic_submit_bio(rw, bio);
+ btrfsic_submit_bio(bio);
bio_put(bio);
return;
}
@@ -6015,7 +6029,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
atomic_inc(&root->fs_info->nr_async_bios);
WARN_ON(bio->bi_next);
bio->bi_next = NULL;
- bio->bi_rw |= rw;
spin_lock(&device->io_lock);
if (bio->bi_rw & REQ_SYNC)
@@ -6041,7 +6054,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root,
static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
struct bio *bio, u64 physical, int dev_nr,
- int rw, int async)
+ int async)
{
struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
@@ -6055,8 +6068,8 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
rcu_read_lock();
name = rcu_dereference(dev->name);
- pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
- "(%s id %llu), size=%u\n", rw,
+ pr_debug("btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu "
+ "(%s id %llu), size=%u\n", bio_op(bio), bio->bi_rw,
(u64)bio->bi_iter.bi_sector, (u_long)dev->bdev->bd_dev,
name->str, dev->devid, bio->bi_iter.bi_size);
rcu_read_unlock();
@@ -6067,9 +6080,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
btrfs_bio_counter_inc_noblocked(root->fs_info);
if (async)
- btrfs_schedule_bio(root, dev, rw, bio);
+ btrfs_schedule_bio(root, dev, bio);
else
- btrfsic_submit_bio(rw, bio);
+ btrfsic_submit_bio(bio);
}
static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
@@ -6086,7 +6099,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
}
}
-int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
+int btrfs_map_bio(struct btrfs_root *root, struct bio *bio,
int mirror_num, int async_submit)
{
struct btrfs_device *dev;
@@ -6103,8 +6116,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
map_length = length;
btrfs_bio_counter_inc_blocked(root->fs_info);
- ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
- mirror_num, 1);
+ ret = __btrfs_map_block(root->fs_info, bio_op(bio), logical,
+ &map_length, &bbio, mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(root->fs_info);
return ret;
@@ -6118,10 +6131,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
- ((rw & WRITE) || (mirror_num > 1))) {
+ ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
- if (rw & WRITE) {
+ if (bio_op(bio) == REQ_OP_WRITE) {
ret = raid56_parity_write(root, bio, bbio, map_length);
} else {
ret = raid56_parity_recover(root, bio, bbio, map_length,
@@ -6140,7 +6153,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
dev = bbio->stripes[dev_nr].dev;
- if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
+ if (!dev || !dev->bdev ||
+ (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) {
bbio_error(bbio, first_bio, logical);
continue;
}
@@ -6152,7 +6166,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bio = first_bio;
submit_stripe_bio(root, bbio, bio,
- bbio->stripes[dev_nr].physical, dev_nr, rw,
+ bbio->stripes[dev_nr].physical, dev_nr,
async_submit);
}
btrfs_bio_counter_dec(root->fs_info);
@@ -6250,27 +6264,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
return dev;
}
-static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
- struct extent_buffer *leaf,
- struct btrfs_chunk *chunk)
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, u64 logical)
{
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
- struct map_lookup *map;
- struct extent_map *em;
- u64 logical;
u64 length;
u64 stripe_len;
- u64 devid;
- u8 uuid[BTRFS_UUID_SIZE];
- int num_stripes;
- int ret;
- int i;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
- logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
- /* Validation check */
+ sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ type = btrfs_chunk_type(leaf, chunk);
+
if (!num_stripes) {
btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
num_stripes);
@@ -6281,6 +6291,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
"invalid chunk logical %llu", logical);
return -EIO;
}
+ if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+ btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+ btrfs_chunk_sector_size(leaf, chunk));
+ return -EIO;
+ }
if (!length || !IS_ALIGNED(length, root->sectorsize)) {
btrfs_err(root->fs_info,
"invalid chunk length %llu", length);
@@ -6292,13 +6307,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
return -EIO;
}
if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
- btrfs_chunk_type(leaf, chunk)) {
+ type) {
btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
~(BTRFS_BLOCK_GROUP_TYPE_MASK |
BTRFS_BLOCK_GROUP_PROFILE_MASK) &
btrfs_chunk_type(leaf, chunk));
return -EIO;
}
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+ ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+ num_stripes != 1)) {
+ btrfs_err(root->fs_info,
+ "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+ num_stripes, sub_stripes,
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk)
+{
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ struct map_lookup *map;
+ struct extent_map *em;
+ u64 logical;
+ u64 length;
+ u64 stripe_len;
+ u64 devid;
+ u8 uuid[BTRFS_UUID_SIZE];
+ int num_stripes;
+ int ret;
+ int i;
+
+ logical = key->offset;
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+ ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+ if (ret)
+ return ret;
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6546,6 +6602,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
u32 array_size;
u32 len = 0;
u32 cur_offset;
+ u64 type;
struct btrfs_key key;
ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6555,8 +6612,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
* overallocate but we can keep it as-is, only the first page is used.
*/
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
- if (!sb)
- return -ENOMEM;
+ if (IS_ERR(sb))
+ return PTR_ERR(sb);
set_extent_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
@@ -6612,6 +6669,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
break;
}
+ type = btrfs_chunk_type(sb, chunk);
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+ btrfs_err(root->fs_info,
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
len = btrfs_chunk_item_size(num_stripes);
if (cur_offset + len > array_size)
goto out_short_read;
@@ -6630,12 +6696,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
sb_array_offset += len;
cur_offset += len;
}
+ clear_extent_buffer_uptodate(sb);
free_extent_buffer_stale(sb);
return ret;
out_short_read:
printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
len, cur_offset);
+ clear_extent_buffer_uptodate(sb);
free_extent_buffer_stale(sb);
return -EIO;
}
@@ -6648,6 +6716,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
struct btrfs_key found_key;
int ret;
int slot;
+ u64 total_dev = 0;
root = root->fs_info->chunk_root;
@@ -6689,6 +6758,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
ret = read_one_dev(root, leaf, dev_item);
if (ret)
goto error;
+ total_dev++;
} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
struct btrfs_chunk *chunk;
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -6698,6 +6768,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
}
path->slots[0]++;
}
+
+ /*
+ * After loading chunk tree, we've got all device information,
+ * do another round of validation checks.
+ */
+ if (total_dev != root->fs_info->fs_devices->total_devices) {
+ btrfs_err(root->fs_info,
+ "super_num_devices %llu mismatch with num_devices %llu found here",
+ btrfs_super_num_devices(root->fs_info->super_copy),
+ total_dev);
+ ret = -EINVAL;
+ goto error;
+ }
+ if (btrfs_super_total_bytes(root->fs_info->super_copy) <
+ root->fs_info->fs_devices->total_rw_bytes) {
+ btrfs_err(root->fs_info,
+ "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
+ btrfs_super_total_bytes(root->fs_info->super_copy),
+ root->fs_info->fs_devices->total_rw_bytes);
+ ret = -EINVAL;
+ goto error;
+ }
ret = 0;
error:
unlock_chunks(root);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 0ac90f8d85bd..6613e6335ca2 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -375,10 +375,10 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
u64 end, u64 *length);
void btrfs_get_bbio(struct btrfs_bio *bbio);
void btrfs_put_bbio(struct btrfs_bio *bbio);
-int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num);
-int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw,
+int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num,
int need_raid_map);
@@ -391,7 +391,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
-int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
+int btrfs_map_bio(struct btrfs_root *root, struct bio *bio,
int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
diff --git a/fs/buffer.c b/fs/buffer.c
index 754813a6962b..b9fa1be75e69 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -45,7 +45,7 @@
#include <trace/events/block.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
-static int submit_bh_wbc(int rw, struct buffer_head *bh,
+static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
unsigned long bio_flags,
struct writeback_control *wbc);
@@ -153,7 +153,7 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
- /* This happens, due to failed READA attempts. */
+ /* This happens, due to failed read-ahead attempts. */
clear_buffer_uptodate(bh);
}
unlock_buffer(bh);
@@ -588,7 +588,7 @@ void write_boundary_block(struct block_device *bdev,
struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
if (bh) {
if (buffer_dirty(bh))
- ll_rw_block(WRITE, 1, &bh);
+ ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
put_bh(bh);
}
}
@@ -1225,7 +1225,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh)
} else {
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, 0, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1395,7 +1395,7 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh)) {
- ll_rw_block(READA, 1, &bh);
+ ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
brelse(bh);
}
}
@@ -1687,7 +1687,7 @@ static struct buffer_head *create_page_buffers(struct page *page, struct inode *
* WB_SYNC_ALL, the writes are posted using WRITE_SYNC; this
* causes the writes to be flagged as synchronous writes.
*/
-static int __block_write_full_page(struct inode *inode, struct page *page,
+int __block_write_full_page(struct inode *inode, struct page *page,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler)
{
@@ -1697,7 +1697,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
- int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+ int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1786,7 +1786,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(write_op, bh, 0, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc);
nr_underway++;
}
bh = next;
@@ -1840,7 +1840,7 @@ recover:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(write_op, bh, 0, wbc);
+ submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, 0, wbc);
nr_underway++;
}
bh = next;
@@ -1848,6 +1848,7 @@ recover:
unlock_page(page);
goto done;
}
+EXPORT_SYMBOL(__block_write_full_page);
/*
* If a page has any new buffers, zero them out here, and mark them uptodate
@@ -1955,7 +1956,7 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
*wait_bh++=bh;
}
}
@@ -2248,7 +2249,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
if (buffer_uptodate(bh))
end_buffer_async_read(bh, 1);
else
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, 0, bh);
}
return 0;
}
@@ -2582,7 +2583,7 @@ int nobh_write_begin(struct address_space *mapping,
if (block_start < from || block_end > to) {
lock_buffer(bh);
bh->b_end_io = end_buffer_read_nobh;
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, 0, bh);
nr_reads++;
}
}
@@ -2852,7 +2853,7 @@ int block_truncate_page(struct address_space *mapping,
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
err = -EIO;
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
@@ -2949,7 +2950,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
* errors, this only handles the "we need to be able to
* do IO at the final sector" case.
*/
-void guard_bio_eod(int rw, struct bio *bio)
+void guard_bio_eod(int op, struct bio *bio)
{
sector_t maxsector;
struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
@@ -2979,13 +2980,13 @@ void guard_bio_eod(int rw, struct bio *bio)
bvec->bv_len -= truncated_bytes;
/* ..and clear the end of the buffer for reads */
- if ((rw & RW_MASK) == READ) {
+ if (op == REQ_OP_READ) {
zero_user(bvec->bv_page, bvec->bv_offset + bvec->bv_len,
truncated_bytes);
}
}
-static int submit_bh_wbc(int rw, struct buffer_head *bh,
+static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
unsigned long bio_flags, struct writeback_control *wbc)
{
struct bio *bio;
@@ -2999,7 +3000,7 @@ static int submit_bh_wbc(int rw, struct buffer_head *bh,
/*
* Only clear out a write error when rewriting
*/
- if (test_set_buffer_req(bh) && (rw & WRITE))
+ if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
clear_buffer_write_io_error(bh);
/*
@@ -3024,39 +3025,42 @@ static int submit_bh_wbc(int rw, struct buffer_head *bh,
bio->bi_flags |= bio_flags;
/* Take care of bh's that straddle the end of the device */
- guard_bio_eod(rw, bio);
+ guard_bio_eod(op, bio);
if (buffer_meta(bh))
- rw |= REQ_META;
+ op_flags |= REQ_META;
if (buffer_prio(bh))
- rw |= REQ_PRIO;
+ op_flags |= REQ_PRIO;
+ bio_set_op_attrs(bio, op, op_flags);
- submit_bio(rw, bio);
+ submit_bio(bio);
return 0;
}
-int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
+int _submit_bh(int op, int op_flags, struct buffer_head *bh,
+ unsigned long bio_flags)
{
- return submit_bh_wbc(rw, bh, bio_flags, NULL);
+ return submit_bh_wbc(op, op_flags, bh, bio_flags, NULL);
}
EXPORT_SYMBOL_GPL(_submit_bh);
-int submit_bh(int rw, struct buffer_head *bh)
+int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
- return submit_bh_wbc(rw, bh, 0, NULL);
+ return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
/**
* ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
+ * @op: whether to %READ or %WRITE
+ * @op_flags: rq_flag_bits
* @nr: number of &struct buffer_heads in the array
* @bhs: array of pointers to &struct buffer_head
*
* ll_rw_block() takes an array of pointers to &struct buffer_heads, and
- * requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %READA option is described in the documentation for generic_make_request()
- * which ll_rw_block() calls.
+ * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
+ * @op_flags contains flags modifying the detailed I/O behavior, most notably
+ * %REQ_RAHEAD.
*
* This function drops any buffer that it cannot get a lock on (with the
* BH_Lock state bit), any buffer that appears to be clean when doing a write
@@ -3072,7 +3076,7 @@ EXPORT_SYMBOL(submit_bh);
* All of the buffers must be for the same device, and must also be a
* multiple of the current approved size for the device.
*/
-void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
+void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[])
{
int i;
@@ -3081,18 +3085,18 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
if (!trylock_buffer(bh))
continue;
- if (rw == WRITE) {
+ if (op == WRITE) {
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- submit_bh(WRITE, bh);
+ submit_bh(op, op_flags, bh);
continue;
}
} else {
if (!buffer_uptodate(bh)) {
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(rw, bh);
+ submit_bh(op, op_flags, bh);
continue;
}
}
@@ -3101,7 +3105,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
}
EXPORT_SYMBOL(ll_rw_block);
-void write_dirty_buffer(struct buffer_head *bh, int rw)
+void write_dirty_buffer(struct buffer_head *bh, int op_flags)
{
lock_buffer(bh);
if (!test_clear_buffer_dirty(bh)) {
@@ -3110,7 +3114,7 @@ void write_dirty_buffer(struct buffer_head *bh, int rw)
}
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- submit_bh(rw, bh);
+ submit_bh(REQ_OP_WRITE, op_flags, bh);
}
EXPORT_SYMBOL(write_dirty_buffer);
@@ -3119,7 +3123,7 @@ EXPORT_SYMBOL(write_dirty_buffer);
* and then start new I/O and then wait upon it. The caller must have a ref on
* the buffer_head.
*/
-int __sync_dirty_buffer(struct buffer_head *bh, int rw)
+int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
{
int ret = 0;
@@ -3128,7 +3132,7 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw)
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(rw, bh);
+ ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
wait_on_buffer(bh);
if (!ret && !buffer_uptodate(bh))
ret = -EIO;
@@ -3391,7 +3395,7 @@ int bh_submit_read(struct buffer_head *bh)
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, 0, bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return 0;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 861d611b8c05..ce5f345d70f5 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -380,7 +380,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
* check if the backing cache is updated to FS-Cache
* - called by FS-Cache when evaluates if need to invalidate the cache
*/
-static bool cachefiles_check_consistency(struct fscache_operation *op)
+static int cachefiles_check_consistency(struct fscache_operation *op)
{
struct cachefiles_object *object;
struct cachefiles_cache *cache;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index eeb71e5de27a..26a9d10d75e9 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -276,8 +276,10 @@ static void finish_read(struct ceph_osd_request *req)
for (i = 0; i < num_pages; i++) {
struct page *page = osd_data->pages[i];
- if (rc < 0 && rc != -ENOENT)
+ if (rc < 0 && rc != -ENOENT) {
+ ceph_fscache_readpage_cancel(inode, page);
goto unlock;
+ }
if (bytes < (int)PAGE_SIZE) {
/* zero (remainder of) page */
int s = bytes < 0 ? 0 : bytes;
@@ -535,8 +537,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
- ceph_readpage_to_fscache(inode, page);
-
set_page_writeback(page);
err = ceph_osdc_writepages(osdc, ceph_vino(inode),
&ci->i_layout, snapc,
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index c052b5bf219b..238c55b01723 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -25,6 +25,7 @@
#include "cache.h"
struct ceph_aux_inode {
+ u64 version;
struct timespec mtime;
loff_t size;
};
@@ -69,15 +70,8 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
&ceph_fscache_fsid_object_def,
fsc, true);
-
- if (fsc->fscache == NULL) {
+ if (!fsc->fscache)
pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
- return 0;
- }
-
- fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1);
- if (fsc->revalidate_wq == NULL)
- return -ENOMEM;
return 0;
}
@@ -105,6 +99,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
const struct inode* inode = &ci->vfs_inode;
memset(&aux, 0, sizeof(aux));
+ aux.version = ci->i_version;
aux.mtime = inode->i_mtime;
aux.size = i_size_read(inode);
@@ -131,6 +126,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
return FSCACHE_CHECKAUX_OBSOLETE;
memset(&aux, 0, sizeof(aux));
+ aux.version = ci->i_version;
aux.mtime = inode->i_mtime;
aux.size = i_size_read(inode);
@@ -181,32 +177,26 @@ static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
.now_uncached = ceph_fscache_inode_now_uncached,
};
-void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
- struct ceph_inode_info* ci)
+void ceph_fscache_register_inode_cookie(struct inode *inode)
{
- struct inode* inode = &ci->vfs_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
/* No caching for filesystem */
if (fsc->fscache == NULL)
return;
/* Only cache for regular files that are read only */
- if ((ci->vfs_inode.i_mode & S_IFREG) == 0)
+ if (!S_ISREG(inode->i_mode))
return;
- /* Avoid multiple racing open requests */
- inode_lock(inode);
-
- if (ci->fscache)
- goto done;
-
- ci->fscache = fscache_acquire_cookie(fsc->fscache,
- &ceph_fscache_inode_object_def,
- ci, true);
- fscache_check_consistency(ci->fscache);
-done:
+ inode_lock_nested(inode, I_MUTEX_CHILD);
+ if (!ci->fscache) {
+ ci->fscache = fscache_acquire_cookie(fsc->fscache,
+ &ceph_fscache_inode_object_def,
+ ci, false);
+ }
inode_unlock(inode);
-
}
void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
@@ -222,6 +212,34 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
fscache_relinquish_cookie(cookie, 0);
}
+static bool ceph_fscache_can_enable(void *data)
+{
+ struct inode *inode = data;
+ return !inode_is_open_for_write(inode);
+}
+
+void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (!fscache_cookie_valid(ci->fscache))
+ return;
+
+ if (inode_is_open_for_write(inode)) {
+ dout("fscache_file_set_cookie %p %p disabling cache\n",
+ inode, filp);
+ fscache_disable_cookie(ci->fscache, false);
+ fscache_uncache_all_inode_pages(ci->fscache, inode);
+ } else {
+ fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
+ inode);
+ if (fscache_cookie_enabled(ci->fscache)) {
+ dout("fscache_file_set_cookie %p %p enabing cache\n",
+ inode, filp);
+ }
+ }
+}
+
static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
{
if (!error)
@@ -238,8 +256,7 @@ static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int
static inline bool cache_valid(struct ceph_inode_info *ci)
{
- return ((ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE) &&
- (ci->i_fscache_gen == ci->i_rdcache_gen));
+ return ci->i_fscache_gen == ci->i_rdcache_gen;
}
@@ -332,69 +349,27 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
- if (fsc->revalidate_wq)
- destroy_workqueue(fsc->revalidate_wq);
-
fscache_relinquish_cookie(fsc->fscache, 0);
fsc->fscache = NULL;
}
-static void ceph_revalidate_work(struct work_struct *work)
-{
- int issued;
- u32 orig_gen;
- struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
- i_revalidate_work);
- struct inode *inode = &ci->vfs_inode;
-
- spin_lock(&ci->i_ceph_lock);
- issued = __ceph_caps_issued(ci, NULL);
- orig_gen = ci->i_rdcache_gen;
- spin_unlock(&ci->i_ceph_lock);
-
- if (!(issued & CEPH_CAP_FILE_CACHE)) {
- dout("revalidate_work lost cache before validation %p\n",
- inode);
- goto out;
- }
-
- if (!fscache_check_consistency(ci->fscache))
- fscache_invalidate(ci->fscache);
-
- spin_lock(&ci->i_ceph_lock);
- /* Update the new valid generation (backwards sanity check too) */
- if (orig_gen > ci->i_fscache_gen) {
- ci->i_fscache_gen = orig_gen;
- }
- spin_unlock(&ci->i_ceph_lock);
-
-out:
- iput(&ci->vfs_inode);
-}
-
-void ceph_queue_revalidate(struct inode *inode)
+/*
+ * caller should hold CEPH_CAP_FILE_{RD,CACHE}
+ */
+void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
{
- struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
- struct ceph_inode_info *ci = ceph_inode(inode);
-
- if (fsc->revalidate_wq == NULL || ci->fscache == NULL)
+ if (cache_valid(ci))
return;
- ihold(inode);
-
- if (queue_work(ceph_sb_to_client(inode->i_sb)->revalidate_wq,
- &ci->i_revalidate_work)) {
- dout("ceph_queue_revalidate %p\n", inode);
- } else {
- dout("ceph_queue_revalidate %p failed\n)", inode);
- iput(inode);
+ /* resue i_truncate_mutex. There should be no pending
+ * truncate while the caller holds CEPH_CAP_FILE_RD */
+ mutex_lock(&ci->i_truncate_mutex);
+ if (!cache_valid(ci)) {
+ if (fscache_check_consistency(ci->fscache))
+ fscache_invalidate(ci->fscache);
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_fscache_gen = ci->i_rdcache_gen;
+ spin_unlock(&ci->i_ceph_lock);
}
-}
-
-void ceph_fscache_inode_init(struct ceph_inode_info *ci)
-{
- ci->fscache = NULL;
- /* The first load is verifed cookie open time */
- ci->i_fscache_gen = 1;
- INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work);
+ mutex_unlock(&ci->i_truncate_mutex);
}
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index 5ac591bd012b..7e72c7594f0c 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -34,10 +34,10 @@ void ceph_fscache_unregister(void);
int ceph_fscache_register_fs(struct ceph_fs_client* fsc);
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc);
-void ceph_fscache_inode_init(struct ceph_inode_info *ci);
-void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
- struct ceph_inode_info* ci);
+void ceph_fscache_register_inode_cookie(struct inode *inode);
void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
+void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp);
+void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci);
int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
int ceph_readpages_from_fscache(struct inode *inode,
@@ -46,12 +46,11 @@ int ceph_readpages_from_fscache(struct inode *inode,
unsigned *nr_pages);
void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
-void ceph_queue_revalidate(struct inode *inode);
-static inline void ceph_fscache_update_objectsize(struct inode *inode)
+static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
- struct ceph_inode_info *ci = ceph_inode(inode);
- fscache_attr_changed(ci->fscache);
+ ci->fscache = NULL;
+ ci->i_fscache_gen = 0;
}
static inline void ceph_fscache_invalidate(struct inode *inode)
@@ -88,6 +87,11 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode,
return fscache_readpages_cancel(ci->fscache, pages);
}
+static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
+{
+ ci->i_fscache_gen = ci->i_rdcache_gen - 1;
+}
+
#else
static inline int ceph_fscache_register(void)
@@ -112,8 +116,20 @@ static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
}
-static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc,
- struct ceph_inode_info* ci)
+static inline void ceph_fscache_register_inode_cookie(struct inode *inode)
+{
+}
+
+static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+}
+
+static inline void ceph_fscache_file_set_cookie(struct inode *inode,
+ struct file *filp)
+{
+}
+
+static inline void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
{
}
@@ -141,10 +157,6 @@ static inline void ceph_readpage_to_fscache(struct inode *inode,
{
}
-static inline void ceph_fscache_update_objectsize(struct inode *inode)
-{
-}
-
static inline void ceph_fscache_invalidate(struct inode *inode)
{
}
@@ -154,10 +166,6 @@ static inline void ceph_invalidate_fscache_page(struct inode *inode,
{
}
-static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
-{
-}
-
static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
{
return 1;
@@ -173,7 +181,7 @@ static inline void ceph_fscache_readpages_cancel(struct inode *inode,
{
}
-static inline void ceph_queue_revalidate(struct inode *inode)
+static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
{
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c17b5d76d75e..6f60d0a3d0f9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2393,6 +2393,9 @@ again:
snap_rwsem_locked = true;
}
*got = need | (have & want);
+ if ((need & CEPH_CAP_FILE_RD) &&
+ !(*got & CEPH_CAP_FILE_CACHE))
+ ceph_disable_fscache_readpage(ci);
__take_cap_refs(ci, *got, true);
ret = 1;
}
@@ -2554,6 +2557,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
break;
}
+ if ((_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE))
+ ceph_fscache_revalidate_cookie(ci);
+
*got = _got;
return 0;
}
@@ -2795,7 +2801,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
bool writeback = false;
bool queue_trunc = false;
bool queue_invalidate = false;
- bool queue_revalidate = false;
bool deleted_inode = false;
bool fill_inline = false;
@@ -2837,8 +2842,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
ci->i_rdcache_revoking = ci->i_rdcache_gen;
}
}
-
- ceph_fscache_invalidate(inode);
}
/* side effects now are allowed */
@@ -2880,11 +2883,6 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
}
}
- /* Do we need to revalidate our fscache cookie. Don't bother on the
- * first cache cap as we already validate at cookie creation time. */
- if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
- queue_revalidate = true;
-
if (newcaps & CEPH_CAP_ANY_RD) {
/* ctime/mtime/atime? */
ceph_decode_timespec(&mtime, &grant->mtime);
@@ -2993,11 +2991,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
if (fill_inline)
ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
- if (queue_trunc) {
+ if (queue_trunc)
ceph_queue_vmtruncate(inode);
- ceph_queue_revalidate(inode);
- } else if (queue_revalidate)
- ceph_queue_revalidate(inode);
if (writeback)
/*
@@ -3199,10 +3194,8 @@ static void handle_cap_trunc(struct inode *inode,
truncate_seq, truncate_size, size);
spin_unlock(&ci->i_ceph_lock);
- if (queue_trunc) {
+ if (queue_trunc)
ceph_queue_vmtruncate(inode);
- ceph_fscache_invalidate(inode);
- }
}
/*
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 6e72c98162d5..1780218a48f0 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
}
dentry = d_obtain_alias(inode);
- if (IS_ERR(dentry)) {
- iput(inode);
+ if (IS_ERR(dentry))
return dentry;
- }
err = ceph_init_dentry(dentry);
if (err < 0) {
dput(dentry);
@@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb,
return ERR_PTR(-ENOENT);
dentry = d_obtain_alias(inode);
- if (IS_ERR(dentry)) {
- iput(inode);
+ if (IS_ERR(dentry))
return dentry;
- }
err = ceph_init_dentry(dentry);
if (err < 0) {
dput(dentry);
@@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
dout("fh_to_parent %llx\n", cfh->parent_ino);
dentry = __get_parent(sb, NULL, cfh->ino);
- if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+ if (unlikely(dentry == ERR_PTR(-ENOENT)))
dentry = __fh_to_dentry(sb, cfh->parent_ino);
return dentry;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index a888df6f2d71..0daaf7ceedc5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -137,23 +137,11 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
{
struct ceph_file_info *cf;
int ret = 0;
- struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
- struct ceph_mds_client *mdsc = fsc->mdsc;
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
- /* First file open request creates the cookie, we want to keep
- * this cookie around for the filetime of the inode as not to
- * have to worry about fscache register / revoke / operation
- * races.
- *
- * Also, if we know the operation is going to invalidate data
- * (non readonly) just nuke the cache right away.
- */
- ceph_fscache_register_inode_cookie(mdsc->fsc, ci);
- if ((fmode & CEPH_FILE_MODE_WR))
- ceph_fscache_invalidate(inode);
+ ceph_fscache_register_inode_cookie(inode);
+ ceph_fscache_file_set_cookie(inode, file);
case S_IFDIR:
dout("init_file %p %p 0%o (regular)\n", inode, file,
inode->i_mode);
@@ -406,7 +394,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
- if (d_unhashed(dentry)) {
+ if (d_in_lookup(dentry)) {
dn = ceph_finish_lookup(req, dentry, err);
if (IS_ERR(dn))
err = PTR_ERR(dn);
@@ -1349,7 +1337,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
retry_snap:
- if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
goto out;
}
@@ -1407,7 +1395,6 @@ retry_snap:
iov_iter_advance(from, written);
ceph_put_snap_context(snapc);
} else {
- loff_t old_size = i_size_read(inode);
/*
* No need to acquire the i_truncate_mutex. Because
* the MDS revokes Fwb caps before sending truncate
@@ -1418,8 +1405,6 @@ retry_snap:
written = generic_perform_write(file, from, pos);
if (likely(written >= 0))
iocb->ki_pos = pos + written;
- if (i_size_read(inode) > old_size)
- ceph_fscache_update_objectsize(inode);
inode_unlock(inode);
}
@@ -1440,7 +1425,7 @@ retry_snap:
ceph_put_cap_refs(ci, got);
if (written >= 0) {
- if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL))
iocb->ki_flags |= IOCB_DSYNC;
written = generic_write_sync(iocb, written);
@@ -1672,8 +1657,8 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
- if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) &&
- !(mode & FALLOC_FL_PUNCH_HOLE)) {
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
+ !(mode & FALLOC_FL_PUNCH_HOLE)) {
ret = -ENOSPC;
goto unlock;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 0130a8592191..0168b49fb6ad 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -103,7 +103,6 @@ struct ceph_fs_client {
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_cookie *fscache;
- struct workqueue_struct *revalidate_wq;
#endif
};
@@ -360,8 +359,7 @@ struct ceph_inode_info {
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_cookie *fscache;
- u32 i_fscache_gen; /* sequence, for delayed fscache validate */
- struct work_struct i_revalidate_work;
+ u32 i_fscache_gen;
#endif
struct inode vfs_inode; /* at end */
};
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 687471dc04a0..6edd825231c5 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -92,7 +92,7 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
}
if (i < CHRDEV_MAJOR_DYN_END)
- pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range",
+ pr_warn("CHRDEV \"%s\" major number %d goes below the dynamic allocation range\n",
name, i);
if (i == 0) {
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 5a53ac6b1e02..02b071bf3732 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target)
case SFM_SLASH:
*target = '\\';
break;
+ case SFM_SPACE:
+ *target = ' ';
+ break;
+ case SFM_PERIOD:
+ *target = '.';
+ break;
default:
return false;
}
@@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char)
return dest_char;
}
-static __le16 convert_to_sfm_char(char src_char)
+static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
{
__le16 dest_char;
@@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char)
case '|':
dest_char = cpu_to_le16(SFM_PIPE);
break;
+ case '.':
+ if (end_of_string)
+ dest_char = cpu_to_le16(SFM_PERIOD);
+ else
+ dest_char = 0;
+ break;
+ case ' ':
+ if (end_of_string)
+ dest_char = cpu_to_le16(SFM_SPACE);
+ else
+ dest_char = 0;
+ break;
default:
dest_char = 0;
}
@@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
/* see if we must remap this char */
if (map_chars == SFU_MAP_UNI_RSVD)
dst_char = convert_to_sfu_char(src_char);
- else if (map_chars == SFM_MAP_UNI_RSVD)
- dst_char = convert_to_sfm_char(src_char);
- else
+ else if (map_chars == SFM_MAP_UNI_RSVD) {
+ bool end_of_string;
+
+ if (i == srclen - 1)
+ end_of_string = true;
+ else
+ end_of_string = false;
+
+ dst_char = convert_to_sfm_char(src_char, end_of_string);
+ } else
dst_char = 0;
/*
* FIXME: We can not handle remapping backslash (UNI_SLASH)
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index bdc52cb9a676..479bc0a941f3 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -64,6 +64,8 @@
#define SFM_LESSTHAN ((__u16) 0xF023)
#define SFM_PIPE ((__u16) 0xF027)
#define SFM_SLASH ((__u16) 0xF026)
+#define SFM_PERIOD ((__u16) 0xF028)
+#define SFM_SPACE ((__u16) 0xF029)
/*
* Mapping mechanism to use when one of the seven reserved characters is
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5d8b7edf8a8f..5d841f39c4b7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp;
extern mempool_t *cifs_mid_poolp;
struct workqueue_struct *cifsiod_wq;
+__u32 cifs_lock_secret;
/*
* Bumps refcount for cifs super block.
@@ -1266,6 +1267,8 @@ init_cifs(void)
spin_lock_init(&cifs_file_list_lock);
spin_lock_init(&GlobalMid_Lock);
+ get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
+
if (cifs_max_pending < 2) {
cifs_max_pending = 2;
cifs_dbg(FYI, "cifs_max_pending set to min of 2\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bba106cdc43c..8f1d8c1e72be 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1619,6 +1619,7 @@ void cifs_oplock_break(struct work_struct *work);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
+extern __u32 cifs_lock_secret;
extern mempool_t *cifs_mid_poolp;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 66736f57b5ab..7d2b15c06090 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work)
* server->ops->need_neg() == true. Also, no need to ping if
* we got a response recently.
*/
- if (!server->ops->need_neg || server->ops->need_neg(server) ||
+
+ if (server->tcpStatus == CifsNeedReconnect ||
+ server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
time_before(jiffies, server->lstrp + echo_interval - HZ))
goto requeue_echo;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index c3eb998a99bd..fb0903fffc22 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -445,7 +445,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
* Check for hashed negative dentry. We have already revalidated
* the dentry and it is fine. No need to perform another lookup.
*/
- if (!d_unhashed(direntry))
+ if (!d_in_lookup(direntry))
return -ENOENT;
res = cifs_lookup(inode, direntry, 0);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9793ae0bcaa2..d4890b6dc22d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1112,6 +1112,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
return rc;
}
+static __u32
+hash_lockowner(fl_owner_t owner)
+{
+ return cifs_lock_secret ^ hash32_ptr((const void *)owner);
+}
+
struct lock_to_push {
struct list_head llist;
__u64 offset;
@@ -1178,7 +1184,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
else
type = CIFS_WRLCK;
lck = list_entry(el, struct lock_to_push, llist);
- lck->pid = flock->fl_pid;
+ lck->pid = hash_lockowner(flock->fl_owner);
lck->netfid = cfile->fid.netfid;
lck->length = length;
lck->type = type;
@@ -1305,7 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
posix_lock_type = CIFS_RDLCK;
else
posix_lock_type = CIFS_WRLCK;
- rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
+ rc = CIFSSMBPosixLock(xid, tcon, netfid,
+ hash_lockowner(flock->fl_owner),
flock->fl_start, length, flock,
posix_lock_type, wait_flag);
return rc;
@@ -1505,7 +1512,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
posix_lock_type = CIFS_UNLCK;
rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
- current->tgid, flock->fl_start, length,
+ hash_lockowner(flock->fl_owner),
+ flock->fl_start, length,
NULL, posix_lock_type, wait_flag);
goto out;
}
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 848249fa120f..3079b38f0afb 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE {
int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
-int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen,
+int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
const struct nls_table *nls_cp);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index af0ec2d5ad0e..538d9b55699a 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
sec_blob->DomainName.MaximumLength = 0;
}
-/* We do not malloc the blob, it is passed in pbuffer, because its
- maximum possible size is fixed and small, making this approach cleaner.
- This function returns the length of the data in the blob */
-int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+static int size_of_ntlmssp_blob(struct cifs_ses *ses)
+{
+ int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
+ - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
+
+ if (ses->domainName)
+ sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ else
+ sz += 2;
+
+ if (ses->user_name)
+ sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+ else
+ sz += 2;
+
+ return sz;
+}
+
+int build_ntlmssp_auth_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
int rc;
- AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
+ AUTHENTICATE_MESSAGE *sec_blob;
__u32 flags;
unsigned char *tmp;
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+ *buflen = 0;
+ goto setup_ntlmv2_ret;
+ }
+ *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+ sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
+
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
@@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
}
- tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
+ tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
sec_blob->LmChallengeResponse.BufferOffset =
@@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.Length = 0;
sec_blob->LmChallengeResponse.MaximumLength = 0;
- sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->NtChallengeResponse.BufferOffset =
+ cpu_to_le32(tmp - *pbuffer);
if (ses->user_name != NULL) {
- rc = setup_ntlmv2_rsp(ses, nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
- goto setup_ntlmv2_ret;
- }
memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
ses->auth_key.len - CIFS_SESS_KEY_SIZE);
tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
@@ -423,23 +443,23 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
}
if (ses->domainName == NULL) {
- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->DomainName.Length = 0;
sec_blob->DomainName.MaximumLength = 0;
tmp += 2;
} else {
int len;
len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
- CIFS_MAX_USERNAME_LEN, nls_cp);
+ CIFS_MAX_DOMAINNAME_LEN, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->DomainName.Length = cpu_to_le16(len);
sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
tmp += len;
}
if (ses->user_name == NULL) {
- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->UserName.Length = 0;
sec_blob->UserName.MaximumLength = 0;
tmp += 2;
@@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
CIFS_MAX_USERNAME_LEN, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->UserName.Length = cpu_to_le16(len);
sec_blob->UserName.MaximumLength = cpu_to_le16(len);
tmp += len;
}
- sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->WorkstationName.Length = 0;
sec_blob->WorkstationName.MaximumLength = 0;
tmp += 2;
@@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
(ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
&& !calc_seckey(ses)) {
memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
sec_blob->SessionKey.MaximumLength =
cpu_to_le16(CIFS_CPHTXT_SIZE);
tmp += CIFS_CPHTXT_SIZE;
} else {
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = 0;
sec_blob->SessionKey.MaximumLength = 0;
}
+ *buflen = tmp - *pbuffer;
setup_ntlmv2_ret:
- *buflen = tmp - pbuffer;
return rc;
}
@@ -690,6 +710,8 @@ sess_auth_lanman(struct sess_data *sess_data)
rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
true : false, lnm_session_key);
+ if (rc)
+ goto out;
memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
bcc_ptr += CIFS_AUTH_RESP_SIZE;
@@ -1266,7 +1288,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses;
__u16 bytes_remaining;
char *bcc_ptr;
- char *ntlmsspblob = NULL;
+ unsigned char *ntlmsspblob = NULL;
u16 blob_len;
cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
@@ -1279,19 +1301,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
/* Build security blob before we assemble the request */
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
smb_buf = (struct smb_hdr *)pSMB;
- /*
- * 5 is an empirical value, large enough to hold
- * authenticate message plus max 10 of av paris,
- * domain, user, workstation names, flags, etc.
- */
- ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
- GFP_KERNEL);
- if (!ntlmsspblob) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = build_ntlmssp_auth_blob(ntlmsspblob,
+ rc = build_ntlmssp_auth_blob(&ntlmsspblob,
&blob_len, ses, sess_data->nls_cp);
if (rc)
goto out_free_ntlmsspblob;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f38e33d365b..29e06db5f187 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
u16 blob_length = 0;
struct key *spnego_key = NULL;
char *security_blob = NULL;
- char *ntlmssp_blob = NULL;
+ unsigned char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
cifs_dbg(FYI, "Session Setup\n");
@@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate:
iov[1].iov_len = blob_length;
} else if (phase == NtLmAuthenticate) {
req->hdr.SessionId = ses->Suid;
- ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
- GFP_KERNEL);
- if (ntlmssp_blob == NULL) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
- rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses,
+ rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
nls_cp);
if (rc) {
cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
@@ -1818,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server)
cifs_dbg(FYI, "In echo request\n");
+ if (server->tcpStatus == CifsNeedNegotiate) {
+ struct list_head *tmp, *tmp2;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+
+ cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &server->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ list_for_each(tmp2, &ses->tcon_list) {
+ tcon = list_entry(tmp2, struct cifs_tcon,
+ tcon_list);
+ /* add check for persistent handle reconnect */
+ if (tcon && tcon->need_reconnect) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ rc = smb2_reconnect(SMB2_ECHO, tcon);
+ spin_lock(&cifs_tcp_ses_lock);
+ }
+ }
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+
+ /* if no session, renegotiate failed above */
+ if (server->tcpStatus == CifsNeedNegotiate)
+ return -EIO;
+
rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
if (rc)
return rc;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 33b7ee34eda5..bbc1252a59f5 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -357,8 +357,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf,
len = simple_write_to_buffer(buffer->bin_buffer,
buffer->bin_buffer_size, ppos, buf, count);
- if (len > 0)
- *ppos += len;
out:
mutex_unlock(&buffer->mutex);
return len;
diff --git a/fs/coredump.c b/fs/coredump.c
index 38a7ab87e10a..281b768000e6 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
return 0;
file->f_pos = pos;
cprm->written += n;
+ cprm->pos += n;
nr -= n;
}
return 1;
@@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
if (dump_interrupted() ||
file->f_op->llseek(file, nr, SEEK_CUR) < 0)
return 0;
+ cprm->pos += nr;
return 1;
} else {
while (nr > PAGE_SIZE) {
@@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip);
int dump_align(struct coredump_params *cprm, int align)
{
- unsigned mod = cprm->file->f_pos & (align - 1);
+ unsigned mod = cprm->pos & (align - 1);
if (align & (align - 1))
return 0;
return mod ? dump_skip(cprm, align - mod) : 1;
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 2fc8c43ce531..c502c116924c 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -318,6 +318,7 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, ciphertext_page,
inode->i_sb->s_blocksize, 0);
if (ret != inode->i_sb->s_blocksize) {
@@ -327,7 +328,7 @@ int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
err = -EIO;
goto errout;
}
- err = submit_bio_wait(WRITE, bio);
+ err = submit_bio_wait(bio);
if ((err == 0) && bio->bi_error)
err = -EIO;
bio_put(bio);
diff --git a/fs/dax.c b/fs/dax.c
index 761495bf5eb9..e207f8f9b700 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
dax.addr += first;
size = map_len - first;
}
- max = min(pos + size, end);
+ /*
+ * pos + size is one past the last offset for IO,
+ * so pos + size can overflow loff_t at extreme offsets.
+ * Cast to u64 to catch this and get the true minimum.
+ */
+ max = min_t(u64, pos + size, end);
}
if (iov_iter_rw(iter) == WRITE) {
diff --git a/fs/dcache.c b/fs/dcache.c
index ad4a542e9bab..d6847d7b123d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry)
}
EXPORT_SYMBOL(d_drop);
+static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
+{
+ struct dentry *next;
+ /*
+ * Inform d_walk() and shrink_dentry_list() that we are no longer
+ * attached to the dentry tree
+ */
+ dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ if (unlikely(list_empty(&dentry->d_child)))
+ return;
+ __list_del_entry(&dentry->d_child);
+ /*
+ * Cursors can move around the list of children. While we'd been
+ * a normal list member, it didn't matter - ->d_child.next would've
+ * been updated. However, from now on it won't be and for the
+ * things like d_walk() it might end up with a nasty surprise.
+ * Normally d_walk() doesn't care about cursors moving around -
+ * ->d_lock on parent prevents that and since a cursor has no children
+ * of its own, we get through it without ever unlocking the parent.
+ * There is one exception, though - if we ascend from a child that
+ * gets killed as soon as we unlock it, the next sibling is found
+ * using the value left in its ->d_child.next. And if _that_
+ * pointed to a cursor, and cursor got moved (e.g. by lseek())
+ * before d_walk() regains parent->d_lock, we'll end up skipping
+ * everything the cursor had been moved past.
+ *
+ * Solution: make sure that the pointer left behind in ->d_child.next
+ * points to something that won't be moving around. I.e. skip the
+ * cursors.
+ */
+ while (dentry->d_child.next != &parent->d_subdirs) {
+ next = list_entry(dentry->d_child.next, struct dentry, d_child);
+ if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
+ break;
+ dentry->d_child.next = next->d_child.next;
+ }
+}
+
static void __dentry_kill(struct dentry *dentry)
{
struct dentry *parent = NULL;
@@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry)
}
/* if it was on the hash then remove it */
__d_drop(dentry);
- __list_del_entry(&dentry->d_child);
- /*
- * Inform d_walk() that we are no longer attached to the
- * dentry tree
- */
- dentry->d_flags |= DCACHE_DENTRY_KILLED;
+ dentry_unlist(dentry, parent);
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1203,6 +1236,9 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
+ if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
+ continue;
+
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
ret = enter(data, dentry);
@@ -1636,7 +1672,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
struct dentry *dentry = __d_alloc(parent->d_sb, name);
if (!dentry)
return NULL;
-
+ dentry->d_flags |= DCACHE_RCUACCESS;
spin_lock(&parent->d_lock);
/*
* don't need child lock because it is not subject
@@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
}
EXPORT_SYMBOL(d_alloc);
+struct dentry *d_alloc_cursor(struct dentry * parent)
+{
+ struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+ if (dentry) {
+ dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+ dentry->d_parent = dget(parent);
+ }
+ return dentry;
+}
+
/**
* d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
* @sb: the superblock
@@ -2358,7 +2404,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
{
BUG_ON(!d_unhashed(entry));
hlist_bl_lock(b);
- entry->d_flags |= DCACHE_RCUACCESS;
hlist_bl_add_head_rcu(&entry->d_hash, b);
hlist_bl_unlock(b);
}
@@ -2458,7 +2503,6 @@ retry:
rcu_read_unlock();
goto retry;
}
- rcu_read_unlock();
/*
* No changes for the parent since the beginning of d_lookup().
* Since all removals from the chain happen with hlist_bl_lock(),
@@ -2471,8 +2515,6 @@ retry:
continue;
if (dentry->d_parent != parent)
continue;
- if (d_unhashed(dentry))
- continue;
if (parent->d_flags & DCACHE_OP_COMPARE) {
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
@@ -2484,9 +2526,18 @@ retry:
if (dentry_cmp(dentry, str, len))
continue;
}
- dget(dentry);
hlist_bl_unlock(b);
- /* somebody is doing lookup for it right now; wait for it */
+ /* now we can try to grab a reference */
+ if (!lockref_get_not_dead(&dentry->d_lockref)) {
+ rcu_read_unlock();
+ goto retry;
+ }
+
+ rcu_read_unlock();
+ /*
+ * somebody is likely to be still doing lookup for it;
+ * wait for them to finish
+ */
spin_lock(&dentry->d_lock);
d_wait_lookup(dentry);
/*
@@ -2517,6 +2568,7 @@ retry:
dput(new);
return dentry;
}
+ rcu_read_unlock();
/* we can't take ->d_lock here; it's OK, though. */
new->d_flags |= DCACHE_PAR_LOOKUP;
new->d_wait = wq;
@@ -2843,6 +2895,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
/* ... and switch them in the tree */
if (IS_ROOT(dentry)) {
/* splicing a tree */
+ dentry->d_flags |= DCACHE_RCUACCESS;
dentry->d_parent = target->d_parent;
target->d_parent = target;
list_del_init(&target->d_child);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 9c1c9a01b7e5..592059f88e04 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -127,7 +127,6 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
r = real_fops->open(inode, filp);
out:
- fops_put(real_fops);
debugfs_use_file_finish(srcu_idx);
return r;
}
@@ -262,8 +261,10 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
if (real_fops->open) {
r = real_fops->open(inode, filp);
-
- if (filp->f_op != proxy_fops) {
+ if (r) {
+ replace_fops(filp, d_inode(dentry)->i_fop);
+ goto free_proxy;
+ } else if (filp->f_op != proxy_fops) {
/* No protection against file removal anymore. */
WARN(1, "debugfs file owner replaced proxy fops: %pd",
dentry);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 0b2954d7172d..37c134a132c7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = {
static DEFINE_MUTEX(allocated_ptys_lock);
-static struct vfsmount *devpts_mnt;
-
struct pts_mount_opts {
int setuid;
int setgid;
@@ -104,7 +102,7 @@ struct pts_mount_opts {
kgid_t gid;
umode_t mode;
umode_t ptmxmode;
- int newinstance;
+ int reserve;
int max;
};
@@ -117,11 +115,9 @@ static const match_table_t tokens = {
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_mode, "mode=%o"},
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
{Opt_ptmxmode, "ptmxmode=%o"},
{Opt_newinstance, "newinstance"},
{Opt_max, "max=%d"},
-#endif
{Opt_err, NULL}
};
@@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
return sb->s_fs_info;
}
-static inline struct super_block *pts_sb_from_inode(struct inode *inode)
+struct pts_fs_info *devpts_acquire(struct file *filp)
{
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
- if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
- return inode->i_sb;
-#endif
- if (!devpts_mnt)
- return NULL;
- return devpts_mnt->mnt_sb;
+ struct pts_fs_info *result;
+ struct path path;
+ struct super_block *sb;
+ int err;
+
+ path = filp->f_path;
+ path_get(&path);
+
+ /* Has the devpts filesystem already been found? */
+ sb = path.mnt->mnt_sb;
+ if (sb->s_magic != DEVPTS_SUPER_MAGIC) {
+ /* Is a devpts filesystem at "pts" in the same directory? */
+ err = path_pts(&path);
+ if (err) {
+ result = ERR_PTR(err);
+ goto out;
+ }
+
+ /* Is the path the root of a devpts filesystem? */
+ result = ERR_PTR(-ENODEV);
+ sb = path.mnt->mnt_sb;
+ if ((sb->s_magic != DEVPTS_SUPER_MAGIC) ||
+ (path.mnt->mnt_root != sb->s_root))
+ goto out;
+ }
+
+ /*
+ * pty code needs to hold extra references in case of last /dev/tty close
+ */
+ atomic_inc(&sb->s_active);
+ result = DEVPTS_SB(sb);
+
+out:
+ path_put(&path);
+ return result;
+}
+
+void devpts_release(struct pts_fs_info *fsi)
+{
+ deactivate_super(fsi->sb);
}
#define PARSE_MOUNT 0
@@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
/*
* parse_mount_options():
* Set @opts to mount options specified in @data. If an option is not
- * specified in @data, set it to its default value. The exception is
- * 'newinstance' option which can only be set/cleared on a mount (i.e.
- * cannot be changed during remount).
+ * specified in @data, set it to its default value.
*
* Note: @data may be NULL (in which case all options are set to default).
*/
@@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
opts->max = NR_UNIX98_PTY_MAX;
- /* newinstance makes sense only on initial mount */
+ /* Only allow instances mounted from the initial mount
+ * namespace to tap the reserve pool of ptys.
+ */
if (op == PARSE_MOUNT)
- opts->newinstance = 0;
+ opts->reserve =
+ (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns);
while ((p = strsep(&data, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
@@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return -EINVAL;
opts->mode = option & S_IALLUGO;
break;
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
case Opt_ptmxmode:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->ptmxmode = option & S_IALLUGO;
break;
case Opt_newinstance:
- /* newinstance makes sense only on initial mount */
- if (op == PARSE_MOUNT)
- opts->newinstance = 1;
break;
case Opt_max:
if (match_int(&args[0], &option) ||
@@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return -EINVAL;
opts->max = option;
break;
-#endif
default:
pr_err("called with bogus options\n");
return -EINVAL;
@@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return 0;
}
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
static int mknod_ptmx(struct super_block *sb)
{
int mode;
@@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi)
inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
}
}
-#else
-static inline void update_ptmx_mode(struct pts_fs_info *fsi)
-{
- return;
-}
-#endif
static int devpts_remount(struct super_block *sb, int *flags, char *data)
{
@@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",gid=%u",
from_kgid_munged(&init_user_ns, opts->gid));
seq_printf(seq, ",mode=%03o", opts->mode);
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
if (opts->max < NR_UNIX98_PTY_MAX)
seq_printf(seq, ",max=%d", opts->max);
-#endif
return 0;
}
@@ -410,40 +426,11 @@ fail:
return -ENOMEM;
}
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
-static int compare_init_pts_sb(struct super_block *s, void *p)
-{
- if (devpts_mnt)
- return devpts_mnt->mnt_sb == s;
- return 0;
-}
-
/*
* devpts_mount()
*
- * If the '-o newinstance' mount option was specified, mount a new
- * (private) instance of devpts. PTYs created in this instance are
- * independent of the PTYs in other devpts instances.
- *
- * If the '-o newinstance' option was not specified, mount/remount the
- * initial kernel mount of devpts. This type of mount gives the
- * legacy, single-instance semantics.
- *
- * The 'newinstance' option is needed to support multiple namespace
- * semantics in devpts while preserving backward compatibility of the
- * current 'single-namespace' semantics. i.e all mounts of devpts
- * without the 'newinstance' mount option should bind to the initial
- * kernel mount, like mount_single().
- *
- * Mounts with 'newinstance' option create a new, private namespace.
- *
- * NOTE:
- *
- * For single-mount semantics, devpts cannot use mount_single(),
- * because mount_single()/sget() find and use the super-block from
- * the most recent mount of devpts. But that recent mount may be a
- * 'newinstance' mount and mount_single() would pick the newinstance
- * super-block instead of the initial super-block.
+ * Mount a new (private) instance of devpts. PTYs created in this
+ * instance are independent of the PTYs in other devpts instances.
*/
static struct dentry *devpts_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
@@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
if (error)
return ERR_PTR(error);
- /* Require newinstance for all user namespace mounts to ensure
- * the mount options are not changed.
- */
- if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
- return ERR_PTR(-EINVAL);
-
- if (opts.newinstance)
- s = sget(fs_type, NULL, set_anon_super, flags, NULL);
- else
- s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
- NULL);
-
+ s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s))
return ERR_CAST(s);
@@ -491,18 +467,6 @@ out_undo_sget:
return ERR_PTR(error);
}
-#else
-/*
- * This supports only the legacy single-instance semantics (no
- * multiple-instance semantics)
- */
-static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data)
-{
- return mount_single(fs_type, flags, data, devpts_fill_super);
-}
-#endif
-
static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
@@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = {
.name = "devpts",
.mount = devpts_mount,
.kill_sb = devpts_kill_sb,
-#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
.fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
-#endif
};
/*
@@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi)
int index;
int ida_ret;
- if (!fsi)
- return -ENODEV;
-
retry:
if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
return -ENOMEM;
mutex_lock(&allocated_ptys_lock);
- if (pty_count >= pty_limit -
- (fsi->mount_opts.newinstance ? pty_reserve : 0)) {
+ if (pty_count >= (pty_limit -
+ (fsi->mount_opts.reserve ? 0 : pty_reserve))) {
mutex_unlock(&allocated_ptys_lock);
return -ENOSPC;
}
@@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
mutex_unlock(&allocated_ptys_lock);
}
-/*
- * pty code needs to hold extra references in case of last /dev/tty close
- */
-struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
-{
- struct super_block *sb;
- struct pts_fs_info *fsi;
-
- sb = pts_sb_from_inode(ptmx_inode);
- if (!sb)
- return NULL;
- fsi = DEVPTS_SB(sb);
- if (!fsi)
- return NULL;
-
- atomic_inc(&sb->s_active);
- return fsi;
-}
-
-void devpts_put_ref(struct pts_fs_info *fsi)
-{
- deactivate_super(fsi->sb);
-}
-
/**
* devpts_pty_new -- create a new inode in /dev/pts/
* @ptmx_inode: inode of the master
@@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi)
struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
{
struct dentry *dentry;
- struct super_block *sb;
+ struct super_block *sb = fsi->sb;
struct inode *inode;
struct dentry *root;
struct pts_mount_opts *opts;
char s[12];
- if (!fsi)
- return ERR_PTR(-ENODEV);
-
- sb = fsi->sb;
root = sb->s_root;
opts = &fsi->mount_opts;
@@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry)
static int __init init_devpts_fs(void)
{
int err = register_filesystem(&devpts_fs_type);
- struct ctl_table_header *table;
-
if (!err) {
- struct vfsmount *mnt;
-
- table = register_sysctl_table(pty_root_table);
- mnt = kern_mount(&devpts_fs_type);
- if (IS_ERR(mnt)) {
- err = PTR_ERR(mnt);
- unregister_filesystem(&devpts_fs_type);
- unregister_sysctl_table(table);
- } else {
- devpts_mnt = mnt;
- }
+ register_sysctl_table(pty_root_table);
}
return err;
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f3b4408be590..7c3ce73cb617 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -108,7 +108,8 @@ struct dio_submit {
/* dio_state communicated between submission path and end_io */
struct dio {
int flags; /* doesn't change */
- int rw;
+ int op;
+ int op_flags;
blk_qc_t bio_cookie;
struct block_device *bio_bdev;
struct inode *inode;
@@ -163,7 +164,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
&sdio->from);
- if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
+ if (ret < 0 && sdio->blocks_available && (dio->op == REQ_OP_WRITE)) {
struct page *page = ZERO_PAGE(0);
/*
* A memory fault, but the filesystem has some outstanding
@@ -242,7 +243,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
transferred = dio->result;
/* Check for short read case */
- if ((dio->rw == READ) && ((offset + transferred) > dio->i_size))
+ if ((dio->op == REQ_OP_READ) &&
+ ((offset + transferred) > dio->i_size))
transferred = dio->i_size - offset;
}
@@ -273,7 +275,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
*/
dio->iocb->ki_pos += transferred;
- if (dio->rw & WRITE)
+ if (dio->op == REQ_OP_WRITE)
ret = generic_write_sync(dio->iocb, transferred);
dio->iocb->ki_complete(dio->iocb, ret, 0);
}
@@ -375,6 +377,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
bio->bi_bdev = bdev;
bio->bi_iter.bi_sector = first_sector;
+ bio_set_op_attrs(bio, dio->op, dio->op_flags);
if (dio->is_async)
bio->bi_end_io = dio_bio_end_aio;
else
@@ -402,17 +405,16 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
dio->refcount++;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- if (dio->is_async && dio->rw == READ && dio->should_dirty)
+ if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty)
bio_set_pages_dirty(bio);
dio->bio_bdev = bio->bi_bdev;
if (sdio->submit_io) {
- sdio->submit_io(dio->rw, bio, dio->inode,
- sdio->logical_offset_in_bio);
+ sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
dio->bio_cookie = BLK_QC_T_NONE;
} else
- dio->bio_cookie = submit_bio(dio->rw, bio);
+ dio->bio_cookie = submit_bio(bio);
sdio->bio = NULL;
sdio->boundary = 0;
@@ -478,14 +480,14 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
if (bio->bi_error)
dio->io_error = -EIO;
- if (dio->is_async && dio->rw == READ && dio->should_dirty) {
+ if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
err = bio->bi_error;
bio_check_pages_dirty(bio); /* transfers ownership */
} else {
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- if (dio->rw == READ && !PageCompound(page) &&
+ if (dio->op == REQ_OP_READ && !PageCompound(page) &&
dio->should_dirty)
set_page_dirty_lock(page);
put_page(page);
@@ -638,7 +640,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
* which may decide to handle it or also return an unmapped
* buffer head.
*/
- create = dio->rw & WRITE;
+ create = dio->op == REQ_OP_WRITE;
if (dio->flags & DIO_SKIP_HOLES) {
if (fs_startblk <= ((i_size_read(dio->inode) - 1) >>
i_blkbits))
@@ -788,7 +790,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
{
int ret = 0;
- if (dio->rw & WRITE) {
+ if (dio->op == REQ_OP_WRITE) {
/*
* Read accounting is performed in submit_bio()
*/
@@ -988,7 +990,7 @@ do_holes:
loff_t i_size_aligned;
/* AKPM: eargh, -ENOTBLK is a hack */
- if (dio->rw & WRITE) {
+ if (dio->op == REQ_OP_WRITE) {
put_page(page);
return -ENOTBLK;
}
@@ -1202,7 +1204,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
dio->is_async = true;
dio->inode = inode;
- dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ;
+ if (iov_iter_rw(iter) == WRITE) {
+ dio->op = REQ_OP_WRITE;
+ dio->op_flags = WRITE_ODIRECT;
+ } else {
+ dio->op = REQ_OP_READ;
+ }
/*
* For AIO O_(D)SYNC writes we need to defer completions to a workqueue
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 0d8eb3455b34..e5e29f8c920b 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -45,7 +45,7 @@
* ecryptfs_to_hex
* @dst: Buffer to take hex character representation of contents of
* src; must be at least of size (src_size * 2)
- * @src: Buffer to be converted to a hex string respresentation
+ * @src: Buffer to be converted to a hex string representation
* @src_size: number of bytes to convert
*/
void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
@@ -60,7 +60,7 @@ void ecryptfs_to_hex(char *dst, char *src, size_t src_size)
* ecryptfs_from_hex
* @dst: Buffer to take the bytes from src hex; must be at least of
* size (src_size / 2)
- * @src: Buffer to be converted from a hex string respresentation to raw value
+ * @src: Buffer to be converted from a hex string representation to raw value
* @dst_size: size of dst buffer, or number of hex characters pairs to convert
*/
void ecryptfs_from_hex(char *dst, char *src, int dst_size)
@@ -953,7 +953,7 @@ struct ecryptfs_cipher_code_str_map_elem {
};
/* Add support for additional ciphers by adding elements here. The
- * cipher_code is whatever OpenPGP applicatoins use to identify the
+ * cipher_code is whatever OpenPGP applications use to identify the
* ciphers. List in order of probability. */
static struct ecryptfs_cipher_code_str_map_elem
ecryptfs_cipher_code_str_map[] = {
@@ -1410,7 +1410,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry,
*
* Common entry point for reading file metadata. From here, we could
* retrieve the header information from the header region of the file,
- * the xattr region of the file, or some other repostory that is
+ * the xattr region of the file, or some other repository that is
* stored separately from the file itself. The current implementation
* supports retrieving the metadata information from the file contents
* and from the xattr region.
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 7000b96b783e..ca4e83750214 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -169,9 +169,22 @@ out:
return rc;
}
+static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct file *lower_file = ecryptfs_file_to_lower(file);
+ /*
+ * Don't allow mmap on top of file systems that don't support it
+ * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs
+ * allows recursive mounting, this will need to be extended.
+ */
+ if (!lower_file->f_op->mmap)
+ return -ENODEV;
+ return generic_file_mmap(file, vma);
+}
+
/**
* ecryptfs_open
- * @inode: inode speciying file to open
+ * @inode: inode specifying file to open
* @file: Structure to return filled in
*
* Opens the file specified by inode.
@@ -240,7 +253,7 @@ out:
/**
* ecryptfs_dir_open
- * @inode: inode speciying file to open
+ * @inode: inode specifying file to open
* @file: Structure to return filled in
*
* Opens the file specified by inode.
@@ -403,7 +416,7 @@ const struct file_operations ecryptfs_main_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = ecryptfs_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1698132d0e57..612004495141 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -738,8 +738,7 @@ static void ecryptfs_free_kmem_caches(void)
struct ecryptfs_cache_info *info;
info = &ecryptfs_cache_infos[i];
- if (*(info->cache))
- kmem_cache_destroy(*(info->cache));
+ kmem_cache_destroy(*(info->cache));
}
}
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 7bd8ac8dfb28..8bb72807e70d 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -878,7 +878,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
} else {
bio = master_dev->bio;
/* FIXME: bio_set_dir() */
- bio->bi_rw |= REQ_WRITE;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
}
osd_req_write(or, _ios_obj(ios, cur_comp),
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 799a92bdf577..e04ec868e37e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -473,7 +473,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
trace_ext4_read_block_bitmap_load(sb, block_group);
bh->b_end_io = ext4_end_bitmap_read;
get_bh(bh);
- submit_bh(READ | REQ_META | REQ_PRIO, bh);
+ submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
return bh;
verify:
err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 35f351895b89..9e66cd1d7b78 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -214,7 +214,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
trace_ext4_load_inode_bitmap(sb, block_group);
bh->b_end_io = ext4_end_bitmap_read;
get_bh(bh);
- submit_bh(READ | REQ_META | REQ_PRIO, bh);
+ submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
put_bh(bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5a6277d80f7c..3131747199e1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -987,7 +987,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return bh;
if (!bh || buffer_uptodate(bh))
return bh;
- ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1141,7 +1141,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
*wait_bh++ = bh;
decrypt = ext4_encrypted_inode(inode) &&
S_ISREG(inode->i_mode);
@@ -3727,7 +3727,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (!buffer_uptodate(bh)) {
err = -EIO;
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
@@ -4310,7 +4310,7 @@ make_io:
trace_ext4_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ | REQ_META | REQ_PRIO, bh);
+ submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE_BLOCK(inode, block,
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 23d436d6f8b8..d89754ef1aab 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
lock_buffer(bh);
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
+ submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh)))
@@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
get_bh(*bh);
lock_buffer(*bh);
(*bh)->b_end_io = end_buffer_read_sync;
- submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
+ submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh);
wait_on_buffer(*bh);
if (!buffer_uptodate(*bh)) {
ret = -EIO;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 4637c439ca54..34c0142caf6a 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1447,7 +1447,8 @@ restart:
}
bh_use[ra_max] = bh;
if (bh)
- ll_rw_block(READ | REQ_META | REQ_PRIO,
+ ll_rw_block(REQ_OP_READ,
+ REQ_META | REQ_PRIO,
1, &bh);
}
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 5ad05af51dd8..a6132a730967 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -339,9 +339,10 @@ void ext4_io_submit(struct ext4_io_submit *io)
struct bio *bio = io->io_bio;
if (bio) {
- int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC : WRITE;
- submit_bio(io_op, io->io_bio);
+ int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : 0;
+ bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
+ submit_bio(io->io_bio);
}
io->io_bio = NULL;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 18b2cf23d40f..bfc7f4d30643 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -235,7 +235,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
*/
if (bio && (last_block_in_bio != blocks[0] - 1)) {
submit_and_realloc:
- submit_bio(READ, bio);
+ submit_bio(bio);
bio = NULL;
}
if (bio == NULL) {
@@ -258,6 +258,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
bio->bi_private = ctx;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
}
length = first_hole << blkbits;
@@ -267,14 +268,14 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
(relative_block == map.m_len)) ||
(first_hole != blocks_per_page)) {
- submit_bio(READ, bio);
+ submit_bio(bio);
bio = NULL;
} else
last_block_in_bio = blocks[blocks_per_page - 1];
goto next_page;
confused:
if (bio) {
- submit_bio(READ, bio);
+ submit_bio(bio);
bio = NULL;
}
if (!PageUptodate(page))
@@ -287,6 +288,6 @@ int ext4_mpage_readpages(struct address_space *mapping,
}
BUG_ON(pages && !list_empty(pages));
if (bio)
- submit_bio(READ, bio);
+ submit_bio(bio);
return 0;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c13a4e464738..1c593aa0218e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4307,7 +4307,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
goto out_bdev;
}
journal->j_private = sb;
- ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);
wait_on_buffer(journal->j_sb_buffer);
if (!buffer_uptodate(journal->j_sb_buffer)) {
ext4_msg(sb, KERN_ERR, "I/O error on journal device");
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 389160049993..124b4a3017b5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -63,14 +63,15 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = META,
- .rw = READ_SYNC | REQ_META | REQ_PRIO,
+ .op = REQ_OP_READ,
+ .op_flags = READ_SYNC | REQ_META | REQ_PRIO,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
};
if (unlikely(!is_meta))
- fio.rw &= ~REQ_META;
+ fio.op_flags &= ~REQ_META;
repeat:
page = f2fs_grab_cache_page(mapping, index, false);
if (!page) {
@@ -157,13 +158,14 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = META,
- .rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
+ .op = REQ_OP_READ,
+ .op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
};
struct blk_plug plug;
if (unlikely(type == META_POR))
- fio.rw &= ~REQ_META;
+ fio.op_flags &= ~REQ_META;
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9a8bbc1fb1fa..8769e8349dff 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -97,12 +97,11 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
return bio;
}
-static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw,
- struct bio *bio)
+static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio)
{
- if (!is_read_io(rw))
+ if (!is_read_io(bio_op(bio)))
atomic_inc(&sbi->nr_wb_bios);
- submit_bio(rw, bio);
+ submit_bio(bio);
}
static void __submit_merged_bio(struct f2fs_bio_info *io)
@@ -112,12 +111,14 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
if (!io->bio)
return;
- if (is_read_io(fio->rw))
+ if (is_read_io(fio->op))
trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
else
trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
- __submit_bio(io->sbi, fio->rw, io->bio);
+ bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+
+ __submit_bio(io->sbi, io->bio);
io->bio = NULL;
}
@@ -183,10 +184,12 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
+ io->fio.op = REQ_OP_WRITE;
if (test_opt(sbi, NOBARRIER))
- io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
+ io->fio.op_flags = WRITE_FLUSH | REQ_META | REQ_PRIO;
else
- io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ io->fio.op_flags = WRITE_FLUSH_FUA | REQ_META |
+ REQ_PRIO;
}
__submit_merged_bio(io);
out:
@@ -228,14 +231,16 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
f2fs_trace_ios(fio, 0);
/* Allocate a new bio */
- bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw));
+ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->op));
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
return -EFAULT;
}
+ bio->bi_rw = fio->op_flags;
+ bio_set_op_attrs(bio, fio->op, fio->op_flags);
- __submit_bio(fio->sbi, fio->rw, bio);
+ __submit_bio(fio->sbi, bio);
return 0;
}
@@ -244,7 +249,7 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
- bool is_read = is_read_io(fio->rw);
+ bool is_read = is_read_io(fio->op);
struct page *bio_page;
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
@@ -256,7 +261,7 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
down_write(&io->io_rwsem);
if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
- io->fio.rw != fio->rw))
+ (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
@@ -390,7 +395,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
}
struct page *get_read_data_page(struct inode *inode, pgoff_t index,
- int rw, bool for_write)
+ int op_flags, bool for_write)
{
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
@@ -400,7 +405,8 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
- .rw = rw,
+ .op = REQ_OP_READ,
+ .op_flags = op_flags,
.encrypted_page = NULL,
};
@@ -1051,7 +1057,7 @@ got_it:
*/
if (bio && (last_block_in_bio != block_nr - 1)) {
submit_and_realloc:
- __submit_bio(F2FS_I_SB(inode), READ, bio);
+ __submit_bio(F2FS_I_SB(inode), bio);
bio = NULL;
}
if (bio == NULL) {
@@ -1080,6 +1086,7 @@ submit_and_realloc:
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
bio->bi_end_io = f2fs_read_end_io;
bio->bi_private = ctx;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
}
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
@@ -1094,7 +1101,7 @@ set_error_page:
goto next_page;
confused:
if (bio) {
- __submit_bio(F2FS_I_SB(inode), READ, bio);
+ __submit_bio(F2FS_I_SB(inode), bio);
bio = NULL;
}
unlock_page(page);
@@ -1104,7 +1111,7 @@ next_page:
}
BUG_ON(pages && !list_empty(pages));
if (bio)
- __submit_bio(F2FS_I_SB(inode), READ, bio);
+ __submit_bio(F2FS_I_SB(inode), bio);
return 0;
}
@@ -1221,7 +1228,8 @@ static int f2fs_write_data_page(struct page *page,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
- .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ .op = REQ_OP_WRITE,
+ .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
.page = page,
.encrypted_page = NULL,
};
@@ -1662,7 +1670,8 @@ repeat:
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
- .rw = READ_SYNC,
+ .op = REQ_OP_READ,
+ .op_flags = READ_SYNC,
.old_blkaddr = blkaddr,
.new_blkaddr = blkaddr,
.page = page,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 916e7c238e3d..23ae6a81ccd6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -686,14 +686,15 @@ enum page_type {
struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
- int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
+ int op; /* contains REQ_OP_ */
+ int op_flags; /* rq_flag_bits */
block_t new_blkaddr; /* new block address to be written */
block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
};
-#define is_read_io(rw) (((rw) & 1) == READ)
+#define is_read_io(rw) (rw == READ)
struct f2fs_bio_info {
struct f2fs_sb_info *sbi; /* f2fs superblock */
struct bio *bio; /* bios to merge */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 38d56f678912..f06ed73adf99 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -538,7 +538,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
- .rw = READ_SYNC,
+ .op = REQ_OP_READ,
+ .op_flags = READ_SYNC,
.encrypted_page = NULL,
};
struct dnode_of_data dn;
@@ -612,7 +613,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
/* allocate block address */
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
- fio.rw = WRITE_SYNC;
+ fio.op = REQ_OP_WRITE;
+ fio.op_flags = WRITE_SYNC;
fio.new_blkaddr = newaddr;
f2fs_submit_page_mbio(&fio);
@@ -649,7 +651,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
- .rw = WRITE_SYNC,
+ .op = REQ_OP_WRITE,
+ .op_flags = WRITE_SYNC,
.page = page,
.encrypted_page = NULL,
};
@@ -730,7 +733,8 @@ next_step:
start_bidx = start_bidx_of_node(nofs, inode);
data_page = get_read_data_page(inode,
- start_bidx + ofs_in_node, READA, true);
+ start_bidx + ofs_in_node, REQ_RAHEAD,
+ true);
if (IS_ERR(data_page)) {
iput(inode);
continue;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index a4bb155dd00a..c15e53c1d794 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -108,7 +108,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(dn->inode),
.type = DATA,
- .rw = WRITE_SYNC | REQ_PRIO,
+ .op = REQ_OP_WRITE,
+ .op_flags = WRITE_SYNC | REQ_PRIO,
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1f21aae80c40..d1867698e601 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1070,14 +1070,15 @@ fail:
* 0: f2fs_put_page(page, 0)
* LOCKED_PAGE or error: f2fs_put_page(page, 1)
*/
-static int read_node_page(struct page *page, int rw)
+static int read_node_page(struct page *page, int op_flags)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct node_info ni;
struct f2fs_io_info fio = {
.sbi = sbi,
.type = NODE,
- .rw = rw,
+ .op = REQ_OP_READ,
+ .op_flags = op_flags,
.page = page,
.encrypted_page = NULL,
};
@@ -1118,7 +1119,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
if (!apage)
return;
- err = read_node_page(apage, READA);
+ err = read_node_page(apage, REQ_RAHEAD);
f2fs_put_page(apage, err ? 1 : 0);
}
@@ -1568,7 +1569,8 @@ static int f2fs_write_node_page(struct page *page,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = NODE,
- .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ .op = REQ_OP_WRITE,
+ .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 2e6f537a0e7d..4c2d1fa1e0e2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -257,7 +257,8 @@ static int __commit_inmem_pages(struct inode *inode,
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
- .rw = WRITE_SYNC | REQ_PRIO,
+ .op = REQ_OP_WRITE,
+ .op_flags = WRITE_SYNC | REQ_PRIO,
.encrypted_page = NULL,
};
bool submit_bio = false;
@@ -406,7 +407,8 @@ repeat:
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
bio->bi_bdev = sbi->sb->s_bdev;
- ret = submit_bio_wait(WRITE_FLUSH, bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+ ret = submit_bio_wait(bio);
llist_for_each_entry_safe(cmd, next,
fcc->dispatch_list, llnode) {
@@ -438,7 +440,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
int ret;
bio->bi_bdev = sbi->sb->s_bdev;
- ret = submit_bio_wait(WRITE_FLUSH, bio);
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
+ ret = submit_bio_wait(bio);
bio_put(bio);
return ret;
}
@@ -1401,7 +1404,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
struct f2fs_io_info fio = {
.sbi = sbi,
.type = META,
- .rw = WRITE_SYNC | REQ_META | REQ_PRIO,
+ .op = REQ_OP_WRITE,
+ .op_flags = WRITE_SYNC | REQ_META | REQ_PRIO,
.old_blkaddr = page->index,
.new_blkaddr = page->index,
.page = page,
@@ -1409,7 +1413,7 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
};
if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
- fio.rw &= ~REQ_META;
+ fio.op_flags &= ~REQ_META;
set_page_writeback(page);
f2fs_submit_page_mbio(&fio);
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index 562ce0821559..73b4e1d1912a 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -25,11 +25,11 @@ static inline void __print_last_io(void)
if (!last_io.len)
return;
- trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n",
+ trace_printk("%3x:%3x %4x %-16s %2x %5x %5x %12x %4x\n",
last_io.major, last_io.minor,
last_io.pid, "----------------",
last_io.type,
- last_io.fio.rw,
+ last_io.fio.op, last_io.fio.op_flags,
last_io.fio.new_blkaddr,
last_io.len);
memset(&last_io, 0, sizeof(last_io));
@@ -101,7 +101,8 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
if (last_io.major == major && last_io.minor == minor &&
last_io.pid == pid &&
last_io.type == __file_type(inode, pid) &&
- last_io.fio.rw == fio->rw &&
+ last_io.fio.op == fio->op &&
+ last_io.fio.op_flags == fio->op_flags &&
last_io.fio.new_blkaddr + last_io.len ==
fio->new_blkaddr) {
last_io.len++;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index c4589e981760..8a8698119ff7 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -267,7 +267,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
int i, err = 0;
for (i = 0; i < nr_bhs; i++)
- write_dirty_buffer(bhs[i], WRITE);
+ write_dirty_buffer(bhs[i], 0);
for (i = 0; i < nr_bhs; i++) {
wait_on_buffer(bhs[i]);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 989a2cef6b76..fe7e83a45eff 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -483,9 +483,9 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
goto out_free;
}
inode->i_state |= I_WB_SWITCH;
+ __iget(inode);
spin_unlock(&inode->i_lock);
- ihold(inode);
isw->inode = inode;
atomic_inc(&isw_nr_in_flight);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 3078b679fcd1..c8c4f79c7ce1 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -887,6 +887,8 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
put_page(results[i]);
}
+ wake_up_bit(&cookie->flags, 0);
+
_leave("");
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ccd4971cc6c1..cca7b048c07b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
struct dentry *newent;
bool outarg_valid = true;
+ fuse_lock_inode(dir);
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
&outarg, &inode);
+ fuse_unlock_inode(dir);
if (err == -ENOENT) {
outarg_valid = false;
err = 0;
@@ -478,7 +480,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
struct fuse_conn *fc = get_fuse_conn(dir);
struct dentry *res = NULL;
- if (d_unhashed(entry)) {
+ if (d_in_lookup(entry)) {
res = fuse_lookup(dir, entry, 0);
if (IS_ERR(res))
return PTR_ERR(res);
@@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
FUSE_READDIR);
}
+ fuse_lock_inode(inode);
fuse_request_send(fc, req);
+ fuse_unlock_inode(inode);
nbytes = req->out.args[0].size;
err = req->out.h.error;
fuse_put_request(fc, req);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index eddbe02c4028..929c383432b0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -110,6 +110,9 @@ struct fuse_inode {
/** Miscellaneous bits describing inode state */
unsigned long state;
+
+ /** Lock for serializing lookup and readdir for back compatibility*/
+ struct mutex mutex;
};
/** FUSE inode state bits */
@@ -540,6 +543,9 @@ struct fuse_conn {
/** write-back cache policy (default is write-through) */
unsigned writeback_cache:1;
+ /** allow parallel lookups and readdir (default is serialized) */
+ unsigned parallel_dirops:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
void fuse_set_initialized(struct fuse_conn *fc);
+void fuse_unlock_inode(struct inode *inode);
+void fuse_lock_inode(struct inode *inode);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1ce67668a8e1..9961d8432ce3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&fi->queued_writes);
INIT_LIST_HEAD(&fi->writepages);
init_waitqueue_head(&fi->page_waitq);
+ mutex_init(&fi->mutex);
fi->forget = fuse_alloc_forget();
if (!fi->forget) {
kmem_cache_free(fuse_inode_cachep, inode);
@@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode)
struct fuse_inode *fi = get_fuse_inode(inode);
BUG_ON(!list_empty(&fi->write_files));
BUG_ON(!list_empty(&fi->queued_writes));
+ mutex_destroy(&fi->mutex);
kfree(fi->forget);
call_rcu(&inode->i_rcu, fuse_i_callback);
}
@@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
return 0;
}
+void fuse_lock_inode(struct inode *inode)
+{
+ if (!get_fuse_conn(inode)->parallel_dirops)
+ mutex_lock(&get_fuse_inode(inode)->mutex);
+}
+
+void fuse_unlock_inode(struct inode *inode)
+{
+ if (!get_fuse_conn(inode)->parallel_dirops)
+ mutex_unlock(&get_fuse_inode(inode)->mutex);
+}
+
static void fuse_umount_begin(struct super_block *sb)
{
fuse_abort_conn(get_fuse_conn_super(sb));
@@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->async_dio = 1;
if (arg->flags & FUSE_WRITEBACK_CACHE)
fc->writeback_cache = 1;
+ if (arg->flags & FUSE_PARALLEL_DIROPS)
+ fc->parallel_dirops = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
} else {
@@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
- FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
+ FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
+ FUSE_PARALLEL_DIROPS;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 37b7bc14c8da..82df36886938 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -140,6 +140,32 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
return nobh_writepage(page, gfs2_get_block_noalloc, wbc);
}
+/* This is the same as calling block_write_full_page, but it also
+ * writes pages outside of i_size
+ */
+int gfs2_write_full_page(struct page *page, get_block_t *get_block,
+ struct writeback_control *wbc)
+{
+ struct inode * const inode = page->mapping->host;
+ loff_t i_size = i_size_read(inode);
+ const pgoff_t end_index = i_size >> PAGE_SHIFT;
+ unsigned offset;
+
+ /*
+ * The page straddles i_size. It must be zeroed out on each and every
+ * writepage invocation because it may be mmapped. "A file is mapped
+ * in multiples of the page size. For a file that is not a multiple of
+ * the page size, the remaining memory is zeroed when mapped, and
+ * writes to that region are not written out to the file."
+ */
+ offset = i_size & (PAGE_SIZE-1);
+ if (page->index == end_index && offset)
+ zero_user_segment(page, offset, PAGE_SIZE);
+
+ return __block_write_full_page(inode, page, get_block, wbc,
+ end_buffer_async_write);
+}
+
/**
* __gfs2_jdata_writepage - The core of jdata writepage
* @page: The page to write
@@ -165,7 +191,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
}
gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
}
- return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
+ return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
}
/**
@@ -180,27 +206,20 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
int ret;
- int done_trans = 0;
- if (PageChecked(page)) {
- if (wbc->sync_mode != WB_SYNC_ALL)
- goto out_ignore;
- ret = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
- if (ret)
- goto out_ignore;
- done_trans = 1;
- }
- ret = gfs2_writepage_common(page, wbc);
- if (ret > 0)
- ret = __gfs2_jdata_writepage(page, wbc);
- if (done_trans)
- gfs2_trans_end(sdp);
+ if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
+ goto out;
+ if (PageChecked(page) || current->journal_info)
+ goto out_ignore;
+ ret = __gfs2_jdata_writepage(page, wbc);
return ret;
out_ignore:
redirty_page_for_writepage(wbc, page);
+out:
unlock_page(page);
return 0;
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 24ce1cdd434a..6e2bec1cd289 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -285,7 +285,8 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl,
if (trylock_buffer(rabh)) {
if (!buffer_uptodate(rabh)) {
rabh->b_end_io = end_buffer_read_sync;
- submit_bh(READA | REQ_META, rabh);
+ submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META,
+ rabh);
continue;
}
unlock_buffer(rabh);
@@ -974,7 +975,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
if (!buffer_uptodate(bh)) {
err = -EIO;
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 30822b148f3e..5173b98ca036 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -117,7 +117,7 @@ static int gfs2_dentry_delete(const struct dentry *dentry)
return 0;
ginode = GFS2_I(d_inode(dentry));
- if (!ginode->i_iopen_gh.gh_gl)
+ if (!gfs2_holder_initialized(&ginode->i_iopen_gh))
return 0;
if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags))
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 271d93905bac..fcb59b23f1e3 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1513,7 +1513,7 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
continue;
}
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READA | REQ_META, bh);
+ submit_bh(REQ_OP_READ, REQ_RAHEAD | REQ_META, bh);
continue;
}
brelse(bh);
@@ -1663,7 +1663,8 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name,
brelse(bh);
if (fail_on_exist)
return ERR_PTR(-EEXIST);
- inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino);
+ inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino,
+ GFS2_BLKST_FREE /* ignore */);
if (!IS_ERR(inode))
GFS2_I(inode)->i_rahead = rahead;
return inode;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index d5bda8513457..a332f3cd925e 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -137,21 +137,10 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
struct gfs2_sbd *sdp = sb->s_fs_info;
struct inode *inode;
- inode = gfs2_ilookup(sb, inum->no_addr);
- if (inode) {
- if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
- iput(inode);
- return ERR_PTR(-ESTALE);
- }
- goto out_inode;
- }
-
inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
GFS2_BLKST_DINODE);
if (IS_ERR(inode))
return ERR_CAST(inode);
-
-out_inode:
return d_obtain_alias(inode);
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e0f98e483aec..320e65e61938 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1098,7 +1098,7 @@ static void do_unflock(struct file *file, struct file_lock *fl)
mutex_lock(&fp->f_fl_mutex);
locks_lock_file_wait(file, fl);
- if (fl_gh->gh_gl) {
+ if (gfs2_holder_initialized(fl_gh)) {
gfs2_glock_dq(fl_gh);
gfs2_holder_uninit(fl_gh);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 706fd9352f36..3a90b2b5b9bb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -575,7 +575,6 @@ static void delete_work_func(struct work_struct *work)
{
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct gfs2_inode *ip;
struct inode *inode;
u64 no_addr = gl->gl_name.ln_number;
@@ -585,13 +584,7 @@ static void delete_work_func(struct work_struct *work)
if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
goto out;
- ip = gl->gl_object;
- /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
-
- if (ip)
- inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
- else
- inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+ inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
if (inode && !IS_ERR(inode)) {
d_prune_aliases(inode);
iput(inode);
@@ -808,7 +801,7 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
{
put_pid(gh->gh_owner_pid);
gfs2_glock_put(gh->gh_gl);
- gh->gh_gl = NULL;
+ gfs2_holder_mark_uninitialized(gh);
gh->gh_ip = 0;
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 46ab67fc16da..ab1ef322f7a5 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -247,4 +247,14 @@ extern void gfs2_unregister_debugfs(void);
extern const struct lm_lockops gfs2_dlm_ops;
+static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh)
+{
+ gh->gh_gl = NULL;
+}
+
+static inline bool gfs2_holder_initialized(struct gfs2_holder *gh)
+{
+ return gh->gh_gl;
+}
+
#endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 21dc784f66c2..e0621cacf134 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -37,9 +37,35 @@
#include "super.h"
#include "glops.h"
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+static int iget_test(struct inode *inode, void *opaque)
{
- return ilookup(sb, (unsigned long)no_addr);
+ u64 no_addr = *(u64 *)opaque;
+
+ return GFS2_I(inode)->i_no_addr == no_addr;
+}
+
+static int iget_set(struct inode *inode, void *opaque)
+{
+ u64 no_addr = *(u64 *)opaque;
+
+ GFS2_I(inode)->i_no_addr = no_addr;
+ inode->i_ino = no_addr;
+ return 0;
+}
+
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
+{
+ struct inode *inode;
+
+repeat:
+ inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr);
+ if (!inode)
+ return inode;
+ if (is_bad_inode(inode)) {
+ iput(inode);
+ goto repeat;
+ }
+ return inode;
}
/**
@@ -78,26 +104,37 @@ static void gfs2_set_iop(struct inode *inode)
/**
* gfs2_inode_lookup - Lookup an inode
* @sb: The super block
- * @no_addr: The inode number
* @type: The type of the inode
+ * @no_addr: The inode number
+ * @no_formal_ino: The inode generation number
+ * @blktype: Requested block type (GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED;
+ * GFS2_BLKST_FREE do indicate not to verify)
+ *
+ * If @type is DT_UNKNOWN, the inode type is fetched from disk.
+ *
+ * If @blktype is anything other than GFS2_BLKST_FREE (which is used as a
+ * placeholder because it doesn't otherwise make sense), the on-disk block type
+ * is verified to be @blktype.
*
* Returns: A VFS inode, or an error
*/
struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
- u64 no_addr, u64 no_formal_ino)
+ u64 no_addr, u64 no_formal_ino,
+ unsigned int blktype)
{
struct inode *inode;
struct gfs2_inode *ip;
struct gfs2_glock *io_gl = NULL;
+ struct gfs2_holder i_gh;
int error;
- inode = iget_locked(sb, (unsigned long)no_addr);
+ gfs2_holder_mark_uninitialized(&i_gh);
+ inode = gfs2_iget(sb, no_addr);
if (!inode)
return ERR_PTR(-ENOMEM);
ip = GFS2_I(inode);
- ip->i_no_addr = no_addr;
if (inode->i_state & I_NEW) {
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -112,10 +149,29 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (unlikely(error))
goto fail_put;
+ if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
+ /*
+ * The GL_SKIP flag indicates to skip reading the inode
+ * block. We read the inode with gfs2_inode_refresh
+ * after possibly checking the block type.
+ */
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
+ GL_SKIP, &i_gh);
+ if (error)
+ goto fail_put;
+
+ if (blktype != GFS2_BLKST_FREE) {
+ error = gfs2_check_blk_type(sdp, no_addr,
+ blktype);
+ if (error)
+ goto fail_put;
+ }
+ }
+
set_bit(GIF_INVALID, &ip->i_flags);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
- goto fail_iopen;
+ goto fail_put;
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
@@ -134,6 +190,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
unlock_new_inode(inode);
}
+ if (gfs2_holder_initialized(&i_gh))
+ gfs2_glock_dq_uninit(&i_gh);
return inode;
fail_refresh:
@@ -141,10 +199,11 @@ fail_refresh:
ip->i_iopen_gh.gh_gl->gl_object = NULL;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_uninit(&ip->i_iopen_gh);
-fail_iopen:
+fail_put:
if (io_gl)
gfs2_glock_put(io_gl);
-fail_put:
+ if (gfs2_holder_initialized(&i_gh))
+ gfs2_glock_dq_uninit(&i_gh);
ip->i_gl->gl_object = NULL;
fail:
iget_failed(inode);
@@ -155,23 +214,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 *no_formal_ino, unsigned int blktype)
{
struct super_block *sb = sdp->sd_vfs;
- struct gfs2_holder i_gh;
- struct inode *inode = NULL;
+ struct inode *inode;
int error;
- /* Must not read in block until block type is verified */
- error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
- LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
- if (error)
- return ERR_PTR(error);
-
- error = gfs2_check_blk_type(sdp, no_addr, blktype);
- if (error)
- goto fail;
-
- inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+ inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, blktype);
if (IS_ERR(inode))
- goto fail;
+ return inode;
/* Two extra checks for NFS only */
if (no_formal_ino) {
@@ -182,16 +230,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
error = -EIO;
if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
goto fail_iput;
-
- error = 0;
}
+ return inode;
-fail:
- gfs2_glock_dq_uninit(&i_gh);
- return error ? ERR_PTR(error) : inode;
fail_iput:
iput(inode);
- goto fail;
+ return ERR_PTR(error);
}
@@ -236,8 +280,8 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
struct gfs2_holder d_gh;
int error = 0;
struct inode *inode = NULL;
- int unlock = 0;
+ gfs2_holder_mark_uninitialized(&d_gh);
if (!name->len || name->len > GFS2_FNAMESIZE)
return ERR_PTR(-ENAMETOOLONG);
@@ -252,7 +296,6 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
if (error)
return ERR_PTR(error);
- unlock = 1;
}
if (!is_root) {
@@ -265,7 +308,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
if (IS_ERR(inode))
error = PTR_ERR(inode);
out:
- if (unlock)
+ if (gfs2_holder_initialized(&d_gh))
gfs2_glock_dq_uninit(&d_gh);
if (error == -ENOENT)
return NULL;
@@ -1189,7 +1232,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
struct dentry *d;
bool excl = !!(flags & O_EXCL);
- if (!d_unhashed(dentry))
+ if (!d_in_lookup(dentry))
goto skip_lookup;
d = __gfs2_lookup(dir, dentry, file, opened);
@@ -1309,7 +1352,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(d_inode(odentry));
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
+ struct gfs2_holder ghs[5], r_gh;
struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
@@ -1317,6 +1360,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
unsigned int x;
int error;
+ gfs2_holder_mark_uninitialized(&r_gh);
if (d_really_is_positive(ndentry)) {
nip = GFS2_I(d_inode(ndentry));
if (ip == nip)
@@ -1506,7 +1550,7 @@ out_gunlock:
gfs2_holder_uninit(ghs + x);
}
out_gunlock_r:
- if (r_gh.gh_gl)
+ if (gfs2_holder_initialized(&r_gh))
gfs2_glock_dq_uninit(&r_gh);
out:
return error;
@@ -1532,13 +1576,14 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *oip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = GFS2_I(ndentry->d_inode);
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
+ struct gfs2_holder ghs[5], r_gh;
unsigned int num_gh;
unsigned int x;
umode_t old_mode = oip->i_inode.i_mode;
umode_t new_mode = nip->i_inode.i_mode;
int error;
+ gfs2_holder_mark_uninitialized(&r_gh);
error = gfs2_rindex_update(sdp);
if (error)
return error;
@@ -1646,7 +1691,7 @@ out_gunlock:
gfs2_holder_uninit(ghs + x);
}
out_gunlock_r:
- if (r_gh.gh_gl)
+ if (gfs2_holder_initialized(&r_gh))
gfs2_glock_dq_uninit(&r_gh);
out:
return error;
@@ -1743,9 +1788,8 @@ int gfs2_permission(struct inode *inode, int mask)
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
int error;
- int unlock = 0;
-
+ gfs2_holder_mark_uninitialized(&i_gh);
ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
if (mask & MAY_NOT_BLOCK)
@@ -1753,14 +1797,13 @@ int gfs2_permission(struct inode *inode, int mask)
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
- unlock = 1;
}
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EACCES;
else
error = generic_permission(inode, mask);
- if (unlock)
+ if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
return error;
@@ -1932,17 +1975,16 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int error;
- int unlock = 0;
+ gfs2_holder_mark_uninitialized(&gh);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
if (error)
return error;
- unlock = 1;
}
generic_fillattr(inode, stat);
- if (unlock)
+ if (gfs2_holder_initialized(&gh))
gfs2_glock_dq_uninit(&gh);
return 0;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index e1af0d4aa308..7710dfd3af35 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -94,11 +94,11 @@ err:
}
extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
- u64 no_addr, u64 no_formal_ino);
+ u64 no_addr, u64 no_formal_ino,
+ unsigned int blktype);
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 *no_formal_ino,
unsigned int blktype);
-extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 0ff028c15199..e58ccef09c91 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -657,7 +657,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
- int rw = WRITE_FLUSH_FUA | REQ_META;
+ int op_flags = WRITE_FLUSH_FUA | REQ_META;
struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
lh = page_address(page);
@@ -682,12 +682,12 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
gfs2_ordered_wait(sdp);
log_flush_wait(sdp);
- rw = WRITE_SYNC | REQ_META | REQ_PRIO;
+ op_flags = WRITE_SYNC | REQ_META | REQ_PRIO;
}
sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
gfs2_log_write_page(sdp, page);
- gfs2_log_flush_bio(sdp, rw);
+ gfs2_log_flush_bio(sdp, REQ_OP_WRITE, op_flags);
log_flush_wait(sdp);
if (sdp->sd_log_tail != tail)
@@ -738,7 +738,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
gfs2_ordered_write(sdp);
lops_before_commit(sdp, tr);
- gfs2_log_flush_bio(sdp, WRITE);
+ gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index d5369a109781..49d5a1b61b06 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -230,17 +230,19 @@ static void gfs2_end_log_write(struct bio *bio)
/**
* gfs2_log_flush_bio - Submit any pending log bio
* @sdp: The superblock
- * @rw: The rw flags
+ * @op: REQ_OP
+ * @op_flags: rq_flag_bits
*
* Submit any pending part-built or full bio to the block device. If
* there is no pending bio, then this is a no-op.
*/
-void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw)
+void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags)
{
if (sdp->sd_log_bio) {
atomic_inc(&sdp->sd_log_in_flight);
- submit_bio(rw, sdp->sd_log_bio);
+ bio_set_op_attrs(sdp->sd_log_bio, op, op_flags);
+ submit_bio(sdp->sd_log_bio);
sdp->sd_log_bio = NULL;
}
}
@@ -299,7 +301,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno)
nblk >>= sdp->sd_fsb2bb_shift;
if (blkno == nblk)
return bio;
- gfs2_log_flush_bio(sdp, WRITE);
+ gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
}
return gfs2_log_alloc_bio(sdp, blkno);
@@ -328,7 +330,7 @@ static void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
bio = gfs2_log_get_bio(sdp, blkno);
ret = bio_add_page(bio, page, size, offset);
if (ret == 0) {
- gfs2_log_flush_bio(sdp, WRITE);
+ gfs2_log_flush_bio(sdp, REQ_OP_WRITE, 0);
bio = gfs2_log_alloc_bio(sdp, blkno);
ret = bio_add_page(bio, page, size, offset);
WARN_ON(ret == 0);
@@ -535,9 +537,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
return 0;
- gfs2_replay_incr_blk(sdp, &start);
+ gfs2_replay_incr_blk(jd, &start);
- for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+ for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
blkno = be64_to_cpu(*ptr++);
jd->jd_found_blocks++;
@@ -693,7 +695,7 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
offset = sizeof(struct gfs2_log_descriptor);
- for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+ for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
error = gfs2_replay_read_block(jd, start, &bh);
if (error)
return error;
@@ -762,7 +764,6 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
__be64 *ptr, int pass)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
- struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_glock *gl = ip->i_gl;
unsigned int blks = be32_to_cpu(ld->ld_data1);
struct buffer_head *bh_log, *bh_ip;
@@ -773,8 +774,8 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
return 0;
- gfs2_replay_incr_blk(sdp, &start);
- for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+ gfs2_replay_incr_blk(jd, &start);
+ for (; blks; gfs2_replay_incr_blk(jd, &start), blks--) {
blkno = be64_to_cpu(*ptr++);
esc = be64_to_cpu(*ptr++);
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index a65a7ba32ffd..e529f536c117 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -27,7 +27,7 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
extern const struct gfs2_log_operations *gfs2_log_ops[];
extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
-extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw);
+extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int op, int op_flags);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index f99f8e94de3f..74fd0139e6c2 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -45,6 +45,7 @@ static void gfs2_init_inode_once(void *foo)
memset(&ip->i_res, 0, sizeof(ip->i_res));
RB_CLEAR_NODE(&ip->i_res.rs_node);
ip->i_hash_cache = NULL;
+ gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
}
static void gfs2_init_glock_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 8eaadabbc771..950b8be68e41 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,8 +37,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
{
struct buffer_head *bh, *head;
int nr_underway = 0;
- int write_op = REQ_META | REQ_PRIO |
- (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+ int write_flags = REQ_META | REQ_PRIO |
+ (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
BUG_ON(!PageLocked(page));
BUG_ON(!page_has_buffers(page));
@@ -79,7 +79,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh(write_op, bh);
+ submit_bh(REQ_OP_WRITE, write_flags, bh);
nr_underway++;
}
bh = next;
@@ -213,7 +213,8 @@ static void gfs2_meta_read_endio(struct bio *bio)
* Submit several consecutive buffer head I/O requests as a single bio I/O
* request. (See submit_bh_wbc.)
*/
-static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
+static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
+ int num)
{
struct buffer_head *bh = bhs[0];
struct bio *bio;
@@ -230,7 +231,8 @@ static void gfs2_submit_bhs(int rw, struct buffer_head *bhs[], int num)
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
}
bio->bi_end_io = gfs2_meta_read_endio;
- submit_bio(rw, bio);
+ bio_set_op_attrs(bio, op, op_flags);
+ submit_bio(bio);
}
/**
@@ -280,7 +282,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
}
}
- gfs2_submit_bhs(READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
+ gfs2_submit_bhs(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, bhs, num);
if (!(flags & DIO_WAIT))
return 0;
@@ -448,7 +450,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh))
- ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
+ ll_rw_block(REQ_OP_READ, READ_SYNC | REQ_META, 1, &first_bh);
dblock++;
extlen--;
@@ -457,7 +459,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
bh = gfs2_getbuf(gl, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh))
- ll_rw_block(READA | REQ_META, 1, &bh);
+ ll_rw_block(REQ_OP_READ, REQ_RAHEAD | REQ_META, 1, &bh);
brelse(bh);
dblock++;
extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 45463600fb81..ef1e1822977f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -246,7 +246,8 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
bio->bi_end_io = end_bio_io_page;
bio->bi_private = page;
- submit_bio(READ_SYNC | REQ_META, bio);
+ bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC | REQ_META);
+ submit_bio(bio);
wait_on_page_locked(page);
bio_put(bio);
if (!PageUptodate(page)) {
@@ -454,7 +455,8 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
struct dentry *dentry;
struct inode *inode;
- inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
+ inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0,
+ GFS2_BLKST_FREE /* ignore */);
if (IS_ERR(inode)) {
fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
return PTR_ERR(inode);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index ce7d69a2fdc0..77930ca25303 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -730,7 +730,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
if (PageUptodate(page))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
- ll_rw_block(READ | REQ_META, 1, &bh);
+ ll_rw_block(REQ_OP_READ, REQ_META, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
goto unlock_out;
@@ -883,7 +883,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
&data_blocks, &ind_blocks);
- ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
+ ghs = kmalloc(num_qd * sizeof(struct gfs2_holder), GFP_NOFS);
if (!ghs)
return -ENOMEM;
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 1b645773c98e..113b6095a58d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -338,7 +338,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_header_host lh;
error = get_log_header(jd, start, &lh);
if (!error) {
- gfs2_replay_incr_blk(sdp, &start);
+ gfs2_replay_incr_blk(jd, &start);
brelse(bh);
continue;
}
@@ -360,7 +360,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
}
while (length--)
- gfs2_replay_incr_blk(sdp, &start);
+ gfs2_replay_incr_blk(jd, &start);
brelse(bh);
}
@@ -390,7 +390,7 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea
struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
lblock = head->lh_blkno;
- gfs2_replay_incr_blk(sdp, &lblock);
+ gfs2_replay_incr_blk(jd, &lblock);
bh_map.b_size = 1 << ip->i_inode.i_blkbits;
error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0);
if (error)
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h
index 6142836cce96..11fdfab4bf99 100644
--- a/fs/gfs2/recovery.h
+++ b/fs/gfs2/recovery.h
@@ -14,9 +14,9 @@
extern struct workqueue_struct *gfs_recovery_wq;
-static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
+static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, unsigned int *blk)
{
- if (++*blk == sdp->sd_jdesc->jd_blocks)
+ if (++*blk == jd->jd_blocks)
*blk = 0;
}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5bd216901e89..86ccc0159393 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -658,6 +658,7 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
if (rgd) {
spin_lock(&rgd->rd_rsspin);
__rs_deltree(rs);
+ BUG_ON(rs->rs_free);
spin_unlock(&rgd->rd_rsspin);
}
}
@@ -671,10 +672,8 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
down_write(&ip->i_rw_mutex);
- if ((wcount == NULL) || (atomic_read(wcount) <= 1)) {
+ if ((wcount == NULL) || (atomic_read(wcount) <= 1))
gfs2_rs_deltree(&ip->i_res);
- BUG_ON(ip->i_res.rs_free);
- }
up_write(&ip->i_rw_mutex);
gfs2_qa_delete(ip, wcount);
}
@@ -722,6 +721,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
gfs2_free_clones(rgd);
kfree(rgd->rd_bits);
+ rgd->rd_bits = NULL;
return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
@@ -916,9 +916,6 @@ static int read_rindex_entry(struct gfs2_inode *ip)
if (error)
goto fail;
- rgd->rd_gl->gl_object = rgd;
- rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
- rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
if (rgd->rd_data > sdp->sd_max_rg_data)
@@ -926,14 +923,20 @@ static int read_rindex_entry(struct gfs2_inode *ip)
spin_lock(&sdp->sd_rindex_spin);
error = rgd_insert(rgd);
spin_unlock(&sdp->sd_rindex_spin);
- if (!error)
+ if (!error) {
+ rgd->rd_gl->gl_object = rgd;
+ rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr +
+ rgd->rd_length) * bsize) - 1;
return 0;
+ }
error = 0; /* someone else read in the rgrp; free it and ignore it */
gfs2_glock_put(rgd->rd_gl);
fail:
kfree(rgd->rd_bits);
+ rgd->rd_bits = NULL;
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
return error;
}
@@ -2096,7 +2099,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
{
struct gfs2_blkreserv *rs = &ip->i_res;
- if (rs->rs_rgd_gh.gh_gl)
+ if (gfs2_holder_initialized(&rs->rs_rgd_gh))
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
}
@@ -2596,7 +2599,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
{
unsigned int x;
- rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder),
+ rlist->rl_ghs = kmalloc(rlist->rl_rgrps * sizeof(struct gfs2_holder),
GFP_NOFS | __GFP_NOFAIL);
for (x = 0; x < rlist->rl_rgrps; x++)
gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9b2ff353e45f..3a7e60bb39f8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -855,7 +855,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
- if (freeze_gh.gh_gl)
+ if (gfs2_holder_initialized(&freeze_gh))
gfs2_glock_dq_uninit(&freeze_gh);
gfs2_quota_cleanup(sdp);
@@ -1033,7 +1033,7 @@ static int gfs2_unfreeze(struct super_block *sb)
mutex_lock(&sdp->sd_freeze_mutex);
if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
- sdp->sd_freeze_gh.gh_gl == NULL) {
+ !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
mutex_unlock(&sdp->sd_freeze_mutex);
return 0;
}
@@ -1084,9 +1084,11 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host
int error = 0, err;
memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
- gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
+ gha = kmalloc(slots * sizeof(struct gfs2_holder), GFP_KERNEL);
if (!gha)
return -ENOMEM;
+ for (x = 0; x < slots; x++)
+ gfs2_holder_mark_uninitialized(gha + x);
rgd_next = gfs2_rgrpd_get_first(sdp);
@@ -1096,7 +1098,7 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host
for (x = 0; x < slots; x++) {
gh = gha + x;
- if (gh->gh_gl && gfs2_glock_poll(gh)) {
+ if (gfs2_holder_initialized(gh) && gfs2_glock_poll(gh)) {
err = gfs2_glock_wait(gh);
if (err) {
gfs2_holder_uninit(gh);
@@ -1109,7 +1111,7 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host
}
}
- if (gh->gh_gl)
+ if (gfs2_holder_initialized(gh))
done = 0;
else if (rgd_next && !error) {
error = gfs2_glock_nq_init(rgd_next->rd_gl,
@@ -1304,9 +1306,11 @@ static int gfs2_drop_inode(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
- if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && inode->i_nlink) {
+ if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
+ inode->i_nlink &&
+ gfs2_holder_initialized(&ip->i_iopen_gh)) {
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
- if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
+ if (test_bit(GLF_DEMOTE, &gl->gl_flags))
clear_nlink(inode);
}
return generic_drop_inode(inode);
@@ -1551,7 +1555,7 @@ static void gfs2_evict_inode(struct inode *inode)
goto out_truncate;
}
- if (ip->i_iopen_gh.gh_gl &&
+ if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
@@ -1610,7 +1614,7 @@ out_unlock:
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_deltree(&ip->i_res);
- if (ip->i_iopen_gh.gh_gl) {
+ if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
@@ -1632,7 +1636,7 @@ out:
gfs2_glock_add_to_lru(ip->i_gl);
gfs2_glock_put(ip->i_gl);
ip->i_gl = NULL;
- if (ip->i_iopen_gh.gh_gl) {
+ if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
ip->i_iopen_gh.gh_gl->gl_object = NULL;
ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index fdc3446d934a..047245bd2cd6 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -526,7 +526,7 @@ int hfsplus_compare_dentry(const struct dentry *parent,
/* wrapper.c */
int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf,
- void **data, int rw);
+ void **data, int op, int op_flags);
int hfsplus_read_wrapper(struct super_block *sb);
/* time macros */
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index eb355d81e279..63164ebc52fa 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -112,7 +112,8 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
if ((u8 *)pm - (u8 *)buf >= buf_size) {
res = hfsplus_submit_bio(sb,
*part_start + HFS_PMAP_BLK + i,
- buf, (void **)&pm, READ);
+ buf, (void **)&pm, REQ_OP_READ,
+ 0);
if (res)
return res;
}
@@ -136,7 +137,7 @@ int hfs_part_find(struct super_block *sb,
return -ENOMEM;
res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK,
- buf, &data, READ);
+ buf, &data, REQ_OP_READ, 0);
if (res)
goto out;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 755bf30ba1ce..11854dd84572 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -220,7 +220,8 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
error2 = hfsplus_submit_bio(sb,
sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
- sbi->s_vhdr_buf, NULL, WRITE_SYNC);
+ sbi->s_vhdr_buf, NULL, REQ_OP_WRITE,
+ WRITE_SYNC);
if (!error)
error = error2;
if (!write_backup)
@@ -228,7 +229,8 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait)
error2 = hfsplus_submit_bio(sb,
sbi->part_start + sbi->sect_count - 2,
- sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC);
+ sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE,
+ WRITE_SYNC);
if (!error)
error2 = error;
out:
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index cc6235671437..ebb85e5f6549 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -30,7 +30,8 @@ struct hfsplus_wd {
* @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
* @buf: buffer for I/O
* @data: output pointer for location of requested data
- * @rw: direction of I/O
+ * @op: direction of I/O
+ * @op_flags: request op flags
*
* The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
* HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
@@ -44,7 +45,7 @@ struct hfsplus_wd {
* will work correctly.
*/
int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
- void *buf, void **data, int rw)
+ void *buf, void **data, int op, int op_flags)
{
struct bio *bio;
int ret = 0;
@@ -65,8 +66,9 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
bio = bio_alloc(GFP_NOIO, 1);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = sb->s_bdev;
+ bio_set_op_attrs(bio, op, op_flags);
- if (!(rw & WRITE) && data)
+ if (op != WRITE && data)
*data = (u8 *)buf + offset;
while (io_size > 0) {
@@ -83,7 +85,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
buf = (u8 *)buf + len;
}
- ret = submit_bio_wait(rw, bio);
+ ret = submit_bio_wait(bio);
out:
bio_put(bio);
return ret < 0 ? ret : 0;
@@ -181,7 +183,7 @@ int hfsplus_read_wrapper(struct super_block *sb)
reread:
error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
- READ);
+ REQ_OP_READ, 0);
if (error)
goto out_free_backup_vhdr;
@@ -213,7 +215,8 @@ reread:
error = hfsplus_submit_bio(sb, part_start + part_size - 2,
sbi->s_backup_vhdr_buf,
- (void **)&sbi->s_backup_vhdr, READ);
+ (void **)&sbi->s_backup_vhdr, REQ_OP_READ,
+ 0);
if (error)
goto out_free_backup_vhdr;
diff --git a/fs/internal.h b/fs/internal.h
index b71deeecea17..f57ced528cde 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool);
extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
extern int d_set_mounted(struct dentry *dentry);
extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
+extern struct dentry *d_alloc_cursor(struct dentry *);
/*
* read_write.c
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index 2e4e834d1a98..2ce5b75ee9a5 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -81,7 +81,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
blocknum = block_start >> bufshift;
memset(bhs, 0, (needblocks + 1) * sizeof(struct buffer_head *));
haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks);
- ll_rw_block(READ, haveblocks, bhs);
+ ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs);
curbh = 0;
curpage = 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 78313adb3c95..5bb565f9989c 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -155,9 +155,9 @@ static int journal_submit_commit_record(journal_t *journal,
if (journal->j_flags & JBD2_BARRIER &&
!jbd2_has_feature_async_commit(journal))
- ret = submit_bh(WRITE_SYNC | WRITE_FLUSH_FUA, bh);
+ ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC | WRITE_FLUSH_FUA, bh);
else
- ret = submit_bh(WRITE_SYNC, bh);
+ ret = submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
*cbh = bh;
return ret;
@@ -718,7 +718,7 @@ start_journal_io:
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
- submit_bh(WRITE_SYNC, bh);
+ submit_bh(REQ_OP_WRITE, WRITE_SYNC, bh);
}
cond_resched();
stats.run.rs_blocks_logged += bufs;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fc1d7a39b082..46261a6f902d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1351,15 +1351,15 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal);
}
-static int jbd2_write_superblock(journal_t *journal, int write_op)
+static int jbd2_write_superblock(journal_t *journal, int write_flags)
{
struct buffer_head *bh = journal->j_sb_buffer;
journal_superblock_t *sb = journal->j_superblock;
int ret;
- trace_jbd2_write_superblock(journal, write_op);
+ trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
- write_op &= ~(REQ_FUA | REQ_FLUSH);
+ write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
lock_buffer(bh);
if (buffer_write_io_error(bh)) {
/*
@@ -1379,7 +1379,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_op)
jbd2_superblock_csum_set(journal, sb);
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(write_op, bh);
+ ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
wait_on_buffer(bh);
if (buffer_write_io_error(bh)) {
clear_buffer_write_io_error(bh);
@@ -1503,7 +1503,7 @@ static int journal_get_superblock(journal_t *journal)
J_ASSERT(bh != NULL);
if (!buffer_uptodate(bh)) {
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
printk(KERN_ERR
@@ -2334,18 +2334,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
BUG_ON(size & (size-1)); /* Must be a power of 2 */
- flags |= __GFP_REPEAT;
- if (size == PAGE_SIZE)
- ptr = (void *)__get_free_pages(flags, 0);
- else if (size > PAGE_SIZE) {
- int order = get_order(size);
-
- if (order < 3)
- ptr = (void *)__get_free_pages(flags, order);
- else
- ptr = vmalloc(size);
- } else
+ if (size < PAGE_SIZE)
ptr = kmem_cache_alloc(get_slab(size), flags);
+ else
+ ptr = (void *)__get_free_pages(flags, get_order(size));
/* Check alignment; SLUB has gotten this wrong in the past,
* and this can lead to user data corruption! */
@@ -2356,20 +2348,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
void jbd2_free(void *ptr, size_t size)
{
- if (size == PAGE_SIZE) {
- free_pages((unsigned long)ptr, 0);
- return;
- }
- if (size > PAGE_SIZE) {
- int order = get_order(size);
-
- if (order < 3)
- free_pages((unsigned long)ptr, order);
- else
- vfree(ptr);
- return;
- }
- kmem_cache_free(get_slab(size), ptr);
+ if (size < PAGE_SIZE)
+ kmem_cache_free(get_slab(size), ptr);
+ else
+ free_pages((unsigned long)ptr, get_order(size));
};
/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 805bc6bcd8ab..02dd3360cb20 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -104,7 +104,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
bufs[nbufs++] = bh;
if (nbufs == MAXBUF) {
- ll_rw_block(READ, nbufs, bufs);
+ ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
journal_brelse_array(bufs, nbufs);
nbufs = 0;
}
@@ -113,7 +113,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
}
if (nbufs)
- ll_rw_block(READ, nbufs, bufs);
+ ll_rw_block(REQ_OP_READ, 0, nbufs, bufs);
err = 0;
failed:
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 63759d723920..a74752146ec9 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2002,12 +2002,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
+ bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC);
/*check if journaling to disk has been disabled*/
if (log->no_integrity) {
bio->bi_iter.bi_size = 0;
lbmIODone(bio);
} else {
- submit_bio(READ_SYNC, bio);
+ submit_bio(bio);
}
wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
@@ -2145,13 +2146,14 @@ static void lbmStartIO(struct lbuf * bp)
bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
/* check if journaling to disk has been disabled */
if (log->no_integrity) {
bio->bi_iter.bi_size = 0;
lbmIODone(bio);
} else {
- submit_bio(WRITE_SYNC, bio);
+ submit_bio(bio);
INCREMENT(lmStat.submitted);
}
}
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index b60e015cc757..e7fa9e513040 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -411,7 +411,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
inc_io(page);
if (!bio->bi_iter.bi_size)
goto dump_bio;
- submit_bio(WRITE, bio);
+ submit_bio(bio);
nr_underway++;
bio = NULL;
} else
@@ -434,6 +434,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_write_end_io;
bio->bi_private = page;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
/* Don't call bio_add_page yet, we may add to this vec */
bio_offset = offset;
@@ -448,7 +449,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
if (!bio->bi_iter.bi_size)
goto dump_bio;
- submit_bio(WRITE, bio);
+ submit_bio(bio);
nr_underway++;
}
if (redirty)
@@ -506,7 +507,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
insert_metapage(page, NULL);
inc_io(page);
if (bio)
- submit_bio(READ, bio);
+ submit_bio(bio);
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_bdev = inode->i_sb->s_bdev;
@@ -514,6 +515,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_read_end_io;
bio->bi_private = page;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
len = xlen << inode->i_blkbits;
offset = block_offset << inode->i_blkbits;
if (bio_add_page(bio, page, len, offset) < len)
@@ -523,7 +525,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
block_offset++;
}
if (bio)
- submit_bio(READ, bio);
+ submit_bio(bio);
else
unlock_page(page);
diff --git a/fs/libfs.c b/fs/libfs.c
index 3db2721144c2..74dc8b9e7f53 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup);
int dcache_dir_open(struct inode *inode, struct file *file)
{
- static struct qstr cursor_name = QSTR_INIT(".", 1);
-
- file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
+ file->private_data = d_alloc_cursor(file->f_path.dentry);
return file->private_data ? 0 : -ENOMEM;
}
@@ -86,6 +84,61 @@ int dcache_dir_close(struct inode *inode, struct file *file)
}
EXPORT_SYMBOL(dcache_dir_close);
+/* parent is locked at least shared */
+static struct dentry *next_positive(struct dentry *parent,
+ struct list_head *from,
+ int count)
+{
+ unsigned *seq = &parent->d_inode->i_dir_seq, n;
+ struct dentry *res;
+ struct list_head *p;
+ bool skipped;
+ int i;
+
+retry:
+ i = count;
+ skipped = false;
+ n = smp_load_acquire(seq) & ~1;
+ res = NULL;
+ rcu_read_lock();
+ for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+ struct dentry *d = list_entry(p, struct dentry, d_child);
+ if (!simple_positive(d)) {
+ skipped = true;
+ } else if (!--i) {
+ res = d;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (skipped) {
+ smp_rmb();
+ if (unlikely(*seq != n))
+ goto retry;
+ }
+ return res;
+}
+
+static void move_cursor(struct dentry *cursor, struct list_head *after)
+{
+ struct dentry *parent = cursor->d_parent;
+ unsigned n, *seq = &parent->d_inode->i_dir_seq;
+ spin_lock(&parent->d_lock);
+ for (;;) {
+ n = *seq;
+ if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
+ break;
+ cpu_relax();
+ }
+ __list_del(cursor->d_child.prev, cursor->d_child.next);
+ if (after)
+ list_add(&cursor->d_child, after);
+ else
+ list_add_tail(&cursor->d_child, &parent->d_subdirs);
+ smp_store_release(seq, n + 2);
+ spin_unlock(&parent->d_lock);
+}
+
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *dentry = file->f_path.dentry;
@@ -101,25 +154,14 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
if (offset != file->f_pos) {
file->f_pos = offset;
if (file->f_pos >= 2) {
- struct list_head *p;
struct dentry *cursor = file->private_data;
+ struct dentry *to;
loff_t n = file->f_pos - 2;
- spin_lock(&dentry->d_lock);
- /* d_lock not required for cursor */
- list_del(&cursor->d_child);
- p = dentry->d_subdirs.next;
- while (n && p != &dentry->d_subdirs) {
- struct dentry *next;
- next = list_entry(p, struct dentry, d_child);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- if (simple_positive(next))
- n--;
- spin_unlock(&next->d_lock);
- p = p->next;
- }
- list_add_tail(&cursor->d_child, p);
- spin_unlock(&dentry->d_lock);
+ inode_lock_shared(dentry->d_inode);
+ to = next_positive(dentry, &dentry->d_subdirs, n);
+ move_cursor(cursor, to ? &to->d_child : NULL);
+ inode_unlock_shared(dentry->d_inode);
}
}
return offset;
@@ -142,36 +184,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file->f_path.dentry;
struct dentry *cursor = file->private_data;
- struct list_head *p, *q = &cursor->d_child;
+ struct list_head *p = &cursor->d_child;
+ struct dentry *next;
+ bool moved = false;
if (!dir_emit_dots(file, ctx))
return 0;
- spin_lock(&dentry->d_lock);
- if (ctx->pos == 2)
- list_move(q, &dentry->d_subdirs);
- for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
- struct dentry *next = list_entry(p, struct dentry, d_child);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- if (!simple_positive(next)) {
- spin_unlock(&next->d_lock);
- continue;
- }
-
- spin_unlock(&next->d_lock);
- spin_unlock(&dentry->d_lock);
+ if (ctx->pos == 2)
+ p = &dentry->d_subdirs;
+ while ((next = next_positive(dentry, p, 1)) != NULL) {
if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
d_inode(next)->i_ino, dt_type(d_inode(next))))
- return 0;
- spin_lock(&dentry->d_lock);
- spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
- /* next is still alive */
- list_move(q, p);
- spin_unlock(&next->d_lock);
- p = q;
+ break;
+ moved = true;
+ p = &next->d_child;
ctx->pos++;
}
- spin_unlock(&dentry->d_lock);
+ if (moved)
+ move_cursor(cursor, p);
return 0;
}
EXPORT_SYMBOL(dcache_readdir);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 154a107cd376..fc4084ef4736 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = {
};
#endif
-static void lockd_svc_exit_thread(void)
+static void lockd_unregister_notifiers(void)
{
unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
#endif
+}
+
+static void lockd_svc_exit_thread(void)
+{
+ lockd_unregister_notifiers();
svc_exit_thread(nlmsvc_rqst);
}
@@ -462,7 +467,7 @@ int lockd_up(struct net *net)
* Note: svc_serv structures have an initial use count of 1,
* so we exit through here on both success and failure.
*/
-err_net:
+err_put:
svc_destroy(serv);
err_create:
mutex_unlock(&nlmsvc_mutex);
@@ -470,7 +475,9 @@ err_create:
err_start:
lockd_down_net(serv, net);
- goto err_net;
+err_net:
+ lockd_unregister_notifiers();
+ goto err_put;
}
EXPORT_SYMBOL_GPL(lockd_up);
diff --git a/fs/locks.c b/fs/locks.c
index 7c5f91be9b65..ee1b15f6fc13 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
{
struct file_lock *fl, *my_fl = NULL, *lease;
struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index cc26f8f215f5..a8329cc47dec 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -14,7 +14,7 @@
#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
-static int sync_request(struct page *page, struct block_device *bdev, int rw)
+static int sync_request(struct page *page, struct block_device *bdev, int op)
{
struct bio bio;
struct bio_vec bio_vec;
@@ -29,8 +29,9 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw)
bio.bi_bdev = bdev;
bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
bio.bi_iter.bi_size = PAGE_SIZE;
+ bio_set_op_attrs(&bio, op, 0);
- return submit_bio_wait(rw, &bio);
+ return submit_bio_wait(&bio);
}
static int bdev_readpage(void *_sb, struct page *page)
@@ -95,8 +96,9 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
bio->bi_iter.bi_sector = ofs >> 9;
bio->bi_private = sb;
bio->bi_end_io = writeseg_end_io;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
atomic_inc(&super->s_pending_writes);
- submit_bio(WRITE, bio);
+ submit_bio(bio);
ofs += i * PAGE_SIZE;
index += i;
@@ -122,8 +124,9 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
bio->bi_iter.bi_sector = ofs >> 9;
bio->bi_private = sb;
bio->bi_end_io = writeseg_end_io;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
atomic_inc(&super->s_pending_writes);
- submit_bio(WRITE, bio);
+ submit_bio(bio);
return 0;
}
@@ -185,8 +188,9 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
bio->bi_iter.bi_sector = ofs >> 9;
bio->bi_private = sb;
bio->bi_end_io = erase_end_io;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
atomic_inc(&super->s_pending_writes);
- submit_bio(WRITE, bio);
+ submit_bio(bio);
ofs += i * PAGE_SIZE;
index += i;
@@ -206,8 +210,9 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
bio->bi_iter.bi_sector = ofs >> 9;
bio->bi_private = sb;
bio->bi_end_io = erase_end_io;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
atomic_inc(&super->s_pending_writes);
- submit_bio(WRITE, bio);
+ submit_bio(bio);
return 0;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index eedc644b78d7..37b28280ad04 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -56,11 +56,12 @@ static void mpage_end_io(struct bio *bio)
bio_put(bio);
}
-static struct bio *mpage_bio_submit(int rw, struct bio *bio)
+static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
{
bio->bi_end_io = mpage_end_io;
- guard_bio_eod(rw, bio);
- submit_bio(rw, bio);
+ bio_set_op_attrs(bio, op, op_flags);
+ guard_bio_eod(op, bio);
+ submit_bio(bio);
return NULL;
}
@@ -269,7 +270,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && (*last_block_in_bio != blocks[0] - 1))
- bio = mpage_bio_submit(READ, bio);
+ bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
alloc_new:
if (bio == NULL) {
@@ -286,7 +287,7 @@ alloc_new:
length = first_hole << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(READ, bio);
+ bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
goto alloc_new;
}
@@ -294,7 +295,7 @@ alloc_new:
nblocks = map_bh->b_size >> blkbits;
if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
(first_hole != blocks_per_page))
- bio = mpage_bio_submit(READ, bio);
+ bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
else
*last_block_in_bio = blocks[blocks_per_page - 1];
out:
@@ -302,7 +303,7 @@ out:
confused:
if (bio)
- bio = mpage_bio_submit(READ, bio);
+ bio = mpage_bio_submit(REQ_OP_READ, 0, bio);
if (!PageUptodate(page))
block_read_full_page(page, get_block);
else
@@ -384,7 +385,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
}
BUG_ON(!list_empty(pages));
if (bio)
- mpage_bio_submit(READ, bio);
+ mpage_bio_submit(REQ_OP_READ, 0, bio);
return 0;
}
EXPORT_SYMBOL(mpage_readpages);
@@ -405,7 +406,7 @@ int mpage_readpage(struct page *page, get_block_t get_block)
bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
&map_bh, &first_logical_block, get_block, gfp);
if (bio)
- mpage_bio_submit(READ, bio);
+ mpage_bio_submit(REQ_OP_READ, 0, bio);
return 0;
}
EXPORT_SYMBOL(mpage_readpage);
@@ -486,7 +487,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
- int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+ int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
@@ -595,7 +596,7 @@ page_is_mapped:
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
- bio = mpage_bio_submit(wr, bio);
+ bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
alloc_new:
if (bio == NULL) {
@@ -622,7 +623,7 @@ alloc_new:
wbc_account_io(wbc, page, PAGE_SIZE);
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
- bio = mpage_bio_submit(wr, bio);
+ bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
goto alloc_new;
}
@@ -632,7 +633,7 @@ alloc_new:
set_page_writeback(page);
unlock_page(page);
if (boundary || (first_unmapped != blocks_per_page)) {
- bio = mpage_bio_submit(wr, bio);
+ bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
@@ -644,7 +645,7 @@ alloc_new:
confused:
if (bio)
- bio = mpage_bio_submit(wr, bio);
+ bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
@@ -701,9 +702,9 @@ mpage_writepages(struct address_space *mapping,
ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
if (mpd.bio) {
- int wr = (wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC : WRITE);
- mpage_bio_submit(wr, mpd.bio);
+ int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : 0);
+ mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
}
}
blk_finish_plug(&plug);
@@ -722,9 +723,9 @@ int mpage_writepage(struct page *page, get_block_t get_block,
};
int ret = __mpage_writepage(page, wbc, &mpd);
if (mpd.bio) {
- int wr = (wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC : WRITE);
- mpage_bio_submit(wr, mpd.bio);
+ int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
+ WRITE_SYNC : 0);
+ mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
}
return ret;
}
diff --git a/fs/namei.c b/fs/namei.c
index 4c4f95ac8aa5..70580ab1445c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1416,21 +1416,28 @@ static void follow_mount(struct path *path)
}
}
+static int path_parent_directory(struct path *path)
+{
+ struct dentry *old = path->dentry;
+ /* rare case of legitimate dget_parent()... */
+ path->dentry = dget_parent(path->dentry);
+ dput(old);
+ if (unlikely(!path_connected(path)))
+ return -ENOENT;
+ return 0;
+}
+
static int follow_dotdot(struct nameidata *nd)
{
while(1) {
- struct dentry *old = nd->path.dentry;
-
if (nd->path.dentry == nd->root.dentry &&
nd->path.mnt == nd->root.mnt) {
break;
}
if (nd->path.dentry != nd->path.mnt->mnt_root) {
- /* rare case of legitimate dget_parent()... */
- nd->path.dentry = dget_parent(nd->path.dentry);
- dput(old);
- if (unlikely(!path_connected(&nd->path)))
- return -ENOENT;
+ int ret = path_parent_directory(&nd->path);
+ if (ret)
+ return ret;
break;
}
if (!follow_up(&nd->path))
@@ -2514,6 +2521,34 @@ struct dentry *lookup_one_len_unlocked(const char *name,
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
+#ifdef CONFIG_UNIX98_PTYS
+int path_pts(struct path *path)
+{
+ /* Find something mounted on "pts" in the same directory as
+ * the input path.
+ */
+ struct dentry *child, *parent;
+ struct qstr this;
+ int ret;
+
+ ret = path_parent_directory(path);
+ if (ret)
+ return ret;
+
+ parent = path->dentry;
+ this.name = "pts";
+ this.len = 3;
+ child = d_hash_and_lookup(parent, &this);
+ if (!child)
+ return -ENOENT;
+
+ path->dentry = child;
+ dput(parent);
+ follow_mount(path);
+ return 0;
+}
+#endif
+
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
{
@@ -2995,9 +3030,13 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
}
if (*opened & FILE_CREATED)
fsnotify_create(dir, dentry);
- path->dentry = dentry;
- path->mnt = nd->path.mnt;
- return 1;
+ if (unlikely(d_is_negative(dentry))) {
+ error = -ENOENT;
+ } else {
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
+ return 1;
+ }
}
}
dput(dentry);
@@ -3166,9 +3205,7 @@ static int do_last(struct nameidata *nd,
int acc_mode = op->acc_mode;
unsigned seq;
struct inode *inode;
- struct path save_parent = { .dentry = NULL, .mnt = NULL };
struct path path;
- bool retried = false;
int error;
nd->flags &= ~LOOKUP_PARENT;
@@ -3211,7 +3248,6 @@ static int do_last(struct nameidata *nd,
return -EISDIR;
}
-retry_lookup:
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
error = mnt_want_write(nd->path.mnt);
if (!error)
@@ -3263,6 +3299,10 @@ retry_lookup:
got_write = false;
}
+ error = follow_managed(&path, nd);
+ if (unlikely(error < 0))
+ return error;
+
if (unlikely(d_is_negative(path.dentry))) {
path_to_nameidata(&path, nd);
return -ENOENT;
@@ -3278,10 +3318,6 @@ retry_lookup:
return -EEXIST;
}
- error = follow_managed(&path, nd);
- if (unlikely(error < 0))
- return error;
-
seq = 0; /* out of RCU mode, so the value doesn't matter */
inode = d_backing_inode(path.dentry);
finish_lookup:
@@ -3292,23 +3328,14 @@ finish_lookup:
if (unlikely(error))
return error;
- if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
- path_to_nameidata(&path, nd);
- } else {
- save_parent.dentry = nd->path.dentry;
- save_parent.mnt = mntget(path.mnt);
- nd->path.dentry = path.dentry;
-
- }
+ path_to_nameidata(&path, nd);
nd->inode = inode;
nd->seq = seq;
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
finish_open:
error = complete_walk(nd);
- if (error) {
- path_put(&save_parent);
+ if (error)
return error;
- }
audit_inode(nd->name, nd->path.dentry, 0);
error = -EISDIR;
if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
@@ -3331,13 +3358,9 @@ finish_open_created:
goto out;
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
error = vfs_open(&nd->path, file, current_cred());
- if (!error) {
- *opened |= FILE_OPENED;
- } else {
- if (error == -EOPENSTALE)
- goto stale_open;
+ if (error)
goto out;
- }
+ *opened |= FILE_OPENED;
opened:
error = open_check_o_direct(file);
if (!error)
@@ -3353,26 +3376,7 @@ out:
}
if (got_write)
mnt_drop_write(nd->path.mnt);
- path_put(&save_parent);
return error;
-
-stale_open:
- /* If no saved parent or already retried then can't retry */
- if (!save_parent.dentry || retried)
- goto out;
-
- BUG_ON(save_parent.dentry != dir);
- path_put(&nd->path);
- nd->path = save_parent;
- nd->inode = dir->d_inode;
- save_parent.mnt = NULL;
- save_parent.dentry = NULL;
- if (got_write) {
- mnt_drop_write(nd->path.mnt);
- got_write = false;
- }
- retried = true;
- goto retry_lookup;
}
static int do_tmpfile(struct nameidata *nd, unsigned flags,
diff --git a/fs/namespace.c b/fs/namespace.c
index 4fb1691b4355..419f746d851d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry)
goto out_unlock;
lock_mount_hash();
+ event++;
while (!hlist_empty(&mp->m_list)) {
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
@@ -2409,8 +2410,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
}
if (type->fs_flags & FS_USERNS_VISIBLE) {
- if (!fs_fully_visible(type, &mnt_flags))
+ if (!fs_fully_visible(type, &mnt_flags)) {
+ put_filesystem(type);
return -EPERM;
+ }
}
}
@@ -3245,6 +3248,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
+ /* Don't miss readonly hidden in the superblock flags */
+ if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
+ mnt_flags |= MNT_LOCK_READONLY;
+
/* Verify the mount flags are equal to or more permissive
* than the proposed new mount.
*/
@@ -3271,7 +3278,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode;
/* Only worry about locked mounts */
- if (!(mnt_flags & MNT_LOCKED))
+ if (!(child->mnt.mnt_flags & MNT_LOCKED))
continue;
/* Is the directory permanetly empty? */
if (!is_empty_dir_inode(inode))
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 17a42e4eb872..f55a4e756047 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -102,14 +102,15 @@ static inline void put_parallel(struct parallel_io *p)
}
static struct bio *
-bl_submit_bio(int rw, struct bio *bio)
+bl_submit_bio(struct bio *bio)
{
if (bio) {
get_parallel(bio->bi_private);
dprintk("%s submitting %s bio %u@%llu\n", __func__,
- rw == READ ? "read" : "write", bio->bi_iter.bi_size,
+ bio_op(bio) == READ ? "read" : "write",
+ bio->bi_iter.bi_size,
(unsigned long long)bio->bi_iter.bi_sector);
- submit_bio(rw, bio);
+ submit_bio(bio);
}
return NULL;
}
@@ -158,7 +159,7 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
if (disk_addr < map->start || disk_addr >= map->start + map->len) {
if (!dev->map(dev, disk_addr, map))
return ERR_PTR(-EIO);
- bio = bl_submit_bio(rw, bio);
+ bio = bl_submit_bio(bio);
}
disk_addr += map->disk_offset;
disk_addr -= map->start;
@@ -174,9 +175,10 @@ retry:
disk_addr >> SECTOR_SHIFT, end_io, par);
if (!bio)
return ERR_PTR(-ENOMEM);
+ bio_set_op_attrs(bio, rw, 0);
}
if (bio_add_page(bio, page, *len, offset) < *len) {
- bio = bl_submit_bio(rw, bio);
+ bio = bl_submit_bio(bio);
goto retry;
}
return bio;
@@ -252,7 +254,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)
for (i = pg_index; i < header->page_array.npages; i++) {
if (extent_length <= 0) {
/* We've used up the previous extent */
- bio = bl_submit_bio(READ, bio);
+ bio = bl_submit_bio(bio);
/* Get the next one */
if (!ext_tree_lookup(bl, isect, &be, false)) {
@@ -273,7 +275,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)
}
if (is_hole(&be)) {
- bio = bl_submit_bio(READ, bio);
+ bio = bl_submit_bio(bio);
/* Fill hole w/ zeroes w/o accessing device */
dprintk("%s Zeroing page for hole\n", __func__);
zero_user_segment(pages[i], pg_offset, pg_len);
@@ -306,7 +308,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)
header->res.count = (isect << SECTOR_SHIFT) - header->args.offset;
}
out:
- bl_submit_bio(READ, bio);
+ bl_submit_bio(bio);
blk_finish_plug(&plug);
put_parallel(par);
return PNFS_ATTEMPTED;
@@ -398,7 +400,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
for (i = pg_index; i < header->page_array.npages; i++) {
if (extent_length <= 0) {
/* We've used up the previous extent */
- bio = bl_submit_bio(WRITE, bio);
+ bio = bl_submit_bio(bio);
/* Get the next one */
if (!ext_tree_lookup(bl, isect, &be, true)) {
header->pnfs_error = -EINVAL;
@@ -427,7 +429,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
header->res.count = header->args.count;
out:
- bl_submit_bio(WRITE, bio);
+ bl_submit_bio(bio);
blk_finish_plug(&plug);
put_parallel(par);
return PNFS_ATTEMPTED;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index aaf7bd0cbae2..19d93d0cd400 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
static
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
{
+ struct inode *inode;
struct nfs_inode *nfsi;
if (d_really_is_negative(dentry))
return 0;
- nfsi = NFS_I(d_inode(dentry));
+ inode = d_inode(dentry);
+ if (is_bad_inode(inode) || NFS_STALE(inode))
+ return 0;
+
+ nfsi = NFS_I(inode);
if (entry->fattr->fileid == nfsi->fileid)
return 1;
if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
@@ -1363,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations);
struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
struct dentry *res;
- struct dentry *parent;
struct inode *inode = NULL;
struct nfs_fh *fhandle = NULL;
struct nfs_fattr *fattr = NULL;
@@ -1393,7 +1397,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
if (IS_ERR(label))
goto out;
- parent = dentry->d_parent;
/* Protect against concurrent sillydeletes */
trace_nfs_lookup_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
@@ -1482,11 +1485,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned open_flags,
umode_t mode, int *opened)
{
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
struct nfs_open_context *ctx;
struct dentry *res;
struct iattr attr = { .ia_valid = ATTR_OPEN };
struct inode *inode;
unsigned int lookup_flags = 0;
+ bool switched = false;
int err;
/* Expect a negative dentry */
@@ -1501,7 +1506,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
/* NFS only supports OPEN on regular files */
if ((open_flags & O_DIRECTORY)) {
- if (!d_unhashed(dentry)) {
+ if (!d_in_lookup(dentry)) {
/*
* Hashed negative dentry with O_DIRECTORY: dentry was
* revalidated and is fine, no need to perform lookup
@@ -1525,6 +1530,17 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
attr.ia_size = 0;
}
+ if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
+ d_drop(dentry);
+ switched = true;
+ dentry = d_alloc_parallel(dentry->d_parent,
+ &dentry->d_name, &wq);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ if (unlikely(!d_in_lookup(dentry)))
+ return finish_no_open(file, dentry);
+ }
+
ctx = create_nfs_open_context(dentry, open_flags);
err = PTR_ERR(ctx);
if (IS_ERR(ctx))
@@ -1536,9 +1552,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(inode);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
put_nfs_open_context(ctx);
+ d_drop(dentry);
switch (err) {
case -ENOENT:
- d_drop(dentry);
d_add(dentry, NULL);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
@@ -1560,14 +1576,23 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
put_nfs_open_context(ctx);
out:
+ if (unlikely(switched)) {
+ d_lookup_done(dentry);
+ dput(dentry);
+ }
return err;
no_open:
res = nfs_lookup(dir, dentry, lookup_flags);
- err = PTR_ERR(res);
+ if (switched) {
+ d_lookup_done(dentry);
+ if (!res)
+ res = dentry;
+ else
+ dput(dentry);
+ }
if (IS_ERR(res))
- goto out;
-
+ return PTR_ERR(res);
return finish_no_open(file, res);
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 979b3c4dee6a..c7326c2af2c3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -353,10 +353,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
result = wait_for_completion_killable(&dreq->completion);
+ if (!result) {
+ result = dreq->count;
+ WARN_ON_ONCE(dreq->count < 0);
+ }
if (!result)
result = dreq->error;
- if (!result)
- result = dreq->count;
out:
return (ssize_t) result;
@@ -386,8 +388,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
if (dreq->iocb) {
long res = (long) dreq->error;
- if (!res)
+ if (dreq->count != 0) {
res = (long) dreq->count;
+ WARN_ON_ONCE(dreq->count < 0);
+ }
dreq->iocb->ki_complete(dreq->iocb, res, 0);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52e7d6869e3b..dda689d7a8a7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque)
struct nfs_fattr *fattr = desc->fattr;
set_nfs_fileid(inode, fattr->fileid);
+ inode->i_mode = fattr->mode;
nfs_copy_fh(NFS_FH(inode), desc->fh);
return 0;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de97567795a5..ff416d0e24bc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2882,12 +2882,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
call_close |= is_wronly;
else if (is_wronly)
calldata->arg.fmode |= FMODE_WRITE;
+ if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE))
+ call_close |= is_rdwr;
} else if (is_rdwr)
calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
- if (calldata->arg.fmode == 0)
- call_close |= is_rdwr;
-
if (!nfs4_valid_open_stateid(state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -7924,8 +7923,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
break;
}
lo = NFS_I(inode)->layout;
- if (lo && nfs4_stateid_match(&lgp->args.stateid,
- &lo->plh_stateid)) {
+ if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
+ nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
LIST_HEAD(head);
/*
@@ -7936,10 +7935,10 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&head);
+ status = -EAGAIN;
+ goto out;
} else
spin_unlock(&inode->i_lock);
- status = -EAGAIN;
- goto out;
}
status = nfs4_handle_exception(server, status, exception);
@@ -8036,7 +8035,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
.flags = RPC_TASK_ASYNC,
};
struct pnfs_layout_segment *lseg = NULL;
- struct nfs4_exception exception = { .timeout = *timeout };
+ struct nfs4_exception exception = {
+ .inode = inode,
+ .timeout = *timeout,
+ };
int status = 0;
dprintk("--> %s\n", __func__);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9679f4749364..834b875900d6 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1488,9 +1488,9 @@ restart:
}
spin_unlock(&state->state_lock);
}
- nfs4_put_open_state(state);
clear_bit(NFS_STATE_RECLAIM_NOGRACE,
&state->flags);
+ nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
goto restart;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c7e0d45a4de..0fbe734cc38c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,8 +361,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
list_del_init(&lseg->pls_list);
/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
atomic_dec(&lo->plh_refcount);
- if (list_empty(&lo->plh_segs))
+ if (list_empty(&lo->plh_segs)) {
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ }
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}
@@ -1290,6 +1292,7 @@ alloc_init_layout_hdr(struct inode *ino,
INIT_LIST_HEAD(&lo->plh_bulk_destroy);
lo->plh_inode = ino;
lo->plh_lc_cred = get_rpccred(ctx->cred);
+ lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
return lo;
}
@@ -1297,6 +1300,8 @@ static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
struct nfs_open_context *ctx,
gfp_t gfp_flags)
+ __releases(&ino->i_lock)
+ __acquires(&ino->i_lock)
{
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_hdr *new = NULL;
@@ -1565,8 +1570,7 @@ lookup_again:
* stateid, or it has been invalidated, then we must use the open
* stateid.
*/
- if (lo->plh_stateid.seqid == 0 ||
- test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
+ if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
/*
* The first layoutget for the file. Need to serialize per
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 0dfc476da3e1..b38e3c0dc790 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
}
/* Helper function for pnfs_generic_commit_pagelist to catch an empty
- * page list. This can happen when two commits race. */
+ * page list. This can happen when two commits race.
+ *
+ * This must be called instead of nfs_init_commit - call one or the other, but
+ * not both!
+ */
static bool
pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
struct nfs_commit_data *data,
@@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
if (list_empty(pages)) {
if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
wake_up_atomic_t(&cinfo->mds->rpcs_out);
- nfs_commitdata_release(data);
+ /* don't call nfs_commitdata_release - it tries to put
+ * the open_context which is not acquired until nfs_init_commit
+ * which has not been called on @data */
+ WARN_ON_ONCE(data->context);
+ nfs_commit_free(data);
return true;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6776d7a7839e..572e5b3b06f1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page)
nfs_list_remove_request(new);
nfs_readpage_release(new);
error = desc->pgio->pg_error;
- goto out_unlock;
+ goto out;
}
return 0;
out_error:
error = PTR_ERR(new);
-out_unlock:
unlock_page(page);
+out:
return error;
}
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index e55b5242614d..31f3df193bdb 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -290,7 +290,7 @@ out_free_buf:
return error;
}
-#define NFSD_MDS_PR_KEY 0x0100000000000000
+#define NFSD_MDS_PR_KEY 0x0100000000000000ULL
/*
* We use the client ID as a unique key for the reservations.
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1580ea6fd64d..d08cd88155c7 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
goto out;
inode = d_inode(fh->fh_dentry);
- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
- error = -EOPNOTSUPP;
- goto out_errno;
- }
error = fh_want_write(fh);
if (error)
goto out_errno;
- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+ fh_lock(fh);
+
+ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
if (error)
- goto out_drop_write;
- error = inode->i_op->set_acl(inode, argp->acl_default,
- ACL_TYPE_DEFAULT);
+ goto out_drop_lock;
+ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
if (error)
- goto out_drop_write;
+ goto out_drop_lock;
+
+ fh_unlock(fh);
fh_drop_write(fh);
@@ -131,7 +130,8 @@ out:
posix_acl_release(argp->acl_access);
posix_acl_release(argp->acl_default);
return nfserr;
-out_drop_write:
+out_drop_lock:
+ fh_unlock(fh);
fh_drop_write(fh);
out_errno:
nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 01df4cd7c753..0c890347cde3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
goto out;
inode = d_inode(fh->fh_dentry);
- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
- error = -EOPNOTSUPP;
- goto out_errno;
- }
error = fh_want_write(fh);
if (error)
goto out_errno;
- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+ fh_lock(fh);
+
+ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
if (error)
- goto out_drop_write;
- error = inode->i_op->set_acl(inode, argp->acl_default,
- ACL_TYPE_DEFAULT);
+ goto out_drop_lock;
+ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
-out_drop_write:
+out_drop_lock:
+ fh_unlock(fh);
fh_drop_write(fh);
out_errno:
nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 6adabd6049b7..71292a0d6f09 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode = d_inode(dentry);
- if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
- return nfserr_attrnotsupp;
-
if (S_ISDIR(inode->i_mode))
flags = NFS4_ACL_DIR;
@@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_error < 0)
goto out_nfserr;
- host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS);
+ fh_lock(fhp);
+
+ host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
if (host_error < 0)
- goto out_release;
+ goto out_drop_lock;
if (S_ISDIR(inode->i_mode)) {
- host_error = inode->i_op->set_acl(inode, dpacl,
- ACL_TYPE_DEFAULT);
+ host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
}
-out_release:
+out_drop_lock:
+ fh_unlock(fhp);
+
posix_acl_release(pacl);
posix_acl_release(dpacl);
out_nfserr:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7389cb1d7409..04c68d900324 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
}
}
-static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
-{
- struct rpc_xprt *xprt;
-
- if (args->protocol != XPRT_TRANSPORT_BC_TCP)
- return rpc_create(args);
-
- xprt = args->bc_xprt->xpt_bc_xprt;
- if (xprt) {
- xprt_get(xprt);
- return rpc_create_xprt(args, xprt);
- }
-
- return rpc_create(args);
-}
-
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
int maxtime = max_cb_time(clp->net);
@@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
- client = create_backchannel_client(&args);
+ client = rpc_create(&args);
if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client: %ld\n",
PTR_ERR(client));
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f5f82e145018..70d0b9b33031 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3480,12 +3480,17 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
}
static struct nfs4_ol_stateid *
-init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
- struct nfsd4_open *open)
+init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
{
struct nfs4_openowner *oo = open->op_openowner;
struct nfs4_ol_stateid *retstp = NULL;
+ struct nfs4_ol_stateid *stp;
+
+ stp = open->op_stp;
+ /* We are moving these outside of the spinlocks to avoid the warnings */
+ mutex_init(&stp->st_mutex);
+ mutex_lock(&stp->st_mutex);
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
@@ -3493,6 +3498,8 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
retstp = nfsd4_find_existing_open(fp, open);
if (retstp)
goto out_unlock;
+
+ open->op_stp = NULL;
atomic_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
@@ -3502,14 +3509,19 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
- init_rwsem(&stp->st_rwsem);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
list_add(&stp->st_perfile, &fp->fi_stateids);
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
- return retstp;
+ if (retstp) {
+ mutex_lock(&retstp->st_mutex);
+ /* To keep mutex tracking happy */
+ mutex_unlock(&stp->st_mutex);
+ stp = retstp;
+ }
+ return stp;
}
/*
@@ -4305,7 +4317,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
struct nfs4_file *fp = NULL;
struct nfs4_ol_stateid *stp = NULL;
- struct nfs4_ol_stateid *swapstp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
@@ -4335,32 +4346,28 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
- down_read(&stp->st_rwsem);
+ mutex_lock(&stp->st_mutex);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
} else {
- stp = open->op_stp;
- open->op_stp = NULL;
- swapstp = init_open_stateid(stp, fp, open);
- if (swapstp) {
- nfs4_put_stid(&stp->st_stid);
- stp = swapstp;
- down_read(&stp->st_rwsem);
+ /* stp is returned locked. */
+ stp = init_open_stateid(fp, open);
+ /* See if we lost the race to some other thread */
+ if (stp->st_access_bmap != 0) {
status = nfs4_upgrade_open(rqstp, fp, current_fh,
stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
goto upgrade_out;
}
- down_read(&stp->st_rwsem);
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
release_open_stateid(stp);
goto out;
}
@@ -4372,7 +4379,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
}
upgrade_out:
nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
if (nfsd4_has_session(&resp->cstate)) {
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4977,12 +4984,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* revoked delegations are kept only for free_stateid.
*/
return nfserr_bad_stateid;
- down_write(&stp->st_rwsem);
+ mutex_lock(&stp->st_mutex);
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
if (status == nfs_ok)
status = nfs4_check_fh(current_fh, &stp->st_stid);
if (status != nfs_ok)
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
return status;
}
@@ -5030,7 +5037,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
return status;
oo = openowner(stp->st_stateowner);
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
}
@@ -5062,12 +5069,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
if (oo->oo_flags & NFS4_OO_CONFIRMED) {
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto put_stateid;
}
oo->oo_flags |= NFS4_OO_CONFIRMED;
nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
@@ -5143,7 +5150,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
status = nfs_ok;
put_stateid:
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5196,7 +5203,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfsd4_close_open_stateid(stp);
@@ -5422,7 +5429,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
- init_rwsem(&stp->st_rwsem);
+ mutex_init(&stp->st_mutex);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -5591,7 +5598,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&open_stp, nn);
if (status)
goto out;
- up_write(&open_stp->st_rwsem);
+ mutex_unlock(&open_stp->st_mutex);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5600,7 +5607,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
if (status == nfs_ok)
- down_write(&lock_stp->st_rwsem);
+ mutex_lock(&lock_stp->st_mutex);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
@@ -5704,7 +5711,7 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
- up_write(&lock_stp->st_rwsem);
+ mutex_unlock(&lock_stp->st_mutex);
/*
* If this is a new, never-before-used stateid, and we are
@@ -5874,7 +5881,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fput:
fput(filp);
put_stateid:
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 986e51e5ceac..64053eadeb81 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -535,7 +535,7 @@ struct nfs4_ol_stateid {
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid *st_openstp;
- struct rw_semaphore st_rwsem;
+ struct mutex st_mutex;
};
static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 0576033699bc..4cca998ec7a0 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -62,7 +62,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
}
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
- sector_t pblocknr, int mode,
+ sector_t pblocknr, int mode, int mode_flags,
struct buffer_head **pbh, sector_t *submit_ptr)
{
struct buffer_head *bh;
@@ -95,7 +95,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
}
}
- if (mode == READA) {
+ if (mode_flags & REQ_RAHEAD) {
if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) {
err = -EBUSY; /* internal code */
brelse(bh);
@@ -114,7 +114,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
bh->b_blocknr = pblocknr; /* set block address for read */
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(mode, bh);
+ submit_bh(mode, mode_flags, bh);
bh->b_blocknr = blocknr; /* set back to the given block address */
*submit_ptr = pblocknr;
err = 0;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 2cc1b80e18f7..4e8aaa1aeb65 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -43,7 +43,7 @@ void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
__u64 blocknr);
int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int,
- struct buffer_head **, sector_t *);
+ int, struct buffer_head **, sector_t *);
void nilfs_btnode_delete(struct buffer_head *);
int nilfs_btnode_prepare_change_key(struct address_space *,
struct nilfs_btnode_chkey_ctxt *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index eccb1c89ccbb..982d1e3df3a5 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -476,7 +476,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
sector_t submit_ptr = 0;
int ret;
- ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
+ ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
+ &submit_ptr);
if (ret) {
if (ret != -EEXIST)
return ret;
@@ -492,7 +493,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
n > 0 && i < ra->ncmax; n--, i++) {
ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
- ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
+ ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
+ REQ_OP_READ, REQ_RAHEAD,
&ra_bh, &submit_ptr);
if (likely(!ret || ret == -EEXIST))
brelse(ra_bh);
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 693aded72498..e9148f94d696 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -101,7 +101,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
bh->b_blocknr = pbn;
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, 0, bh);
if (vbn)
bh->b_blocknr = vbn;
out:
@@ -138,7 +138,8 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
int ret;
ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache,
- vbn ? : pbn, pbn, READ, out_bh, &pbn);
+ vbn ? : pbn, pbn, REQ_OP_READ, 0,
+ out_bh, &pbn);
if (ret == -EEXIST) /* internal code (cache hit) */
ret = 0;
return ret;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 3417d859a03c..0d7b71fbeff8 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -121,7 +121,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
static int
nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
- int mode, struct buffer_head **out_bh)
+ int mode, int mode_flags, struct buffer_head **out_bh)
{
struct buffer_head *bh;
__u64 blknum = 0;
@@ -135,7 +135,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
if (buffer_uptodate(bh))
goto out;
- if (mode == READA) {
+ if (mode_flags & REQ_RAHEAD) {
if (!trylock_buffer(bh)) {
ret = -EBUSY;
goto failed_bh;
@@ -157,7 +157,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(mode, bh);
+ submit_bh(mode, mode_flags, bh);
ret = 0;
trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode);
@@ -181,7 +181,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS;
int err;
- err = nilfs_mdt_submit_block(inode, block, READ, &first_bh);
+ err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, 0, &first_bh);
if (err == -EEXIST) /* internal code */
goto out;
@@ -191,7 +191,8 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block,
if (readahead) {
blkoff = block + 1;
for (i = 0; i < nr_ra_blocks; i++, blkoff++) {
- err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh);
+ err = nilfs_mdt_submit_block(inode, blkoff, REQ_OP_READ,
+ REQ_RAHEAD, &bh);
if (likely(!err || err == -EEXIST))
brelse(bh);
else if (err != -EBUSY)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index bf36df10540b..a962d7d83447 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -346,7 +346,8 @@ static void nilfs_end_bio_write(struct bio *bio)
}
static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
- struct nilfs_write_info *wi, int mode)
+ struct nilfs_write_info *wi, int mode,
+ int mode_flags)
{
struct bio *bio = wi->bio;
int err;
@@ -364,7 +365,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
bio->bi_end_io = nilfs_end_bio_write;
bio->bi_private = segbuf;
- submit_bio(mode, bio);
+ bio_set_op_attrs(bio, mode, mode_flags);
+ submit_bio(bio);
segbuf->sb_nbio++;
wi->bio = NULL;
@@ -437,7 +439,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf,
return 0;
}
/* bio is FULL */
- err = nilfs_segbuf_submit_bio(segbuf, wi, mode);
+ err = nilfs_segbuf_submit_bio(segbuf, wi, mode, 0);
/* never submit current bh */
if (likely(!err))
goto repeat;
@@ -461,19 +463,19 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
{
struct nilfs_write_info wi;
struct buffer_head *bh;
- int res = 0, rw = WRITE;
+ int res = 0;
wi.nilfs = nilfs;
nilfs_segbuf_prepare_write(segbuf, &wi);
list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) {
- res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE);
if (unlikely(res))
goto failed_bio;
}
list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
- res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw);
+ res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, REQ_OP_WRITE);
if (unlikely(res))
goto failed_bio;
}
@@ -483,8 +485,8 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
* Last BIO is always sent through the following
* submission.
*/
- rw |= REQ_SYNC;
- res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
+ res = nilfs_segbuf_submit_bio(segbuf, &wi, REQ_OP_WRITE,
+ REQ_SYNC);
}
failed_bio:
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 809bd2de7ad0..e9fd241b9a0a 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
return 0;
bytes = le16_to_cpu(sbp->s_bytes);
- if (bytes > BLOCK_SIZE)
+ if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
return 0;
crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
sumoff);
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 97768a1379f2..fe251f187ff8 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -362,7 +362,7 @@ handle_zblock:
for (i = 0; i < nr; i++) {
tbh = arr[i];
if (likely(!buffer_uptodate(tbh)))
- submit_bh(READ, tbh);
+ submit_bh(REQ_OP_READ, 0, tbh);
else
ntfs_end_buffer_async_read(tbh, 1);
}
@@ -877,7 +877,7 @@ lock_retry_remap:
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
need_end_writeback = false;
}
bh = next;
@@ -1202,7 +1202,7 @@ lock_retry_remap:
BUG_ON(!buffer_mapped(tbh));
get_bh(tbh);
tbh->b_end_io = end_buffer_write_sync;
- submit_bh(WRITE, tbh);
+ submit_bh(REQ_OP_WRITE, 0, tbh);
}
/* Synchronize the mft mirror now if not @sync. */
if (is_mft && !sync)
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index f2b5e746f49b..f8eb04387ca4 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -670,7 +670,7 @@ lock_retry_remap:
}
get_bh(tbh);
tbh->b_end_io = end_buffer_read_sync;
- submit_bh(READ, tbh);
+ submit_bh(REQ_OP_READ, 0, tbh);
}
/* Wait for io completion on all buffer heads. */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5622ed5a201e..f548629dfaac 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -553,7 +553,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
lock_buffer(bh);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- return submit_bh(READ, bh);
+ return submit_bh(REQ_OP_READ, 0, bh);
}
/**
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 9d71213ca81e..761f12f7f3ef 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -821,7 +821,7 @@ map_vcn:
* completed ignore errors afterwards as we can assume
* that if one buffer worked all of them will work.
*/
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
if (should_wait) {
should_wait = false;
wait_on_buffer(bh);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 37b2501caaa4..d15d492ce47b 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -592,7 +592,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
clear_buffer_dirty(tbh);
get_bh(tbh);
tbh->b_end_io = end_buffer_write_sync;
- submit_bh(WRITE, tbh);
+ submit_bh(REQ_OP_WRITE, 0, tbh);
}
/* Wait on i/o completion of buffers. */
for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
@@ -785,7 +785,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
clear_buffer_dirty(tbh);
get_bh(tbh);
tbh->b_end_io = end_buffer_write_sync;
- submit_bh(WRITE, tbh);
+ submit_bh(REQ_OP_WRITE, 0, tbh);
}
/* Synchronize the mft mirror now if not @sync. */
if (!sync && ni->mft_no < vol->mftmirr_size)
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index e27e6527912b..4342c7ee7d20 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,7 +1,5 @@
ccflags-y := -Ifs/ocfs2
-ccflags-y += -DCATCH_BH_JBD_RACES
-
obj-$(CONFIG_OCFS2_FS) += \
ocfs2.o \
ocfs2_stackglue.o
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c034edf3ef38..e97a37179614 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -640,7 +640,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
!buffer_new(bh) &&
ocfs2_should_read_blk(inode, page, block_start) &&
(block_start < from || block_end > to)) {
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
*wait_bh++=bh;
}
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index fe50ded1b4ce..8f040f88ade4 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -79,7 +79,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
get_bh(bh); /* for end_buffer_write_sync() */
bh->b_end_io = end_buffer_write_sync;
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
wait_on_buffer(bh);
@@ -139,17 +139,22 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
lock_buffer(bh);
if (buffer_jbd(bh)) {
+#ifdef CATCH_BH_JBD_RACES
mlog(ML_ERROR,
"block %llu had the JBD bit set "
"while I was in lock_buffer!",
(unsigned long long)bh->b_blocknr);
BUG();
+#else
+ unlock_buffer(bh);
+ continue;
+#endif
}
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, 0, bh);
}
for (i = nr; i > 0; i--) {
@@ -305,7 +310,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
if (validate)
set_buffer_needs_validate(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ, bh);
+ submit_bh(REQ_OP_READ, 0, bh);
continue;
}
}
@@ -419,7 +424,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
get_bh(bh); /* for end_buffer_write_sync() */
bh->b_end_io = end_buffer_write_sync;
ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
wait_on_buffer(bh);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 6aaf3e351391..636abcbd4650 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -530,7 +530,8 @@ static void o2hb_bio_end_io(struct bio *bio)
static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
struct o2hb_bio_wait_ctxt *wc,
unsigned int *current_slot,
- unsigned int max_slots)
+ unsigned int max_slots, int op,
+ int op_flags)
{
int len, current_page;
unsigned int vec_len, vec_start;
@@ -556,6 +557,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
bio->bi_bdev = reg->hr_bdev;
bio->bi_private = wc;
bio->bi_end_io = o2hb_bio_end_io;
+ bio_set_op_attrs(bio, op, op_flags);
vec_start = (cs << bits) % PAGE_SIZE;
while(cs < max_slots) {
@@ -591,7 +593,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
o2hb_bio_wait_init(&wc);
while(current_slot < max_slots) {
- bio = o2hb_setup_one_bio(reg, &wc, &current_slot, max_slots);
+ bio = o2hb_setup_one_bio(reg, &wc, &current_slot, max_slots,
+ REQ_OP_READ, 0);
if (IS_ERR(bio)) {
status = PTR_ERR(bio);
mlog_errno(status);
@@ -599,7 +602,7 @@ static int o2hb_read_slots(struct o2hb_region *reg,
}
atomic_inc(&wc.wc_num_reqs);
- submit_bio(READ, bio);
+ submit_bio(bio);
}
status = 0;
@@ -623,7 +626,8 @@ static int o2hb_issue_node_write(struct o2hb_region *reg,
slot = o2nm_this_node();
- bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1);
+ bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE,
+ WRITE_SYNC);
if (IS_ERR(bio)) {
status = PTR_ERR(bio);
mlog_errno(status);
@@ -631,7 +635,7 @@ static int o2hb_issue_node_write(struct o2hb_region *reg,
}
atomic_inc(&write_wc->wc_num_reqs);
- submit_bio(WRITE_SYNC, bio);
+ submit_bio(bio);
status = 0;
bail:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d7cae3327de5..3971146228d3 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1819,7 +1819,7 @@ static int ocfs2_get_sector(struct super_block *sb,
if (!buffer_dirty(*bh))
clear_buffer_uptodate(*bh);
unlock_buffer(*bh);
- ll_rw_block(READ, 1, bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, bh);
wait_on_buffer(*bh);
if (!buffer_uptodate(*bh)) {
mlog_errno(-EIO);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 22f0253a3567..5c9d2d80ff70 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -405,12 +405,21 @@ static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
} else {
const struct cred *old_cred;
+ struct cred *override_cred;
old_cred = ovl_override_creds(dentry->d_sb);
- err = ovl_create_over_whiteout(dentry, inode, &stat, link,
- hardlink);
+ err = -ENOMEM;
+ override_cred = prepare_creds();
+ if (override_cred) {
+ override_cred->fsuid = old_cred->fsuid;
+ override_cred->fsgid = old_cred->fsgid;
+ put_cred(override_creds(override_cred));
+ put_cred(override_cred);
+ err = ovl_create_over_whiteout(dentry, inode, &stat,
+ link, hardlink);
+ }
revert_creds(old_cred);
}
@@ -496,6 +505,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
struct dentry *upper;
struct dentry *opaquedir = NULL;
int err;
+ int flags = 0;
if (WARN_ON(!workdir))
return -EROFS;
@@ -525,46 +535,39 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
if (err)
goto out_dput;
- whiteout = ovl_whiteout(workdir, dentry);
- err = PTR_ERR(whiteout);
- if (IS_ERR(whiteout))
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
goto out_unlock;
- upper = ovl_dentry_upper(dentry);
- if (!upper) {
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto kill_whiteout;
-
- err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
- dput(upper);
- if (err)
- goto kill_whiteout;
- } else {
- int flags = 0;
+ err = -ESTALE;
+ if ((opaquedir && upper != opaquedir) ||
+ (!opaquedir && ovl_dentry_upper(dentry) &&
+ upper != ovl_dentry_upper(dentry))) {
+ goto out_dput_upper;
+ }
- if (opaquedir)
- upper = opaquedir;
- err = -ESTALE;
- if (upper->d_parent != upperdir)
- goto kill_whiteout;
+ whiteout = ovl_whiteout(workdir, dentry);
+ err = PTR_ERR(whiteout);
+ if (IS_ERR(whiteout))
+ goto out_dput_upper;
- if (is_dir)
- flags |= RENAME_EXCHANGE;
+ if (d_is_dir(upper))
+ flags = RENAME_EXCHANGE;
- err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
- if (err)
- goto kill_whiteout;
+ err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+ if (err)
+ goto kill_whiteout;
+ if (flags)
+ ovl_cleanup(wdir, upper);
- if (is_dir)
- ovl_cleanup(wdir, upper);
- }
ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
d_drop(dentry);
dput(whiteout);
+out_dput_upper:
+ dput(upper);
out_unlock:
unlock_rename(workdir, upperdir);
out_dput:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 0ed7c4012437..d1cdc60dd68f 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -59,16 +59,40 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (err)
goto out;
+ if (attr->ia_valid & ATTR_SIZE) {
+ struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+
+ err = -ETXTBSY;
+ if (atomic_read(&realinode->i_writecount) < 0)
+ goto out_drop_write;
+ }
+
err = ovl_copy_up(dentry);
if (!err) {
+ struct inode *winode = NULL;
+
upperdentry = ovl_dentry_upper(dentry);
+ if (attr->ia_valid & ATTR_SIZE) {
+ winode = d_inode(upperdentry);
+ err = get_write_access(winode);
+ if (err)
+ goto out_drop_write;
+ }
+
+ if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+ attr->ia_valid &= ~ATTR_MODE;
+
inode_lock(upperdentry->d_inode);
err = notify_change(upperdentry, attr, NULL);
if (!err)
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
inode_unlock(upperdentry->d_inode);
+
+ if (winode)
+ put_write_access(winode);
}
+out_drop_write:
ovl_drop_write(dentry);
out:
return err;
@@ -121,16 +145,18 @@ int ovl_permission(struct inode *inode, int mask)
err = vfs_getattr(&realpath, &stat);
if (err)
- return err;
+ goto out_dput;
+ err = -ESTALE;
if ((stat.mode ^ inode->i_mode) & S_IFMT)
- return -ESTALE;
+ goto out_dput;
inode->i_mode = stat.mode;
inode->i_uid = stat.uid;
inode->i_gid = stat.gid;
- return generic_permission(inode, mask);
+ err = generic_permission(inode, mask);
+ goto out_dput;
}
/* Careful in RCU walk mode */
@@ -238,41 +264,27 @@ out:
return err;
}
-static bool ovl_need_xattr_filter(struct dentry *dentry,
- enum ovl_path_type type)
-{
- if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
- return S_ISDIR(dentry->d_inode->i_mode);
- else
- return false;
-}
-
ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
const char *name, void *value, size_t size)
{
- struct path realpath;
- enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+ struct dentry *realdentry = ovl_dentry_real(dentry);
- if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+ if (ovl_is_private_xattr(name))
return -ENODATA;
- return vfs_getxattr(realpath.dentry, name, value, size);
+ return vfs_getxattr(realdentry, name, value, size);
}
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
- struct path realpath;
- enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+ struct dentry *realdentry = ovl_dentry_real(dentry);
ssize_t res;
int off;
- res = vfs_listxattr(realpath.dentry, list, size);
+ res = vfs_listxattr(realdentry, list, size);
if (res <= 0 || size == 0)
return res;
- if (!ovl_need_xattr_filter(dentry, type))
- return res;
-
/* filter out private xattrs */
for (off = 0; off < res;) {
char *s = list + off;
@@ -302,7 +314,7 @@ int ovl_removexattr(struct dentry *dentry, const char *name)
goto out;
err = -ENODATA;
- if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+ if (ovl_is_private_xattr(name))
goto out_drop_write;
if (!OVL_TYPE_UPPER(type)) {
@@ -401,12 +413,11 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
if (!inode)
return NULL;
- mode &= S_IFMT;
-
inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_flags |= S_NOATIME | S_NOCMTIME;
+ mode &= S_IFMT;
switch (mode) {
case S_IFDIR:
inode->i_private = oe;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 4bd9b5ba8f42..cfbca53590d0 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -187,6 +187,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
to->i_gid = from->i_gid;
+ to->i_mode = from->i_mode;
}
/* dir.c */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ce02f46029da..9a7693d5f8ff 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1082,11 +1082,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err < 0)
goto out_put_workdir;
- if (!err) {
- pr_err("overlayfs: upper fs needs to support d_type.\n");
- err = -EINVAL;
- goto out_put_workdir;
- }
+ /*
+ * We allowed this configuration and don't want to
+ * break users over kernel upgrade. So warn instead
+ * of erroring out.
+ */
+ if (!err)
+ pr_warn("overlayfs: upper fs needs to support d_type.\n");
}
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 8a4a266beff3..edc452c2a563 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -820,39 +820,43 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
return error;
}
-static int
-posix_acl_xattr_set(const struct xattr_handler *handler,
- struct dentry *unused, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
+int
+set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
{
- struct posix_acl *acl = NULL;
- int ret;
-
if (!IS_POSIXACL(inode))
return -EOPNOTSUPP;
if (!inode->i_op->set_acl)
return -EOPNOTSUPP;
- if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
- return value ? -EACCES : 0;
+ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
if (!inode_owner_or_capable(inode))
return -EPERM;
+ if (acl) {
+ int ret = posix_acl_valid(acl);
+ if (ret)
+ return ret;
+ }
+ return inode->i_op->set_acl(inode, acl, type);
+}
+EXPORT_SYMBOL(set_posix_acl);
+
+static int
+posix_acl_xattr_set(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct posix_acl *acl = NULL;
+ int ret;
+
if (value) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
-
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret)
- goto out;
- }
}
-
- ret = inode->i_op->set_acl(inode, acl, handler->flags);
-out:
+ ret = set_posix_acl(inode, handler->flags, acl);
posix_acl_release(acl);
return ret;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 55bc7d6c8aac..06702783bf40 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
if (IS_ERR(sb))
return ERR_CAST(sb);
+ /*
+ * procfs isn't actually a stacking filesystem; however, there is
+ * too much magic going on inside it to permit stacking things on
+ * top of it
+ */
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+
if (!proc_parse_options(options, ns)) {
deactivate_locked_super(sb);
return ERR_PTR(-EINVAL);
diff --git a/fs/read_write.c b/fs/read_write.c
index 933b53a375b4..66215a7b17cf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1168,6 +1168,15 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
return do_compat_preadv64(fd, vec, vlen, pos, 0);
}
+#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
+COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, loff_t, pos, int, flags)
+{
+ return do_compat_preadv64(fd, vec, vlen, pos, flags);
+}
+#endif
+
COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
@@ -1265,6 +1274,15 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
return do_compat_pwritev64(fd, vec, vlen, pos, 0);
}
+#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
+COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, loff_t, pos, int, flags)
+{
+ return do_compat_pwritev64(fd, vec, vlen, pos, flags);
+}
+#endif
+
COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 825455d3e4ba..c2c59f9ff04b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2668,7 +2668,7 @@ static int reiserfs_write_full_page(struct page *page,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
nr++;
}
put_bh(bh);
@@ -2728,7 +2728,7 @@ fail:
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
nr++;
}
put_bh(bh);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2ace90e981f0..bc2dde2423c2 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -652,7 +652,7 @@ static void submit_logged_buffer(struct buffer_head *bh)
BUG();
if (!buffer_uptodate(bh))
BUG();
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
}
static void submit_ordered_buffer(struct buffer_head *bh)
@@ -662,7 +662,7 @@ static void submit_ordered_buffer(struct buffer_head *bh)
clear_buffer_dirty(bh);
if (!buffer_uptodate(bh))
BUG();
- submit_bh(WRITE, bh);
+ submit_bh(REQ_OP_WRITE, 0, bh);
}
#define CHUNK_SIZE 32
@@ -870,7 +870,7 @@ loop_next:
*/
if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
spin_unlock(lock);
- ll_rw_block(WRITE, 1, &bh);
+ ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
spin_lock(lock);
}
put_bh(bh);
@@ -1057,7 +1057,7 @@ static int flush_commit_list(struct super_block *s,
if (tbh) {
if (buffer_dirty(tbh)) {
depth = reiserfs_write_unlock_nested(s);
- ll_rw_block(WRITE, 1, &tbh);
+ ll_rw_block(REQ_OP_WRITE, 0, 1, &tbh);
reiserfs_write_lock_nested(s, depth);
}
put_bh(tbh) ;
@@ -2244,7 +2244,7 @@ abort_replay:
}
}
/* read in the log blocks, memcpy to the corresponding real block */
- ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
+ ll_rw_block(REQ_OP_READ, 0, get_desc_trans_len(desc), log_blocks);
for (i = 0; i < get_desc_trans_len(desc); i++) {
wait_on_buffer(log_blocks[i]);
@@ -2269,7 +2269,7 @@ abort_replay:
/* flush out the real blocks */
for (i = 0; i < get_desc_trans_len(desc); i++) {
set_buffer_dirty(real_blocks[i]);
- write_dirty_buffer(real_blocks[i], WRITE);
+ write_dirty_buffer(real_blocks[i], 0);
}
for (i = 0; i < get_desc_trans_len(desc); i++) {
wait_on_buffer(real_blocks[i]);
@@ -2346,7 +2346,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
} else
bhlist[j++] = bh;
}
- ll_rw_block(READ, j, bhlist);
+ ll_rw_block(REQ_OP_READ, 0, j, bhlist);
for (i = 1; i < j; i++)
brelse(bhlist[i]);
bh = bhlist[0];
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 5feacd689241..4032d1e87c8f 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -551,7 +551,7 @@ static int search_by_key_reada(struct super_block *s,
if (!buffer_uptodate(bh[j])) {
if (depth == -1)
depth = reiserfs_write_unlock_nested(s);
- ll_rw_block(READA, 1, bh + j);
+ ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j);
}
brelse(bh[j]);
}
@@ -660,7 +660,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key,
if (!buffer_uptodate(bh) && depth == -1)
depth = reiserfs_write_unlock_nested(sb);
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
if (depth != -1)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b8f2d1e8c645..7a4a85a6821e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1393,7 +1393,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
unsigned long safe_mask = 0;
unsigned int commit_max_age = (unsigned int)-1;
struct reiserfs_journal *journal = SB_JOURNAL(s);
- char *new_opts = kstrdup(arg, GFP_KERNEL);
+ char *new_opts;
int err;
char *qf_names[REISERFS_MAXQUOTAS];
unsigned int qfmt = 0;
@@ -1401,6 +1401,10 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
int i;
#endif
+ new_opts = kstrdup(arg, GFP_KERNEL);
+ if (arg && !new_opts)
+ return -ENOMEM;
+
sync_filesystem(s);
reiserfs_write_lock(s);
@@ -1546,7 +1550,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
}
out_ok_unlocked:
- replace_mount_options(s, new_opts);
+ if (new_opts)
+ replace_mount_options(s, new_opts);
return 0;
out_err_unlock:
@@ -1661,7 +1666,7 @@ static int read_super_block(struct super_block *s, int offset)
/* after journal replay, reread all bitmap and super blocks */
static int reread_meta_blocks(struct super_block *s)
{
- ll_rw_block(READ, 1, &SB_BUFFER_WITH_SB(s));
+ ll_rw_block(REQ_OP_READ, 0, 1, &SB_BUFFER_WITH_SB(s));
wait_on_buffer(SB_BUFFER_WITH_SB(s));
if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) {
reiserfs_warning(s, "reiserfs-2504", "error reading the super");
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2c2618410d51..ce62a380314f 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -124,7 +124,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
goto block_release;
bytes += msblk->devblksize;
}
- ll_rw_block(READ, b, bh);
+ ll_rw_block(REQ_OP_READ, 0, b, bh);
} else {
/*
* Metadata block.
@@ -156,7 +156,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
goto block_release;
bytes += msblk->devblksize;
}
- ll_rw_block(READ, b - 1, bh + 1);
+ ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1);
}
for (i = 0; i < b; i++) {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 053818dd6c18..9ae4abb4110b 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -390,6 +390,11 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
clockid != CLOCK_BOOTTIME_ALARM))
return -EINVAL;
+ if (!capable(CAP_WAKE_ALARM) &&
+ (clockid == CLOCK_REALTIME_ALARM ||
+ clockid == CLOCK_BOOTTIME_ALARM))
+ return -EPERM;
+
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -433,6 +438,11 @@ static int do_timerfd_settime(int ufd, int flags,
return ret;
ctx = f.file->private_data;
+ if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) {
+ fdput(f);
+ return -EPERM;
+ }
+
timerfd_setup_cancel(ctx, flags);
/*
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 08316972ff93..7bbf420d1289 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -52,6 +52,7 @@
#include "ubifs.h"
#include <linux/mount.h>
#include <linux/slab.h>
+#include <linux/migrate.h>
static int read_block(struct inode *inode, void *addr, unsigned int block,
struct ubifs_data_node *dn)
@@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page)
return ret;
}
+#ifdef CONFIG_MIGRATION
+static int ubifs_migrate_page(struct address_space *mapping,
+ struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+ int rc;
+
+ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ SetPagePrivate(newpage);
+ }
+
+ migrate_page_copy(newpage, page);
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
/*
@@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = {
.write_end = ubifs_write_end,
.invalidatepage = ubifs_invalidatepage,
.set_page_dirty = ubifs_set_page_dirty,
+#ifdef CONFIG_MIGRATION
+ .migratepage = ubifs_migrate_page,
+#endif
.releasepage = ubifs_releasepage,
};
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 4c5593abc553..aaec13c95253 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -113,7 +113,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
brelse(tmp);
}
if (num) {
- ll_rw_block(READA, num, bha);
+ ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha);
for (i = 0; i < num; i++)
brelse(bha[i]);
}
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index c763fda257bf..988d5352bdb8 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -87,7 +87,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
brelse(tmp);
}
if (num) {
- ll_rw_block(READA, num, bha);
+ ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha);
for (i = 0; i < num; i++)
brelse(bha[i]);
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index f323aff740ef..55aa587bbc38 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1199,7 +1199,7 @@ struct buffer_head *udf_bread(struct inode *inode, int block,
if (buffer_uptodate(bh))
return bh;
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 5f861ed287c3..888c364b2fe9 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -295,7 +295,8 @@ static uint32_t udf_try_read_meta(struct inode *inode, uint32_t block,
map = &UDF_SB(sb)->s_partmaps[partition];
/* map to sparable/physical partition desc */
phyblock = udf_get_pblock(sb, eloc.logicalBlockNum,
- map->s_partition_num, ext_offset + offset);
+ map->s_type_specific.s_metadata.s_phys_partition_ref,
+ ext_offset + offset);
}
brelse(epos.bh);
@@ -317,14 +318,18 @@ uint32_t udf_get_pblock_meta25(struct super_block *sb, uint32_t block,
mdata = &map->s_type_specific.s_metadata;
inode = mdata->s_metadata_fe ? : mdata->s_mirror_fe;
- /* We shouldn't mount such media... */
- BUG_ON(!inode);
+ if (!inode)
+ return 0xFFFFFFFF;
+
retblk = udf_try_read_meta(inode, block, partition, offset);
if (retblk == 0xFFFFFFFF && mdata->s_metadata_fe) {
udf_warn(sb, "error reading from METADATA, trying to read from MIRROR\n");
if (!(mdata->s_flags & MF_MIRROR_FE_LOADED)) {
mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb,
- mdata->s_mirror_file_loc, map->s_partition_num);
+ mdata->s_mirror_file_loc,
+ mdata->s_phys_partition_ref);
+ if (IS_ERR(mdata->s_mirror_fe))
+ mdata->s_mirror_fe = NULL;
mdata->s_flags |= MF_MIRROR_FE_LOADED;
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 5e2c8c814e1b..4942549e7dc8 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -951,13 +951,13 @@ out2:
}
struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
- u32 meta_file_loc, u32 partition_num)
+ u32 meta_file_loc, u32 partition_ref)
{
struct kernel_lb_addr addr;
struct inode *metadata_fe;
addr.logicalBlockNum = meta_file_loc;
- addr.partitionReferenceNum = partition_num;
+ addr.partitionReferenceNum = partition_ref;
metadata_fe = udf_iget_special(sb, &addr);
@@ -974,7 +974,8 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
return metadata_fe;
}
-static int udf_load_metadata_files(struct super_block *sb, int partition)
+static int udf_load_metadata_files(struct super_block *sb, int partition,
+ int type1_index)
{
struct udf_sb_info *sbi = UDF_SB(sb);
struct udf_part_map *map;
@@ -984,20 +985,21 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
map = &sbi->s_partmaps[partition];
mdata = &map->s_type_specific.s_metadata;
+ mdata->s_phys_partition_ref = type1_index;
/* metadata address */
udf_debug("Metadata file location: block = %d part = %d\n",
- mdata->s_meta_file_loc, map->s_partition_num);
+ mdata->s_meta_file_loc, mdata->s_phys_partition_ref);
fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc,
- map->s_partition_num);
+ mdata->s_phys_partition_ref);
if (IS_ERR(fe)) {
/* mirror file entry */
udf_debug("Mirror metadata file location: block = %d part = %d\n",
- mdata->s_mirror_file_loc, map->s_partition_num);
+ mdata->s_mirror_file_loc, mdata->s_phys_partition_ref);
fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc,
- map->s_partition_num);
+ mdata->s_phys_partition_ref);
if (IS_ERR(fe)) {
udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
@@ -1015,7 +1017,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
*/
if (mdata->s_bitmap_file_loc != 0xFFFFFFFF) {
addr.logicalBlockNum = mdata->s_bitmap_file_loc;
- addr.partitionReferenceNum = map->s_partition_num;
+ addr.partitionReferenceNum = mdata->s_phys_partition_ref;
udf_debug("Bitmap file location: block = %d part = %d\n",
addr.logicalBlockNum, addr.partitionReferenceNum);
@@ -1283,7 +1285,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
p = (struct partitionDesc *)bh->b_data;
partitionNumber = le16_to_cpu(p->partitionNumber);
- /* First scan for TYPE1, SPARABLE and METADATA partitions */
+ /* First scan for TYPE1 and SPARABLE partitions */
for (i = 0; i < sbi->s_partitions; i++) {
map = &sbi->s_partmaps[i];
udf_debug("Searching map: (%d == %d)\n",
@@ -1333,7 +1335,7 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
goto out_bh;
if (map->s_partition_type == UDF_METADATA_MAP25) {
- ret = udf_load_metadata_files(sb, i);
+ ret = udf_load_metadata_files(sb, i, type1_idx);
if (ret < 0) {
udf_err(sb, "error loading MetaData partition map %d\n",
i);
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 27b5335730c9..c13875d669c0 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -61,6 +61,11 @@ struct udf_meta_data {
__u32 s_bitmap_file_loc;
__u32 s_alloc_unit_size;
__u16 s_align_unit_size;
+ /*
+ * Partition Reference Number of the associated physical / sparable
+ * partition
+ */
+ __u16 s_phys_partition_ref;
int s_flags;
struct inode *s_metadata_fe;
struct inode *s_mirror_fe;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 0447b949c7f5..67e085d591d8 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -292,7 +292,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
if (!buffer_mapped(bh))
map_bh(bh, inode->i_sb, oldb + pos);
if (!buffer_uptodate(bh)) {
- ll_rw_block(READ, 1, &bh);
+ ll_rw_block(REQ_OP_READ, 0, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
ufs_error(inode->i_sb, __func__,
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index a409e3e7827a..f41ad0a6106f 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -118,7 +118,7 @@ void ubh_sync_block(struct ufs_buffer_head *ubh)
unsigned i;
for (i = 0; i < ubh->count; i++)
- write_dirty_buffer(ubh->bh[i], WRITE);
+ write_dirty_buffer(ubh->bh[i], 0);
for (i = 0; i < ubh->count; i++)
wait_on_buffer(ubh->bh[i]);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..87d2b215cbbd 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -438,7 +438,8 @@ xfs_submit_ioend(
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;
-
+ bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
+ (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
/*
* If we are failing the IO now, just mark the ioend with an
* error and finish it. This will run IO completion immediately
@@ -451,8 +452,7 @@ xfs_submit_ioend(
return status;
}
- submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
- ioend->io_bio);
+ submit_bio(ioend->io_bio);
return 0;
}
@@ -510,8 +510,9 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
- submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
- ioend->io_bio);
+ bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
+ (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e71cfbd5acb3..a87a0d5477bd 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1127,7 +1127,8 @@ xfs_buf_ioapply_map(
int map,
int *buf_offset,
int *count,
- int rw)
+ int op,
+ int op_flags)
{
int page_index;
int total_nr_pages = bp->b_page_count;
@@ -1157,16 +1158,14 @@ xfs_buf_ioapply_map(
next_chunk:
atomic_inc(&bp->b_io_remaining);
- nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
- if (nr_pages > total_nr_pages)
- nr_pages = total_nr_pages;
+ nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, nr_pages);
bio->bi_bdev = bp->b_target->bt_bdev;
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
-
+ bio_set_op_attrs(bio, op, op_flags);
for (; size && nr_pages; nr_pages--, page_index++) {
int rbytes, nbytes = PAGE_SIZE - offset;
@@ -1190,7 +1189,7 @@ next_chunk:
flush_kernel_vmap_range(bp->b_addr,
xfs_buf_vmap_len(bp));
}
- submit_bio(rw, bio);
+ submit_bio(bio);
if (size)
goto next_chunk;
} else {
@@ -1210,7 +1209,8 @@ _xfs_buf_ioapply(
struct xfs_buf *bp)
{
struct blk_plug plug;
- int rw;
+ int op;
+ int op_flags = 0;
int offset;
int size;
int i;
@@ -1229,14 +1229,13 @@ _xfs_buf_ioapply(
bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
if (bp->b_flags & XBF_WRITE) {
+ op = REQ_OP_WRITE;
if (bp->b_flags & XBF_SYNCIO)
- rw = WRITE_SYNC;
- else
- rw = WRITE;
+ op_flags = WRITE_SYNC;
if (bp->b_flags & XBF_FUA)
- rw |= REQ_FUA;
+ op_flags |= REQ_FUA;
if (bp->b_flags & XBF_FLUSH)
- rw |= REQ_FLUSH;
+ op_flags |= REQ_PREFLUSH;
/*
* Run the write verifier callback function if it exists. If
@@ -1266,13 +1265,14 @@ _xfs_buf_ioapply(
}
}
} else if (bp->b_flags & XBF_READ_AHEAD) {
- rw = READA;
+ op = REQ_OP_READ;
+ op_flags = REQ_RAHEAD;
} else {
- rw = READ;
+ op = REQ_OP_READ;
}
/* we only use the buffer cache for meta-data */
- rw |= REQ_META;
+ op_flags |= REQ_META;
/*
* Walk all the vectors issuing IO on them. Set up the initial offset
@@ -1284,7 +1284,7 @@ _xfs_buf_ioapply(
size = BBTOB(bp->b_io_length);
blk_start_plug(&plug);
for (i = 0; i < bp->b_map_count; i++) {
- xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+ xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
if (bp->b_error)
break;
if (size <= 0)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index dbca7375deef..63a6ff2cfc68 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1575,6 +1575,12 @@ xfs_ioc_swapext(
goto out_put_tmp_file;
}
+ if (f.file->f_op != &xfs_file_operations ||
+ tmp.file->f_op != &xfs_file_operations) {
+ error = -EINVAL;
+ goto out_put_tmp_file;
+ }
+
ip = XFS_I(file_inode(f.file));
tip = XFS_I(file_inode(tmp.file));